好久没弄ollvm了,可以继续了,今天给ollvm新增了一个pass,用来加密字符串,这个pass是从别的库里面扒出来的。
本文是基于在Windows 上使用VS2017编译出来的ollvm,在这个基础上来添加。
第一步:
准备这个pass的两个代码文件(头文件和源文件)
头文件

1 #ifndef _STRING_OBFUSCATION_H_
2 #define _STRING_OBFUSCATION_H_
3
4
5 // LLVM include
6 #include "llvm/Pass.h"
7 #include "llvm/IR/Function.h"
8 #include "llvm/IR/Instructions.h"
9 #include "llvm/ADT/Statistic.h"
10 #include "llvm/Transforms/IPO.h"
11 #include "llvm/IR/Module.h"
12 #include "llvm/Support/CommandLine.h"
13 #include "llvm/CryptoUtils.h"
14
15 // Namespace
16 using namespace llvm;
17 using namespace std;
18
19 namespace llvm {
20 Pass *createStringObfuscation(bool flag);
21 }
22
23 #endif
源文件

1 #define DEBUG_TYPE "objdiv"
2 #include <string>
3 #include <sstream>
4
5 #include "llvm/ADT/Statistic.h"
6 #include "llvm/IR/Function.h"
7 #include "llvm/IR/Constants.h"
8 #include "llvm/IR/Module.h"
9 #include "llvm/IR/Value.h"
10 #include "llvm/Pass.h"
11 #include "llvm/Support/raw_ostream.h"
12 #include "llvm/CryptoUtils.h"
13 #include "llvm/Transforms/Obfuscation/StringObfuscation.h"
14 #include "llvm/IR/IRBuilder.h"
15 #include "llvm/Transforms/Utils/ModuleUtils.h"
16
17 using namespace llvm;
18
19 STATISTIC(GlobalsEncoded, "Counts number of global variables encoded");
20
// Debug trace helper: prints "File : [<file>](<line>) " followed by a
// printf-style message on stdout.
//
// The message is emitted by a second fprintf so the macro also works when it is
// called with a format string only; the previous "fmt, __VA_ARGS__" form left a
// trailing comma in that case and compiled only with MSVC's non-standard comma
// swallowing. The do/while(0) wrapper keeps the two calls a single statement.
#define ZooPrint(...)                                                          \
  do {                                                                         \
    fprintf(stdout, "File : [%s](%d) ", __FILE__, __LINE__);                   \
    fprintf(stdout, __VA_ARGS__);                                              \
  } while (0)
22
23 namespace llvm {
24
25 struct encVar {
26 public:
27 GlobalVariable *var;
28 uint8_t key;
29 };
30
31 class StringObfuscationPass : public llvm::ModulePass {
32 public:
33 static char ID; // pass identification
34 bool is_flag = false;
35 StringObfuscationPass() : ModulePass(ID) {}
36 StringObfuscationPass(bool flag) : ModulePass(ID)
37 {
38 is_flag = flag;
39 }
40
41 virtual bool runOnModule(Module &M) {
42 ZooPrint(" Run On Module : %d \n", is_flag);
43 if (!is_flag)
44 return false;
45 std::vector<GlobalVariable*> toDelConstGlob;
46 //std::vector<GlobalVariable*> encGlob;
47 std::vector<encVar*> encGlob;
48 ZooPrint(" M.Size : %d \n", M.size());
49 int i = 0;
50 for (Module::global_iterator gi = M.global_begin(), ge = M.global_end(); gi != ge; ++gi)
51 {
52
53 #if 0
54 // 老式代码,原来的样子
55 @.str = private unnamed_addr constant[13 x i8] c"\E4\BD\A0\E5\A5\BD\E4\B8\96\E7\95\8C\00", align 1
56 @__CFConstantStringClassReference = external global[0 x i32]
57 @.str.1 = private unnamed_addr constant[3 x i16][i16 20320, i16 22909, i16 0], section "__TEXT,__ustring", align 2
58 // 新式字符串的样子
59 @"\01??_C@_07CHPFNFHA@123456?6?$AA@" = linkonce_odr unnamed_addr constant [8 x i8] c"123456\0A\00", comdat, align 1
60 @"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [4 x i8] c"%d\0A\00", comdat, align 1
61 @__local_stdio_printf_options._OptionsStorage = internal global i64 0, align 8
62 #endif
63 // Loop over all global variables
64 GlobalVariable* gv = &(*gi);
65 //errs() << "Global var " << gv->getName();
66 //std::string::size_type str_idx = gv->getName().str().find(".str.");
67 std::string section(gv->getSection());
68
69 ZooPrint(" %d : String : \"%s\" , section : \"%s\" , isConstant : %d , hasInitializer : %d , isa : %d , r : %d \n", i++, gv->getName().str().c_str(), section.c_str(), gv->isConstant(), gv->hasInitializer(), isa<ConstantDataSequential>(gv->getInitializer()), gv->getName().str().substr(0, 8) == "\"\x01??_C@_");
70 // ZooPrint(" 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X \n", gv->getName()[0] & 0xFF, gv->getName()[1] & 0xFF, gv->getName()[2] & 0xFF, gv->getName()[3] & 0xFF, gv->getName()[4] & 0xFF, gv->getName()[5] & 0xFF, gv->getName()[6] & 0xFF, gv->getName()[7] & 0xFF);
71
72 // Let's encode the static ones
73 //if (gv->getName().str().substr(0, 4) == ".str"&&
74 if (gv->getName().str().substr(0, 7) == "\x01??_C@_" &&
75 gv->isConstant() &&
76 gv->hasInitializer() &&
77 isa<ConstantDataSequential>(gv->getInitializer()) &&
78 section != "llvm.metadata" &&
79 section.find("__objc_methname") == std::string::npos
80 /*&&gv->getType()->getArrayElementType()->getArrayElementType()->isIntegerTy()*/)
81 {
82 ZooPrint(" In Global Encode \n");
83 ++GlobalsEncoded;
84 //errs() << " is constant";
85
86 // Duplicate global variable
87 GlobalVariable *dynGV = new GlobalVariable(M,
88 gv->getType()->getElementType(),
89 !(gv->isConstant()), gv->getLinkage(),
90 (Constant*)0, gv->getName(),
91 (GlobalVariable*)0,
92 gv->getThreadLocalMode(),
93 gv->getType()->getAddressSpace());
94 // dynGV->copyAttributesFrom(gv);
95 dynGV->setInitializer(gv->getInitializer());
96
97 std::string tmp = gv->getName().str();
98 // errs()<<"GV: "<<*gv<<"\n";
99
100 Constant *initializer = gv->getInitializer();
101 ConstantDataSequential *cdata = dyn_cast<ConstantDataSequential>(initializer);
102 if (cdata) {
103 const char *orig = cdata->getRawDataValues().data();
104 unsigned len = cdata->getNumElements()*cdata->getElementByteSize();
105
106 encVar *cur = new encVar();
107 cur->var = dynGV;
108 cur->key = llvm::cryptoutils->get_uint8_t();
109 // casting away const is undef. behavior in C++
110 // TODO a clean implementation would retrieve the data, generate a new constant
111 // set the correct type, and copy the data over.
112 //char *encr = new char[len];
113 //Constant *initnew = ConstantDataArray::getString(M.getContext(), encr, true);
114 char *encr = const_cast<char *>(orig);
115 // Simple xor encoding
116 for (unsigned i = 0; i != len; ++i) {
117 encr[i] = orig[i] ^ cur->key;
118 }
119
120 // FIXME Second part of the unclean hack.
121 dynGV->setInitializer(initializer);
122
123 // Prepare to add decode function for this variable
124 encGlob.push_back(cur);
125 }
126 else {
127 // just copying default initializer for now
128 dynGV->setInitializer(initializer);
129 }
130
131 // redirect references to new GV and remove old one
132 gv->replaceAllUsesWith(dynGV);
133 toDelConstGlob.push_back(gv);
134
135 }
136 }
137
138 // actuallte delete marked globals
139 for (unsigned i = 0, e = toDelConstGlob.size(); i != e; ++i)
140 toDelConstGlob[i]->eraseFromParent();
141
142 addDecodeFunction(&M, &encGlob);
143
144
145 return true;
146 }
147
148 private:
149 void addDecodeFunction(Module *mod, std::vector<encVar*> *gvars) {
150 ZooPrint(" Add Decode Function \n");
151 // Declare and add the function definition
152 //errs()<<"Successful enter decode function"<<"\n";
153 std::vector<Type*>FuncTy_args;
154 FunctionType* FuncTy = FunctionType::get(
155 /*Result=*/Type::getVoidTy(mod->getContext()), // returning void
156 /*Params=*/FuncTy_args, // taking no args
157 /*isVarArg=*/false);
158 uint64_t StringObfDecodeRandomName = cryptoutils->get_uint64_t();
159 std::string random_str;
160 std::stringstream random_stream;
161 random_stream << StringObfDecodeRandomName;
162 random_stream >> random_str;
163 StringObfDecodeRandomName++;
164 Constant* c = mod->getOrInsertFunction(".datadiv_decode" + random_str, FuncTy);
165 Function* fdecode = cast<Function>(c);
166 fdecode->setCallingConv(CallingConv::C);
167
168
169 BasicBlock* entry = BasicBlock::Create(mod->getContext(), "entry", fdecode);
170
171 IRBuilder<> builder(mod->getContext());
172 builder.SetInsertPoint(entry);
173
174
175 for (unsigned i = 0, e = gvars->size(); i != e; ++i) {
176 GlobalVariable *gvar = (*gvars)[i]->var;
177 uint8_t key = (*gvars)[i]->key;
178
179 Constant *init = gvar->getInitializer();
180 ConstantDataSequential *cdata = dyn_cast<ConstantDataSequential>(init);
181
182 unsigned len = cdata->getNumElements()*cdata->getElementByteSize();
183 --len;
184
185 BasicBlock *preHeaderBB = builder.GetInsertBlock();
186 BasicBlock* for_body = BasicBlock::Create(mod->getContext(), "for-body", fdecode);
187 BasicBlock* for_end = BasicBlock::Create(mod->getContext(), "for-end", fdecode);
188 builder.CreateBr(for_body);
189 builder.SetInsertPoint(for_body);
190 PHINode *variable = builder.CreatePHI(Type::getInt32Ty(mod->getContext()), 2, "i");
191 Value *startValue = builder.getInt32(0);
192 Value *endValue = builder.getInt32(len);
193 variable->addIncoming(startValue, preHeaderBB);
194 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
195
196 //LoadInst *Load=builder.CreateLoad(gvar);
197 //errs()<<"Load: "<<*(Load->getPointerOperand())<<"\n";
198 Value* indexList[2] = { ConstantInt::get(variable->getType(), 0), variable };
199 Value *const_key = builder.getInt8(key);
200 Value *GEP = builder.CreateGEP(gvar, ArrayRef<Value*>(indexList, 2), "arrayIdx");
201 LoadInst *loadElement = builder.CreateLoad(GEP, false);
202 loadElement->setAlignment(1);
203 //errs()<<"Type: "<<*loadElement<<"\n";
204 //CastInst* extended = new ZExtInst(const_key, loadElement->getType(), "extended", for_body);
205 //Value* extended = builder.CreateZExtOrBitCast(const_key, loadElement->getType(),"extended");
206 Value *Xor = builder.CreateXor(loadElement, const_key, "xor");
207 StoreInst *Store = builder.CreateStore(Xor, GEP, false);
208 Store->setAlignment(1);
209
210 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
211 Value *stepValue = builder.getInt32(1);
212 Value *nextValue = builder.CreateAdd(variable, stepValue, "next-value");
213 Value *endCondition = builder.CreateICmpULT(variable, endValue, "end-condition");
214 endCondition = builder.CreateICmpNE(endCondition, builder.getInt1(0), "loop-condition");
215 BasicBlock *loopEndBB = builder.GetInsertBlock();
216 builder.CreateCondBr(endCondition, loopEndBB, for_end);
217 builder.SetInsertPoint(for_end);
218 variable->addIncoming(nextValue, loopEndBB);
219
220 }
221 builder.CreateRetVoid();
222 appendToGlobalCtors(*mod, fdecode, 0);
223
224
225 }
226
227 };
228
229 Pass *createStringObfuscation(bool flag);
230 }
231
232 #if 0
233 RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false, bool is_analysis = false)
234
235 上面这个是RegisterPass的构造函数。
236 参数说明:
237
238 template<typename passName> :YourPassName;
239 PassArg :opt调用时所用的命令行参数;
240 Name :此pass的简要说明;
241 CFGOnly :如果一个Pass只遍历CFG而不修改它,那么这个参数被设置为true;
242 is_analysis :如果一个Pass是一个分析Pass,例如dominator tree pass,那么这个参数被设置为true。
243 例子:
244
245 static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
246 #endif
247
248 char StringObfuscationPass::ID = 0;
249 static RegisterPass<StringObfuscationPass> X("GVDiv", "Global variable (i.e., const char*) diversification pass", false, true);
250
251 Pass * llvm::createStringObfuscation(bool flag) {
252 ZooPrint("new my pass \n");
253 return new StringObfuscationPass(flag);
254 }
第二步:
将头文件放在如下位置:ollvm\obfuscator-llvm-4.0\include\llvm\Transforms\Obfuscation\StringObfuscation.h
将源文件放在如下位置:ollvm\obfuscator-llvm-4.0\lib\Transforms\Obfuscation\StringEncode.cpp
第三步:
将源文件放到如下工程中

第四步:
在此文件中新增代码:ollvm\obfuscator-llvm-4.0\lib\Transforms\IPO\PassManagerBuilder.cpp
新增导入头文件
1 #include "llvm/Transforms/Obfuscation/StringObfuscation.h"
新增全局变量代码如下
1 static cl::opt<std::string> Seed("seed", cl::init(""),
2 cl::desc("seed for the random"));
3
4 // Global switch: set from the -sobf command-line option, off by default
5 static cl::opt<bool> StringObf("sobf", cl::init(false),
6 cl::desc("Enable the string obfuscation"));
在:PassManagerBuilder::populateModulePassManager 函数中,新增挂载新的pass代码,如下
1 MPM.add(createStringObfuscation(StringObf));
意义为根据全局开关来判断是否启用当前pass
经过以上四步,问题全部解决了,直接重新编译ollvm即可。
后续可以修改pass代码,可以修改解密函数。
新增其他pass的步骤也同上。
使用方式如下
1 G:\ollvm\Test>G:\ollvm\build\RelWithDebInfo\bin\clang.exe -mllvm -sobf -mllvm -fla main.c
含义是,开启字符串加密,并且启动代码扁平化
效果:
源代码如下

编译后如下

已经开启了代码扁平化,原始字符串也已经不一样了,具体情况,
看data段就好了:

已经完全没个人样了
重点在最后,忘了,补充一句,由于字符串在ollvm里面是以UTF8的格式保存的,所以中文字符串天然就是乱码,
有时间想办法来解决一下中文字符串的乱码问题。
