[Pkg-clamav-commits] [SCM] Debian repository for ClamAV branch, debian/unstable, updated. debian/0.95+dfsg-1-6156-g094ec9b
Török Edvin
edwin at clamav.net
Sun Apr 4 01:02:26 UTC 2010
The following commit has been merged in the debian/unstable branch:
commit ee8f1888e1487e20538677bc9cd341f521d1ffdd
Author: Török Edvin <edwin at clamav.net>
Date: Thu Aug 27 20:22:50 2009 +0300
more conversion.
diff --git a/libclamav/bytecode2llvm.cpp b/libclamav/bytecode2llvm.cpp
index b8ddb92..eb22100 100644
--- a/libclamav/bytecode2llvm.cpp
+++ b/libclamav/bytecode2llvm.cpp
@@ -28,6 +28,7 @@
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/ModuleProvider.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CommandLine.h"
@@ -40,7 +41,10 @@
#include "llvm/System/Signals.h"
#include "llvm/System/Threading.h"
#include "llvm/Target/TargetSelect.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/Verifier.h"
#include <cstdlib>
#include <new>
@@ -81,12 +85,18 @@ private:
const Type **TypeMap;
Twine BytecodeID;
ExecutionEngine *EE;
+ TargetFolder Folder;
+ IRBuilder<false, TargetFolder> Builder;
+ Value **Values;
+ FunctionPassManager &PM;
+ unsigned numLocals;
+ unsigned numArgs;
const Type *mapType(uint16_t ty)
{
if (!ty)
return Type::getVoidTy(Context);
- if (ty < 64)
+ if (ty <= 64)
return IntegerType::get(Context, ty);
switch (ty) {
case 65:
@@ -110,56 +120,80 @@ private:
}
}
- Value *convertOperand(const struct cli_bc_func *func,
+ Value *convertOperand(const struct cli_bc_func *func, const Type *Ty, operand_t operand)
+ {
+ unsigned map[] = {0, 1, 2, 3, 3, 4, 4, 4, 4};
+ if (operand < func->numArgs)
+ return Values[operand];
+ if (operand < func->numValues)
+ return Builder.CreateLoad(Values[operand]);
+ unsigned w = (Ty->getPrimitiveSizeInBits()+7)/8;
+ return convertOperand(func, map[w], operand);
+ }
+
+ Value *convertOperand(const struct cli_bc_func *func,
const struct cli_bc_inst *inst, operand_t operand)
{
- if (operand >= func->numValues) {
- // Constant
- operand -= func->numValues;
- // This was already validated by libclamav.
- assert(operand < func->numConstants && "Constant out of range");
- uint64_t *c = &func->constants[operand-func->numValues];
- uint64_t v;
- const Type *Ty;
- switch (inst->interp_op%5) {
- case 0:
- case 1:
- Ty = (inst->interp_op%5) ? Type::getInt8Ty(Context) :
- Type::getInt1Ty(Context);
- v = *(uint8_t*)c;
- break;
- case 2:
- Ty = Type::getInt16Ty(Context);
- v = *(uint16_t*)c;
- break;
- case 3:
- Ty = Type::getInt32Ty(Context);
- v = *(uint32_t*)c;
- break;
- case 4:
- Ty = Type::getInt64Ty(Context);
- v = *(uint64_t*)c;
- break;
- }
- return ConstantInt::get(Ty, v);
+ return convertOperand(func, inst->interp_op%5, operand);
+ }
+
+ Value *convertOperand(const struct cli_bc_func *func,
+ unsigned w, operand_t operand) {
+ if (operand < func->numArgs)
+ return Values[operand];
+ if (operand < func->numValues)
+ return Builder.CreateLoad(Values[operand]);
+
+ // Constant
+ operand -= func->numValues;
+ // This was already validated by libclamav.
+ assert(operand < func->numConstants && "Constant out of range");
+ uint64_t *c = &func->constants[operand];
+ uint64_t v;
+ const Type *Ty;
+ switch (w) {
+ case 0:
+ case 1:
+ Ty = w ? Type::getInt8Ty(Context) :
+ Type::getInt1Ty(Context);
+ v = *(uint8_t*)c;
+ break;
+ case 2:
+ Ty = Type::getInt16Ty(Context);
+ v = *(uint16_t*)c;
+ break;
+ case 3:
+ Ty = Type::getInt32Ty(Context);
+ v = *(uint32_t*)c;
+ break;
+ case 4:
+ Ty = Type::getInt64Ty(Context);
+ v = *(uint64_t*)c;
+ break;
}
- assert(0 && "Not implemented yet");
+ return ConstantInt::get(Ty, v);
+ }
+
+ void Store(uint16_t dest, Value *V)
+ {
+ assert(dest >= numArgs && dest < numLocals+numArgs && "Instruction destination out of range");
+ Builder.CreateStore(V, Values[dest]);
}
public:
LLVMCodegen(const struct cli_bc *bc, Module *M, FunctionMapTy &cFuncs,
- ExecutionEngine *EE)
+ ExecutionEngine *EE, FunctionPassManager &PM)
: bc(bc), M(M), Context(M->getContext()), compiledFunctions(cFuncs),
- BytecodeID("bc"+Twine(bc->id)), EE(EE) {
+ BytecodeID("bc"+Twine(bc->id)), EE(EE),
+ Folder(EE->getTargetData(), Context), Builder(Context, Folder), PM(PM) {
TypeMap = new const Type*[bc->num_types];
}
- void generate() {
+ bool generate() {
PrettyStackTraceString Trace(BytecodeID.str().c_str());
convertTypes();
- TargetFolder Folder(EE->getTargetData(), Context);
- IRBuilder<false, TargetFolder> Builder(Context, Folder);
+ Function **Functions = new Function*[bc->num_func];
for (unsigned j=0;j<bc->num_func;j++) {
- PrettyStackTraceString CrashInfo("Generate LLVM IR");
+ PrettyStackTraceString CrashInfo("Generate LLVM IR functions");
// Create LLVM IR Function
const struct cli_bc_func *func = &bc->funcs[j];
std::vector<const Type*> argTypes;
@@ -169,36 +203,217 @@ public:
const Type *RetTy = mapType(func->returnType);
llvm::FunctionType *FTy = FunctionType::get(RetTy, argTypes,
false);
- Function *F = Function::Create(FTy, Function::InternalLinkage,
+ Functions[j] = Function::Create(FTy, Function::InternalLinkage,
BytecodeID+"f"+Twine(j), M);
-
+ }
+ for (unsigned j=0;j<bc->num_func;j++) {
+ PrettyStackTraceString CrashInfo("Generate LLVM IR");
+ const struct cli_bc_func *func = &bc->funcs[j];
// Create all BasicBlocks
+ Function *F = Functions[j];
BasicBlock **BB = new BasicBlock*[func->numBB];
for (unsigned i=0;i<func->numBB;i++) {
BB[i] = BasicBlock::Create(Context, "", F);
}
+ Values = new Value*[func->numValues];
+ Builder.SetInsertPoint(BB[0]);
+ Function::arg_iterator I = F->arg_begin();
+ for (unsigned i=0;i<func->numArgs; i++) {
+ assert(I != F->arg_end());
+ Values[i] = &*I;
+ ++I;
+ }
+ for (unsigned i=func->numArgs;i<func->numValues;i++) {
+ Values[i] = Builder.CreateAlloca(mapType(func->types[i]));
+ }
+ numLocals = func->numLocals;
+ numArgs = func->numArgs;
// Generate LLVM IR for each BB
for (unsigned i=0;i<func->numBB;i++) {
const struct cli_bc_bb *bb = &func->BB[i];
Builder.SetInsertPoint(BB[i]);
for (unsigned j=0;j<bb->numInsts;j++) {
- const struct cli_bc_inst *inst = &bb->insts[i];
+ const struct cli_bc_inst *inst = &bb->insts[j];
+ Value *Op0, *Op1, *Op2;
+ // libclamav has already validated this.
+ assert(inst->opcode < OP_INVALID && "Invalid opcode");
+ switch (inst->opcode) {
+ case OP_JMP:
+ case OP_BRANCH:
+ case OP_CALL_API:
+ case OP_CALL_DIRECT:
+ case OP_ZEXT:
+ case OP_SEXT:
+ case OP_TRUNC:
+ // these instructions represent operands differently
+ break;
+ default:
+ switch (operand_counts[inst->opcode]) {
+ case 1:
+ Op0 = convertOperand(func, inst, inst->u.unaryop);
+ break;
+ case 2:
+ Op0 = convertOperand(func, inst, inst->u.binop[0]);
+ Op1 = convertOperand(func, inst, inst->u.binop[1]);
+ break;
+ case 3:
+ Op0 = convertOperand(func, inst, inst->u.three[0]);
+ Op1 = convertOperand(func, inst, inst->u.three[1]);
+ Op2 = convertOperand(func, inst, inst->u.three[2]);
+ break;
+ }
+ }
switch (inst->opcode) {
+ case OP_ADD:
+ Store(inst->dest, Builder.CreateAdd(Op0, Op1));
+ break;
+ case OP_SUB:
+ Store(inst->dest, Builder.CreateSub(Op0, Op1));
+ break;
+ case OP_MUL:
+ Store(inst->dest, Builder.CreateMul(Op0, Op1));
+ break;
+ case OP_UDIV:
+ Store(inst->dest, Builder.CreateUDiv(Op0, Op1));
+ break;
+ case OP_SDIV:
+ Store(inst->dest, Builder.CreateSDiv(Op0, Op1));
+ break;
+ case OP_UREM:
+ Store(inst->dest, Builder.CreateURem(Op0, Op1));
+ break;
+ case OP_SREM:
+ Store(inst->dest, Builder.CreateSRem(Op0, Op1));
+ break;
+ case OP_SHL:
+ Store(inst->dest, Builder.CreateShl(Op0, Op1));
+ break;
+ case OP_LSHR:
+ Store(inst->dest, Builder.CreateLShr(Op0, Op1));
+ break;
+ case OP_ASHR:
+ Store(inst->dest, Builder.CreateAShr(Op0, Op1));
+ break;
+ case OP_AND:
+ Store(inst->dest, Builder.CreateAnd(Op0, Op1));
+ break;
+ case OP_OR:
+ Store(inst->dest, Builder.CreateOr(Op0, Op1));
+ break;
+ case OP_XOR:
+ Store(inst->dest, Builder.CreateXor(Op0, Op1));
+ break;
+ case OP_TRUNC:
+ {
+ Value *Src = convertOperand(func, inst, inst->u.cast.source);
+ const Type *Ty = mapType(func->types[inst->dest]);
+ Store(inst->dest, Builder.CreateTrunc(Src, Ty));
+ break;
+ }
+ case OP_ZEXT:
+ {
+ Value *Src = convertOperand(func, inst, inst->u.cast.source);
+ const Type *Ty = mapType(func->types[inst->dest]);
+ Store(inst->dest, Builder.CreateZExt(Src, Ty));
+ break;
+ }
+ case OP_SEXT:
+ {
+ Value *Src = convertOperand(func, inst, inst->u.cast.source);
+ const Type *Ty = mapType(func->types[inst->dest]);
+ Store(inst->dest, Builder.CreateSExt(Src, Ty));
+ break;
+ }
+ case OP_BRANCH:
+ {
+ Value *Cond = convertOperand(func, inst, inst->u.branch.condition);
+ BasicBlock *True = BB[inst->u.branch.br_true];
+ BasicBlock *False = BB[inst->u.branch.br_false];
+ if (Cond->getType() != Type::getInt1Ty(Context)) {
+ errs() << MODULE << "type mismatch in condition\n";
+ return false;
+ }
+ Builder.CreateCondBr(Cond, True, False);
+ break;
+ }
+ case OP_JMP:
+ {
+ BasicBlock *Jmp = BB[inst->u.jump];
+ Builder.CreateBr(Jmp);
+ break;
+ }
case OP_RET:
- Value *V = convertOperand(func, inst, inst->u.unaryop);
- Builder.CreateRet(V);
+ Builder.CreateRet(Op0);
+ break;
+ case OP_ICMP_EQ:
+ Store(inst->dest, Builder.CreateICmpEQ(Op0, Op1));
+ break;
+ case OP_ICMP_NE:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_UGT:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_UGE:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
break;
+ case OP_ICMP_ULT:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_ULE:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_SGT:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_SGE:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_ICMP_SLT:
+ Store(inst->dest, Builder.CreateICmpNE(Op0, Op1));
+ break;
+ case OP_SELECT:
+ Store(inst->dest, Builder.CreateSelect(Op0, Op1, Op2));
+ break;
+ case OP_COPY:
+ Builder.CreateStore(Op0, Op1);
+ break;
+ case OP_CALL_DIRECT:
+ {
+ Function *DestF = Functions[inst->u.ops.funcid];
+ SmallVector<Value*, 2> args;
+ for (unsigned a=0;a<inst->u.ops.numOps;a++) {
+ operand_t op = inst->u.ops.ops[a];
+ args.push_back(convertOperand(func, DestF->getFunctionType()->getParamType(a), op));
+ }
+ Store(inst->dest, Builder.CreateCall(DestF, args.begin(), args.end()));
+ break;
+ }
+ default:
+ assert(0 && "Not implemented yet");
}
}
}
+ if (verifyFunction(*F, PrintMessageAction)) {
+ errs() << MODULE << "Verification failed\n";
+ // verification failed
+ return false;
+ }
+ PM.run(*F);
+ delete [] Values;
+ }
+
+ for (unsigned j=0;j<bc->num_func;j++) {
+ const struct cli_bc_func *func = &bc->funcs[j];
PrettyStackTraceString CrashInfo2("Native machine codegen");
// Codegen current function as executable machine code.
- compiledFunctions[func] = EE->getPointerToFunction(F);
+ compiledFunctions[func] = EE->getPointerToFunction(Functions[j]);
}
- delete TypeMap;
+ delete [] TypeMap;
+ return true;
}
};
}
@@ -214,10 +429,11 @@ int cli_bytecode_prepare_jit(struct cli_all_bc *bcs)
// LLVM itself never throws exceptions, but operator new may throw bad_alloc
try {
Module *M = new Module("ClamAV jit module", bcs->engine->Context);
+ ExistingModuleProvider *MP = new ExistingModuleProvider(M);
{
// Create the JIT.
std::string ErrorMsg;
- EngineBuilder builder(M);
+ EngineBuilder builder(MP);
builder.setErrorStr(&ErrorMsg);
builder.setEngineKind(EngineKind::JIT);
builder.setOptLevel(CodeGenOpt::Aggressive);
@@ -233,10 +449,22 @@ int cli_bytecode_prepare_jit(struct cli_all_bc *bcs)
EE->RegisterJITEventListener(createOProfileJITEventListener());
EE->DisableLazyCompilation();
+ FunctionPassManager OurFPM(MP);
+ // Set up the optimizer pipeline. Start with registering info about how
+ // the target lays out data structures.
+ OurFPM.add(new TargetData(*EE->getTargetData()));
+ // Promote allocas to registers.
+ OurFPM.add(createPromoteMemoryToRegisterPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ OurFPM.doInitialization();
for (unsigned i=0;i<bcs->count;i++) {
const struct cli_bc *bc = &bcs->all_bcs[i];
- LLVMCodegen Codegen(bc, M, bcs->engine->compiledFunctions, EE);
- Codegen.generate();
+ LLVMCodegen Codegen(bc, M, bcs->engine->compiledFunctions, EE, OurFPM);
+ if (!Codegen.generate()) {
+ errs() << MODULE << "JIT codegen failed\n";
+ return CL_EBYTECODE;
+ }
}
// compile all functions now, not lazily!
@@ -283,7 +511,7 @@ int cli_bytecode_done_jit(struct cli_all_bc *bcs)
{
if (bcs->engine->EE)
delete bcs->engine->EE;
- free(bcs->engine);
+ delete bcs->engine;
bcs->engine = 0;
return 0;
}
--
Debian repository for ClamAV
More information about the Pkg-clamav-commits
mailing list