From afe16a71f77798be1314e046585367c5aca0d2d1 Mon Sep 17 00:00:00 2001 From: Sean Callanan Date: Wed, 17 Nov 2010 23:00:36 +0000 Subject: [PATCH] Added support for constant strings of the form @"this-is-a-string". They are replaced with calls to the CoreFoundation function CFStringCreateWithBytes() by a portion of the IRForTarget pass. llvm-svn: 119582 --- lldb/include/lldb/Expression/IRForTarget.h | 86 ++++- lldb/source/Expression/IRForTarget.cpp | 388 ++++++++++++++++++--- 2 files changed, 431 insertions(+), 43 deletions(-) diff --git a/lldb/include/lldb/Expression/IRForTarget.h b/lldb/include/lldb/Expression/IRForTarget.h index 826277c55740..48bc5ab546e0 100644 --- a/lldb/include/lldb/Expression/IRForTarget.h +++ b/lldb/include/lldb/Expression/IRForTarget.h @@ -17,6 +17,7 @@ namespace llvm { class CallInst; class Constant; class Function; + class GlobalVariable; class Instruction; class Module; class Value; @@ -112,6 +113,54 @@ private: //------------------------------------------------------------------ bool createResultVariable(llvm::Module &M, llvm::Function &F); + + //------------------------------------------------------------------ + /// A function-level pass to find Objective-C constant strings and + /// transform them to calls to CFStringCreateWithBytes. + //------------------------------------------------------------------ + + //------------------------------------------------------------------ + /// Rewrite a single Objective-C constant string. + /// + /// @param[in] M + /// The module currently being processed. + /// + /// @param[in] NSStr + /// The constant NSString to be transformed + /// + /// @param[in] CStr + /// The constant C string inside the NSString. This will be + /// passed as the bytes argument to CFStringCreateWithBytes. + /// + /// @param[in] FirstEntryInstruction + /// An instruction early in the execution of the function. 
+ /// When this function synthesizes a call to + /// CFStringCreateWithBytes, it places the call before this + /// instruction. The instruction should come before all + /// uses of the NSString. + /// + /// @return + /// True on success; false otherwise + //------------------------------------------------------------------ + bool rewriteObjCConstString(llvm::Module &M, + llvm::GlobalVariable *NSStr, + llvm::GlobalVariable *CStr, + llvm::Instruction *FirstEntryInstruction); + + //------------------------------------------------------------------ + /// The top-level pass implementation + /// + /// @param[in] M + /// The module currently being processed. + /// + /// @param[in] F + /// The function currently being processed. + /// + /// @return + /// True on success; false otherwise + //------------------------------------------------------------------ + bool rewriteObjCConstStrings(llvm::Module &M, + llvm::Function &F); //------------------------------------------------------------------ /// A basic block-level pass to find all Objective-C method calls and @@ -323,11 +372,40 @@ private: llvm::Function &F); /// Flags - bool m_resolve_vars; ///< True if external variable references and persistent variable references should be resolved + bool m_resolve_vars; ///< True if external variable references and persistent variable references should be resolved - std::string m_func_name; ///< The name of the function to translate - lldb_private::ClangExpressionDeclMap *m_decl_map; ///< The DeclMap containing the Decls - llvm::Constant *m_sel_registerName; ///< The address of the function sel_registerName, cast to the appropriate function pointer type + std::string m_func_name; ///< The name of the function to translate + lldb_private::ClangExpressionDeclMap *m_decl_map; ///< The DeclMap containing the Decls + llvm::Constant *m_CFStringCreateWithBytes; ///< The address of the function CFStringCreateWithBytes, cast to the appropriate function pointer type + llvm::Constant 
*m_sel_registerName; ///< The address of the function sel_registerName, cast to the appropriate function pointer type + +private: + //------------------------------------------------------------------ + /// UnfoldConstant operates on a constant [Old] which has just been + /// replaced with a value [New]. We assume that New has + /// been properly placed early in the function, in front of the + /// first instruction in the entry basic block + /// [FirstEntryInstruction]. + /// + /// UnfoldConstant reads through the uses of Old and replaces Old + /// in those uses with New. Where those uses are constants, the + /// function generates new instructions to compute the result of the + /// new, non-constant expression and places them before + /// FirstEntryInstruction. These instructions replace the constant + /// uses, so UnfoldConstant calls itself recursively for those. + /// + /// @param[in] Old + /// The constant that has just been replaced. + /// @param[in] New + /// The value that replaces Old in its uses. + /// @param[in] FirstEntryInstruction New instructions are placed before it.
+ /// + /// @return + /// True on success; false otherwise + //------------------------------------------------------------------ + static bool UnfoldConstant(llvm::Constant *Old, + llvm::Value *New, + llvm::Instruction *FirstEntryInstruction); }; #endif diff --git a/lldb/source/Expression/IRForTarget.cpp b/lldb/source/Expression/IRForTarget.cpp index bd121d57bb74..c3379a12b3b8 100644 --- a/lldb/source/Expression/IRForTarget.cpp +++ b/lldb/source/Expression/IRForTarget.cpp @@ -37,6 +37,7 @@ IRForTarget::IRForTarget(lldb_private::ClangExpressionDeclMap *decl_map, const char *func_name) : ModulePass(ID), m_decl_map(decl_map), + m_CFStringCreateWithBytes(NULL), m_sel_registerName(NULL), m_func_name(func_name), m_resolve_vars(resolve_vars) @@ -256,6 +257,295 @@ IRForTarget::createResultVariable (llvm::Module &llvm_module, llvm::Function &ll return true; } +static void DebugUsers(lldb::LogSP &log, Value *V, uint8_t depth) +{ + if (!depth) + return; + + depth--; + + log->Printf(" ", V->getNumUses()); + + for (Value::use_iterator ui = V->use_begin(), ue = V->use_end(); + ui != ue; + ++ui) + { + log->Printf(" %s", *ui, PrintValue(*ui).c_str()); + DebugUsers(log, *ui, depth); + } + + log->Printf(" "); +} + +bool +IRForTarget::rewriteObjCConstString(llvm::Module &M, + llvm::GlobalVariable *NSStr, + llvm::GlobalVariable *CStr, + Instruction *FirstEntryInstruction) +{ + lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS)); + + const Type *i8_ptr_ty = Type::getInt8PtrTy(M.getContext()); + const IntegerType *intptr_ty = Type::getIntNTy(M.getContext(), + (M.getPointerSize() == Module::Pointer64) ? 
64 : 32); + const Type *i32_ty = Type::getInt32Ty(M.getContext()); + const Type *i8_ty = Type::getInt8Ty(M.getContext()); + + if (!m_CFStringCreateWithBytes) + { + lldb::addr_t CFStringCreateWithBytes_addr; + + static lldb_private::ConstString g_CFStringCreateWithBytes_str ("CFStringCreateWithBytes"); + + if (!m_decl_map->GetFunctionAddress (g_CFStringCreateWithBytes_str, CFStringCreateWithBytes_addr)) + { + if (log) + log->PutCString("Couldn't find CFStringCreateWithBytes in the target"); + + return false; + } + + if (log) + log->Printf("Found CFStringCreateWithBytes at 0x%llx", CFStringCreateWithBytes_addr); + + // Build the function type: + // + // CFStringRef CFStringCreateWithBytes ( + // CFAllocatorRef alloc, + // const UInt8 *bytes, + // CFIndex numBytes, + // CFStringEncoding encoding, + // Boolean isExternalRepresentation + // ); + // + // We make the following substitutions: + // + // CFStringRef -> i8* + // CFAllocatorRef -> i8* + // UInt8 * -> i8* + // CFIndex -> long (i32 or i64, as appropriate; we ask the module for its pointer size for now) + // CFStringEncoding -> i32 + // Boolean -> i8 + + std::vector CFSCWB_arg_types; + CFSCWB_arg_types.push_back(i8_ptr_ty); + CFSCWB_arg_types.push_back(i8_ptr_ty); + CFSCWB_arg_types.push_back(intptr_ty); + CFSCWB_arg_types.push_back(i32_ty); + CFSCWB_arg_types.push_back(i8_ty); + llvm::Type *CFSCWB_ty = FunctionType::get(i8_ptr_ty, CFSCWB_arg_types, false); + + // Build the constant containing the pointer to the function + PointerType *CFSCWB_ptr_ty = PointerType::getUnqual(CFSCWB_ty); + Constant *CFSCWB_addr_int = ConstantInt::get(intptr_ty, CFStringCreateWithBytes_addr, false); + m_CFStringCreateWithBytes = ConstantExpr::getIntToPtr(CFSCWB_addr_int, CFSCWB_ptr_ty); + } + + ConstantArray *string_array = dyn_cast(CStr->getInitializer()); + + SmallVector CFSCWB_arguments; + + Constant *alloc_arg = Constant::getNullValue(i8_ptr_ty); + Constant *bytes_arg = ConstantExpr::getBitCast(CStr, i8_ptr_ty); + Constant 
*numBytes_arg = ConstantInt::get(intptr_ty, string_array->getType()->getNumElements(), false); + Constant *encoding_arg = ConstantInt::get(i32_ty, 0x0600, false); /* 0x0600 is kCFStringEncodingASCII */ + Constant *isExternal_arg = ConstantInt::get(i8_ty, 0x0, false); /* 0x0 is false */ + + CFSCWB_arguments.push_back(alloc_arg); + CFSCWB_arguments.push_back(bytes_arg); + CFSCWB_arguments.push_back(numBytes_arg); + CFSCWB_arguments.push_back(encoding_arg); + CFSCWB_arguments.push_back(isExternal_arg); + + CallInst *CFSCWB_call = CallInst::Create(m_CFStringCreateWithBytes, + CFSCWB_arguments.begin(), + CFSCWB_arguments.end(), + "CFStringCreateWithBytes", + FirstEntryInstruction); + + Constant *initializer = NSStr->getInitializer(); + + if (!UnfoldConstant(NSStr, CFSCWB_call, FirstEntryInstruction)) + { + if (log) + log->PutCString("Couldn't replace the NSString with the result of the call"); + + return false; + } + + NSStr->eraseFromParent(); + + return true; +} + +bool +IRForTarget::rewriteObjCConstStrings(Module &M, + Function &F) +{ + lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS)); + + ValueSymbolTable& value_symbol_table = M.getValueSymbolTable(); + + BasicBlock &entry_block(F.getEntryBlock()); + Instruction *FirstEntryInstruction(entry_block.getFirstNonPHIOrDbg()); + + if (!FirstEntryInstruction) + { + if (log) + log->PutCString("Couldn't find first instruction for rewritten Objective-C strings"); + + return false; + } + + for (ValueSymbolTable::iterator vi = value_symbol_table.begin(), ve = value_symbol_table.end(); + vi != ve; + ++vi) + { + if (strstr(vi->first(), "_unnamed_cfstring_")) + { + Value *nsstring_value = vi->second; + + GlobalVariable *nsstring_global = dyn_cast(nsstring_value); + + if (!nsstring_global) + { + if (log) + log->PutCString("NSString variable is not a GlobalVariable"); + return false; + } + + if (!nsstring_global->hasInitializer()) + { + if (log) + log->PutCString("NSString variable does not have an 
initializer"); + return false; + } + + ConstantStruct *nsstring_struct = dyn_cast(nsstring_global->getInitializer()); + + if (!nsstring_struct) + { + if (log) + log->PutCString("NSString variable's initializer is not a ConstantStruct"); + return false; + } + + // We expect the following structure: + // + // struct { + // int *isa; + // int flags; + // char *str; + // long length; + // }; + + if (nsstring_struct->getNumOperands() != 4) + { + if (log) + log->Printf("NSString variable's initializer structure has an unexpected number of members. Should be 4, is %d", nsstring_struct->getNumOperands()); + return false; + } + + Constant *nsstring_member = nsstring_struct->getOperand(2); + + if (!nsstring_member) + { + if (log) + log->PutCString("NSString initializer's str element was empty"); + return false; + } + + ConstantExpr *nsstring_expr = dyn_cast(nsstring_member); + + if (!nsstring_expr) + { + if (log) + log->PutCString("NSString initializer's str element is not a ConstantExpr"); + return false; + } + + if (nsstring_expr->getOpcode() != Instruction::GetElementPtr) + { + if (log) + log->Printf("NSString initializer's str element is not a GetElementPtr expression, it's a %s", nsstring_expr->getOpcodeName()); + return false; + } + + Constant *nsstring_cstr = nsstring_expr->getOperand(0); + + GlobalVariable *cstr_global = dyn_cast(nsstring_cstr); + + if (!cstr_global) + { + if (log) + log->PutCString("NSString initializer's str element is not a GlobalVariable"); + + nsstring_cstr->dump(); + + return false; + } + + if (!cstr_global->hasInitializer()) + { + if (log) + log->PutCString("NSString initializer's str element does not have an initializer"); + return false; + } + + ConstantArray *cstr_array = dyn_cast(cstr_global->getInitializer()); + + if (!cstr_array) + { + if (log) + log->PutCString("NSString initializer's str element is not a ConstantArray"); + return false; + } + + if (!cstr_array->isCString()) + { + if (log) + log->PutCString("NSString initializer's str 
element is not a C string array"); + return false; + } + + if (log) + log->Printf("Found NSString constant %s, which contains \"%s\"", vi->first(), cstr_array->getAsString().c_str()); + + if (!rewriteObjCConstString(M, nsstring_global, cstr_global, FirstEntryInstruction)) + { + if (log) + log->PutCString("Error rewriting the constant string"); + return false; + } + + + } + } + + for (ValueSymbolTable::iterator vi = value_symbol_table.begin(), ve = value_symbol_table.end(); + vi != ve; + ++vi) + { + if (!strcmp(vi->first(), "__CFConstantStringClassReference")) + { + GlobalVariable *gv = dyn_cast(vi->second); + + if (!gv) + { + if (log) + log->PutCString("__CFConstantStringClassReference is not a global variable"); + return false; + } + + gv->eraseFromParent(); + + break; + } + } + + return true; +} + static bool isObjCSelectorRef(Value *V) { GlobalVariable *GV = dyn_cast(V); @@ -366,7 +656,7 @@ IRForTarget::RewriteObjCSelector(Instruction* selector_load, CallInst *srN_call = CallInst::Create(m_sel_registerName, srN_arguments.begin(), srN_arguments.end(), - "srN", + "sel_registerName", selector_load); // Replace the load with the call in all users @@ -638,14 +928,14 @@ IRForTarget::MaybeHandleVariable bool IRForTarget::MaybeHandleCallArguments(Module &M, - CallInst *C) + CallInst *Old) { // lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS)); - for (unsigned op_index = 0, num_ops = C->getNumArgOperands(); + for (unsigned op_index = 0, num_ops = Old->getNumArgOperands(); op_index < num_ops; ++op_index) - if (!MaybeHandleVariable(M, C->getArgOperand(op_index))) // conservatively believe that this is a store + if (!MaybeHandleVariable(M, Old->getArgOperand(op_index))) // conservatively believe that this is a store return false; return true; @@ -813,9 +1103,9 @@ IRForTarget::resolveExternals(Module &M, static bool isGuardVariableRef(Value *V) { - Constant *C; + Constant *Old; - if (!(C = dyn_cast(V))) + if (!(Old = dyn_cast(V))) return 
false; ConstantExpr *CE; @@ -825,10 +1115,10 @@ static bool isGuardVariableRef(Value *V) if (CE->getOpcode() != Instruction::BitCast) return false; - C = CE->getOperand(0); + Old = CE->getOperand(0); } - GlobalVariable *GV = dyn_cast(C); + GlobalVariable *GV = dyn_cast(Old); if (!GV || !GV->hasName() || !GV->getName().startswith("_ZGV")) return false; @@ -909,19 +1199,8 @@ IRForTarget::removeGuards(Module &M, BasicBlock &BB) return true; } -// UnfoldConstant operates on a constant [C] which has just been replaced with a value -// [new_value]. We assume that new_value has been properly placed early in the function, -// most likely somewhere in front of the first instruction in the entry basic block -// [first_entry_instruction]. -// -// UnfoldConstant reads through the uses of C and replaces C in those uses with new_value. -// Where those uses are constants, the function generates new instructions to compute the -// result of the new, non-constant expression and places them before first_entry_instruction. -// These instructions replace the constant uses, so UnfoldConstant calls itself recursively -// for those. - -static bool -UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instruction) +bool +IRForTarget::UnfoldConstant(Constant *Old, Value *New, Instruction *FirstEntryInstruction) { lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS)); @@ -931,8 +1210,8 @@ UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instructi // We do this because the use list might change, invalidating our iterator. // Much better to keep a work list ourselves. 
- for (ui = C->use_begin(); - ui != C->use_end(); + for (ui = Old->use_begin(); + ui != Old->use_end(); ++ui) users.push_back(*ui); @@ -961,12 +1240,12 @@ UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instructi Value *s = constant_expr->getOperand(0); - if (s == C) - s = new_value; + if (s == Old) + s = New; - BitCastInst *bit_cast(new BitCastInst(s, C->getType(), "", first_entry_instruction)); + BitCastInst *bit_cast(new BitCastInst(s, Old->getType(), "", FirstEntryInstruction)); - UnfoldConstant(constant_expr, bit_cast, first_entry_instruction); + UnfoldConstant(constant_expr, bit_cast, FirstEntryInstruction); } break; case Instruction::GetElementPtr: @@ -977,8 +1256,8 @@ UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instructi Value *ptr = constant_expr->getOperand(0); - if (ptr == C) - ptr = new_value; + if (ptr == Old) + ptr = New; SmallVector indices; @@ -991,15 +1270,15 @@ UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instructi { Value *operand = constant_expr->getOperand(operand_index); - if (operand == C) - operand = new_value; + if (operand == Old) + operand = New; indices.push_back(operand); } - GetElementPtrInst *get_element_ptr(GetElementPtrInst::Create(ptr, indices.begin(), indices.end(), "", first_entry_instruction)); + GetElementPtrInst *get_element_ptr(GetElementPtrInst::Create(ptr, indices.begin(), indices.end(), "", FirstEntryInstruction)); - UnfoldConstant(constant_expr, get_element_ptr, first_entry_instruction); + UnfoldConstant(constant_expr, get_element_ptr, FirstEntryInstruction); } break; } @@ -1014,7 +1293,7 @@ UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instructi else { // simple fall-through case for non-constants - user->replaceUsesOfWith(C, new_value); + user->replaceUsesOfWith(Old, New); } } @@ -1067,9 +1346,9 @@ IRForTarget::replaceVariables(Module &M, Function &F) log->Printf("Arg: \"%s\"", PrintValue(argument).c_str()); BasicBlock 
&entry_block(F.getEntryBlock()); - Instruction *first_entry_instruction(entry_block.getFirstNonPHIOrDbg()); + Instruction *FirstEntryInstruction(entry_block.getFirstNonPHIOrDbg()); - if (!first_entry_instruction) + if (!FirstEntryInstruction) return false; LLVMContext &context(M.getContext()); @@ -1096,11 +1375,11 @@ IRForTarget::replaceVariables(Module &M, Function &F) offset); ConstantInt *offset_int(ConstantInt::getSigned(offset_type, offset)); - GetElementPtrInst *get_element_ptr = GetElementPtrInst::Create(argument, offset_int, "", first_entry_instruction); - BitCastInst *bit_cast = new BitCastInst(get_element_ptr, value->getType(), "", first_entry_instruction); + GetElementPtrInst *get_element_ptr = GetElementPtrInst::Create(argument, offset_int, "", FirstEntryInstruction); + BitCastInst *bit_cast = new BitCastInst(get_element_ptr, value->getType(), "", FirstEntryInstruction); if (Constant *constant = dyn_cast(value)) - UnfoldConstant(constant, bit_cast, first_entry_instruction); + UnfoldConstant(constant, bit_cast, FirstEntryInstruction); else value->replaceAllUsesWith(bit_cast); @@ -1138,6 +1417,37 @@ IRForTarget::runOnModule(Module &M) if (!createResultVariable(M, *function)) return false; + /////////////////////////////////////////////////////////////////////////////// + // Rewrite all Objective-C constant strings as calls to CFStringCreateWithBytes + // + + if (log) + { + std::string s; + raw_string_ostream oss(s); + + M.print(oss, NULL); + + oss.flush(); + + log->Printf("Module after creating the result variable: \n\"%s\"", s.c_str()); + } + + if (!rewriteObjCConstStrings(M, *function)) + return false; + + if (log) + { + std::string s; + raw_string_ostream oss(s); + + M.print(oss, NULL); + + oss.flush(); + + log->Printf("Module after rewriting Objective-C const strings: \n\"%s\"", s.c_str()); + } + ////////////////////////////////// // Run basic-block level passes //