From 2ab222c475c1dea2777129d0a7cb01cbefea3f64 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 28 Mar 2025 14:39:37 +0100 Subject: [PATCH 1/7] minor fix in CHA and RTA resolvers --- lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp | 2 +- lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index ba464cd0a6..15dcdf0e1c 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -76,7 +76,7 @@ auto CHAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) for (const auto &FallbackTy : FallbackTys) { const auto *Target = getNonPureVirtualVFTEntry(FallbackTy, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallees.insert(Target); } } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 05342b05ac..b36584ff69 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -76,7 +76,7 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) if (ReachableTypes.find(PossibleType) != EndIt) { const auto *Target = getNonPureVirtualVFTEntry(PossibleType, VtableIndex, CallSite); - if (Target) { + if (Target && psr::isConsistentCall(CallSite, Target)) { PossibleCallTargets.insert(Target); } } From 4d20486e2dff98018fd2b125ce8a83148e18a492 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 1 Apr 2025 16:35:05 +0200 Subject: [PATCH 2/7] Several small improvements for call-graph resolving --- .../ControlFlow/Resolver/OTFResolver.h | 7 - .../ControlFlow/Resolver/Resolver.h | 31 +++-- .../ControlFlow/LLVMBasedCallGraphBuilder.cpp | 7 - .../ControlFlow/Resolver/OTFResolver.cpp | 128 ++++++++---------- 
.../ControlFlow/Resolver/RTAResolver.cpp | 43 +++++- .../ControlFlow/Resolver/Resolver.cpp | 80 ++++++++++- 6 files changed, 192 insertions(+), 104 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 3bbdc83f5e..f15640023e 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -50,13 +50,6 @@ class OTFResolver : public Resolver { FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite) override; - static std::set - getReachableTypes(const LLVMAliasInfo::AliasSetTy &Values); - - static std::vector> - getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget); - [[nodiscard]] std::string str() const override; [[nodiscard]] bool diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index 8748e56aa2..b124df7b1d 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -44,6 +44,10 @@ enum class CallGraphAnalysisType; [[nodiscard]] std::optional getVFTIndex(const llvm::CallBase *CallSite); +/// Similar to getVFTIndex(), but also returns a pointer to the vtable +[[nodiscard]] std::optional> +getVFTIndexAndVT(const llvm::CallBase *CallSite); + /// Assuming that `CallSite` is a call to a non-static member function, /// retrieves the type of the receiver. Returns nullptr, if the receiver-type /// could not be extracted @@ -68,6 +72,11 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, [[nodiscard]] bool isVirtualCall(const llvm::Instruction *Inst, const LLVMVFTableProvider &VTP); +/// A variant of F->hasAddressTaken() that is better suited for our use cases. +/// +/// Especially, it filters out global aliases. 
+[[nodiscard]] bool isAddressTakenFunction(const llvm::Function *F); + class Resolver { protected: const LLVMProjectIRDB *IRDB; @@ -89,12 +98,16 @@ class Resolver { virtual ~Resolver() = default; - virtual void preCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + preCall(const llvm::Instruction *Inst); virtual void handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &PossibleTargets); - virtual void postCall(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + postCall(const llvm::Instruction *Inst); [[nodiscard]] FunctionSetTy resolveIndirectCall(const llvm::CallBase *CallSite); @@ -105,7 +118,9 @@ class Resolver { [[nodiscard]] virtual FunctionSetTy resolveFunctionPointer(const llvm::CallBase *CallSite); - virtual void otherInst(const llvm::Instruction *Inst); + [[deprecated("With the removal of DTAResolver, this is not used " + "anymore")]] virtual void + otherInst(const llvm::Instruction *Inst); [[nodiscard]] virtual std::string str() const = 0; @@ -113,11 +128,11 @@ class Resolver { // Conservatively returns true. 
Override if possible return true; } - static std::unique_ptr create(CallGraphAnalysisType Ty, - const LLVMProjectIRDB *IRDB, - const LLVMVFTableProvider *VTP, - const DIBasedTypeHierarchy *TH, - LLVMAliasInfoRef PT = nullptr); + + [[nodiscard]] static std::unique_ptr + create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, + const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT = nullptr); }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 10ea6c257c..51d39b0d14 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -153,13 +153,9 @@ bool Builder::processFunction(const llvm::Function *F) { for (const auto &I : llvm::instructions(F)) { const auto *CS = llvm::dyn_cast(&I); if (!CS) { - Res->otherInst(&I); continue; } - Res->preCall(&I); - scope_exit PostCall = [&] { Res->postCall(&I); }; - FixpointReached &= fillPossibleTargets(PossibleTargets, *Res, CS, IndirectCalls); @@ -203,9 +199,6 @@ bool Builder::constructDynamicCall(const llvm::Instruction *CS) { "Looking into dynamic call-site: "); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", " " << llvmIRToString(CS)); - Res->preCall(CallSite); - scope_exit PostCall = [&] { Res->postCall(CallSite); }; - // call the resolve routine auto PossibleTargets = Res->resolveIndirectCall(CallSite); diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index 6e70e7de01..114f79ae72 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -34,6 +34,55 @@ OTFResolver::OTFResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, LLVMAliasInfoRef PT) : Resolver(IRDB, VTP), PT(PT) {} +static std::vector> +getActualFormalPointerPairs(const llvm::CallBase *CallSite, + const 
llvm::Function *CalleeTarget) { + std::vector> Pairs; + Pairs.reserve(CallSite->arg_size()); + // ordinary case + + unsigned Idx = 0; + for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { + // only collect pointer typed pairs + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && + CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), + CalleeTarget->getArg(Idx)); + } + } + + if (CalleeTarget->isVarArg()) { + // in case of vararg, we can pair-up incoming pointer parameters with the + // vararg pack of the callee target. the vararg pack will alias + // (intra-procedurally) with any pointer values loaded from the pack + const llvm::AllocaInst *VarArgs = nullptr; + + for (const auto &I : llvm::instructions(CalleeTarget)) { + if (const auto *Alloca = llvm::dyn_cast(&I)) { + if (const auto *AT = + llvm::dyn_cast(Alloca->getAllocatedType())) { + if (const auto *ST = + llvm::dyn_cast(AT->getArrayElementType())) { + if (ST->hasName() && ST->getName() == "struct.__va_list_tag") { + VarArgs = Alloca; + break; + } + } + } + } + } + + if (VarArgs) { + for (; Idx < CallSite->arg_size(); ++Idx) { + if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { + Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); + } + } + } + } + return Pairs; +} + void OTFResolver::handlePossibleTargets(const llvm::CallBase *CallSite, FunctionSetTy &CalleeTargets) { // if we have no inter-procedural points-to information, use call-graph @@ -72,7 +121,7 @@ auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) PHASAR_LOG_LEVEL(DEBUG, "Call virtual function: " << llvmIRToString(CallSite)); - auto RetrievedVtableIndex = getVFTIndex(CallSite); + auto RetrievedVtableIndex = getVFTIndexAndVT(CallSite); if (!RetrievedVtableIndex.has_value()) { // An error occured PHASAR_LOG_LEVEL(DEBUG, @@ -82,11 +131,12 @@ auto OTFResolver::resolveVirtualCall(const llvm::CallBase *CallSite) return {}; } - auto 
VtableIndex = RetrievedVtableIndex.value(); + auto [VtablePtr, VtableIndex] = RetrievedVtableIndex.value(); PHASAR_LOG_LEVEL(DEBUG, "Virtual function table entry is: " << VtableIndex); - auto PTS = PT.getAliasSet(CallSite->getCalledOperand(), CallSite); + auto PTS = PT.getAliasSet(VtablePtr, CallSite); + for (const auto *P : *PTS) { if (const auto *PGV = llvm::dyn_cast(P)) { if (PGV->hasName() && @@ -208,76 +258,4 @@ auto OTFResolver::resolveFunctionPointer(const llvm::CallBase *CallSite) return Callees; } -std::set -OTFResolver::getReachableTypes(const LLVMAliasInfo::AliasSetTy &Values) { - std::set Types; - // an allocation site can either be an AllocaInst or a call to an - // allocating function - for (const auto *V : Values) { - if (const auto *Alloc = llvm::dyn_cast(V)) { - Types.insert(Alloc->getAllocatedType()); - } else { - // usually if an allocating function is called, it is immediately - // bit-casted - // to the desired allocated value and hence we can determine it from - // the destination type of that cast instruction. - for (const auto *User : V->users()) { - if (const auto *Cast = llvm::dyn_cast(User)) { - Types.insert(Cast->getDestTy()); - } - } - } - } - return Types; -} - -std::vector> -OTFResolver::getActualFormalPointerPairs(const llvm::CallBase *CallSite, - const llvm::Function *CalleeTarget) { - std::vector> Pairs; - Pairs.reserve(CallSite->arg_size()); - // ordinary case - - unsigned Idx = 0; - for (; Idx < CallSite->arg_size() && Idx < CalleeTarget->arg_size(); ++Idx) { - // only collect pointer typed pairs - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy() && - CalleeTarget->getArg(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), - CalleeTarget->getArg(Idx)); - } - } - - if (CalleeTarget->isVarArg()) { - // in case of vararg, we can pair-up incoming pointer parameters with the - // vararg pack of the callee target. 
the vararg pack will alias - // (intra-procedurally) with any pointer values loaded from the pack - const llvm::AllocaInst *VarArgs = nullptr; - - for (const auto &I : llvm::instructions(CalleeTarget)) { - if (const auto *Alloca = llvm::dyn_cast(&I)) { - if (const auto *AT = - llvm::dyn_cast(Alloca->getAllocatedType())) { - if (const auto *ST = - llvm::dyn_cast(AT->getArrayElementType())) { - if (ST->hasName() && ST->getName() == "struct.__va_list_tag") { - VarArgs = Alloca; - break; - } - } - } - } - } - - if (VarArgs) { - for (; Idx < CallSite->arg_size(); ++Idx) { - if (CallSite->getArgOperand(Idx)->getType()->isPointerTy()) { - Pairs.emplace_back(CallSite->getArgOperand(Idx), VarArgs); - } - } - } - } - return Pairs; -} - std::string OTFResolver::str() const { return "OTF"; } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index b36584ff69..2635cb7d55 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -18,10 +18,12 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/Utilities.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -91,18 +93,47 @@ auto RTAResolver::resolveVirtualCall(const llvm::CallBase *CallSite) std::string RTAResolver::str() const { return "RTA"; } -/// More or less copied from GeneralStatisticsAnalysis +static const llvm::DICompositeType * +isCompositeStructType(const llvm::DIType *Ty) { + if (const auto *CompTy = llvm::dyn_cast_if_present(Ty); + CompTy && (CompTy->getTag() == llvm::dwarf::DW_TAG_structure_type || + CompTy->getTag() == llvm::dwarf::DW_TAG_class_type)) { + + return 
CompTy; + } + + return nullptr; +} + void RTAResolver::resolveAllocatedCompositeTypes() { if (!AllocatedCompositeTypes.empty()) { return; } - llvm::DebugInfoFinder DIF; - DIF.processModule(*IRDB->getModule()); + llvm::DenseSet AllocatedTypes; - for (const auto *Ty : DIF.types()) { - if (const auto *CompTy = llvm::dyn_cast(Ty)) { - AllocatedCompositeTypes.push_back(CompTy); + for (const auto *Inst : IRDB->getAllInstructions()) { + if (const auto *Alloca = llvm::dyn_cast(Inst)) { + if (const auto *Ty = isCompositeStructType(getVarTypeFromIR(Alloca))) { + AllocatedTypes.insert(Ty); + } + } else if (const auto *Call = llvm::dyn_cast(Inst)) { + if (const auto *Callee = llvm::dyn_cast( + Call->getCalledOperand()->stripPointerCastsAndAliases())) { + if (psr::isHeapAllocatingFunction(Callee)) { + const auto *MDNode = Call->getMetadata("heapallocsite"); + if (const auto *CompTy = + llvm::dyn_cast_if_present(MDNode); + isCompositeStructType(CompTy)) { + + AllocatedTypes.insert(CompTy); + } + } + } } } + + AllocatedCompositeTypes.reserve(AllocatedTypes.size()); + AllocatedCompositeTypes.insert(AllocatedCompositeTypes.end(), + AllocatedTypes.begin(), AllocatedTypes.end()); } diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 27c91bcd4a..1e1853158e 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -62,6 +63,29 @@ std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { return std::nullopt; } +std::optional> +psr::getVFTIndexAndVT(const llvm::CallBase *CallSite) { + // deal with a virtual member function + // retrieve the vtable entry that is called + const auto *Load = + 
llvm::dyn_cast(CallSite->getCalledOperand()); + if (Load == nullptr) { + return std::nullopt; + } + + const auto *GEP = + llvm::dyn_cast(Load->getPointerOperand()); + if (GEP == nullptr) { + return std::nullopt; + } + + if (auto *CI = llvm::dyn_cast(GEP->getOperand(1))) { + return {{GEP->getPointerOperand(), CI->getZExtValue()}}; + } + + return std::nullopt; +} + const llvm::DIType *psr::getReceiverType(const llvm::CallBase *CallSite) { if (CallSite->arg_empty() || (CallSite->hasStructRetAttr() && CallSite->arg_size() < 2)) { @@ -145,6 +169,60 @@ bool psr::isVirtualCall(const llvm::Instruction *Inst, return getVFTIndex(CallSite) >= 0; } +// Derived from LLVM's llvm::Function::hasAddressTaken() +static bool isAddressTakenImpl(const llvm::Value *F) { + if (!F) { + return false; + } + + for (const auto &Use : F->uses()) { + const auto *User = Use.getUser(); + + if (llvm::isa(User)) { + if (isAddressTakenImpl(User)) { + return true; + } + + continue; + } + + if (const auto *Glob = llvm::dyn_cast(User)) { + if (Glob->getName() == "llvm.compiler.used" || + Glob->getName() == "llvm.used") { + continue; + } + + return true; + } + + const auto *Call = llvm::dyn_cast(User); + if (!Call) { + return true; + } + + if (Call->isDebugOrPseudoInst()) { + continue; + } + + const auto *Intrinsic = llvm::dyn_cast(Call); + if (Intrinsic && Intrinsic->isAssumeLikeIntrinsic()) { + continue; + } + + if (Call->isCallee(&Use)) { + continue; + } + + return true; + } + + return false; +} + +bool psr::isAddressTakenFunction(const llvm::Function *F) { + return isAddressTakenImpl(F); +} + namespace psr { Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) @@ -178,7 +256,7 @@ auto Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) FunctionSetTy CalleeTargets; for (const auto *F : IRDB->getAllFunctions()) { - if (F->hasAddressTaken() && isConsistentCall(CallSite, F)) { + if (isAddressTakenFunction(F) && isConsistentCall(CallSite, F)) { 
CalleeTargets.insert(F); } } From ca44e1c7164ce6a356dd56194e199137167e464c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 2 Apr 2025 14:28:30 +0200 Subject: [PATCH 3/7] Fix out-of-bounds access in getVarTypeFromIR() --- lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp index ac36114d33..5bc38c4691 100644 --- a/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp +++ b/lib/PhasarLLVM/Utils/LLVMIRToSrc.cpp @@ -190,7 +190,7 @@ static llvm::DIType *getStructElementType(llvm::DIType *BaseTy, size_t Offset) { if (const auto *CompositeTy = llvm::dyn_cast(StructTy)) { - if (Offset > CompositeTy->getElements().size()) { + if (Offset >= CompositeTy->getElements().size()) { return nullptr; } auto Elems = CompositeTy->getElements(); From b49252574bdf8feef6cd3426f64a7c4fcf10047f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 30 Jun 2025 18:11:00 +0200 Subject: [PATCH 4/7] Add address-taken functions caching in base resolver --- .../ControlFlow/Resolver/Resolver.h | 32 +++++++++++-------- .../ControlFlow/Resolver/Resolver.cpp | 19 +++++++++-- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h index c4e142628a..483b14ee41 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h @@ -20,6 +20,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DerivedTypes.h" #include @@ -82,19 +83,6 @@ getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, /// Create a specific resolver by making a new class, inheriting this resolver /// class and implementing the virtual functions as needed. 
class Resolver { -protected: - const LLVMProjectIRDB *IRDB; - const LLVMVFTableProvider *VTP; - - const llvm::Function * - getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, - const llvm::CallBase *CallSite) { - if (!VTP) { - return nullptr; - } - return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP); - } - public: using FunctionSetTy = llvm::SmallDenseSet; @@ -133,10 +121,28 @@ class Resolver { return true; } + [[nodiscard]] llvm::ArrayRef + getAddressTakenFunctions(); + [[nodiscard]] static std::unique_ptr create(CallGraphAnalysisType Ty, const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP, const DIBasedTypeHierarchy *TH, LLVMAliasInfoRef PT = nullptr); + +protected: + const llvm::Function * + getNonPureVirtualVFTEntry(const llvm::DIType *T, unsigned Idx, + const llvm::CallBase *CallSite) { + if (!VTP) { + return nullptr; + } + return psr::getNonPureVirtualVFTEntry(T, Idx, CallSite, *VTP); + } + + const LLVMProjectIRDB *IRDB{}; + const LLVMVFTableProvider *VTP{}; + std::optional> + AddressTakenFunctions{}; }; } // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 89958c1d31..69c0624d21 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -245,6 +245,21 @@ auto Resolver::resolveIndirectCall(const llvm::CallBase *CallSite) return resolveFunctionPointer(CallSite); } +llvm::ArrayRef Resolver::getAddressTakenFunctions() { + if (!AddressTakenFunctions) { + auto &ATF = AddressTakenFunctions.emplace(); + // XXX: Find better heuristic + ATF.reserve(IRDB->getNumFunctions() / 2); + for (const auto *F : IRDB->getAllFunctions()) { + if (isAddressTakenFunction(F)) { + ATF.push_back(F); + } + } + } + + return *AddressTakenFunctions; +} + auto Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) -> FunctionSetTy { // we may wish to optimise this function @@ -254,8 +269,8 @@ auto 
Resolver::resolveFunctionPointer(const llvm::CallBase *CallSite) "Call function pointer: " << llvmIRToString(CallSite)); FunctionSetTy CalleeTargets; - for (const auto *F : IRDB->getAllFunctions()) { - if (isAddressTakenFunction(F) && isConsistentCall(CallSite, F)) { + for (const auto *F : getAddressTakenFunctions()) { + if (isConsistentCall(CallSite, F)) { CalleeTargets.insert(F); } } From 4f5bafb4570d6b90decc96a654dce0bfb40ffe4d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 30 Jun 2025 18:27:31 +0200 Subject: [PATCH 5/7] Some cleanup in resolvers --- .../ControlFlow/Resolver/CHAResolver.h | 4 ---- .../PhasarLLVM/ControlFlow/Resolver/NOResolver.h | 4 ---- .../ControlFlow/Resolver/OTFResolver.h | 16 +--------------- .../ControlFlow/Resolver/RTAResolver.h | 2 -- .../ControlFlow/Resolver/CHAResolver.cpp | 5 ----- .../ControlFlow/Resolver/NOResolver.cpp | 6 ------ .../ControlFlow/Resolver/OTFResolver.cpp | 1 - .../ControlFlow/Resolver/RTAResolver.cpp | 6 ------ lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp | 6 ++---- 9 files changed, 3 insertions(+), 47 deletions(-) diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h index dc6f7c8ff1..82e2dcbbd4 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/CHAResolver.h @@ -20,10 +20,6 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/Utils/MaybeUniquePtr.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { class DIBasedTypeHierarchy; diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h index 88afa796e5..eb6800dc15 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h @@ -12,10 +12,6 @@ #include 
"phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" -namespace llvm { -class CallBase; -} // namespace llvm - namespace psr { /// \brief A resolver that doesn't resolve indirect- and virtual calls diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h index 5fbd9fc710..6cddc84c92 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/OTFResolver.h @@ -20,22 +20,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include -#include -#include -#include - -namespace llvm { -class CallBase; -class Function; -class Type; -class Value; -} // namespace llvm - namespace psr { -class DIBasedTypeHierarchy; - /// \brief A resolver that uses alias information to resolve indirect and /// virtual calls class OTFResolver : public Resolver { @@ -56,7 +42,7 @@ class OTFResolver : public Resolver { [[nodiscard]] bool mutatesHelperAnalysisInformation() const noexcept override { - return true; + return !PT.isInterProcedural(); } protected: diff --git a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h index c6e003211f..cc3c99d034 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h +++ b/include/phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h @@ -22,12 +22,10 @@ #include namespace llvm { -class CallBase; class DICompositeType; } // namespace llvm namespace psr { -class DIBasedTypeHierarchy; /// \brief A resolver that performs Rapid Type Analysis to resolve calls /// to C++ virtual functions. Requires debug information. 
diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp index 15dcdf0e1c..da0a71f438 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/CHAResolver.cpp @@ -21,15 +21,10 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Module.h" #include -using namespace std; using namespace psr; CHAResolver::CHAResolver(const LLVMProjectIRDB *IRDB, diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp index f825f52549..f98483c2d5 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/NOResolver.cpp @@ -16,12 +16,8 @@ #include "phasar/PhasarLLVM/ControlFlow/Resolver/NOResolver.h" -#include - using namespace psr; -namespace psr { - NOResolver::NOResolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : Resolver(IRDB, VTP) {} @@ -37,5 +33,3 @@ auto NOResolver::resolveFunctionPointer(const llvm::CallBase * /*CallSite*/) } std::string NOResolver::str() const { return "NOResolver"; } - -} // namespace psr diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp index 114f79ae72..348aa5a5ed 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/OTFResolver.cpp @@ -22,7 +22,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp index 2635cb7d55..0cfd9cdbb0 
100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/RTAResolver.cpp @@ -21,21 +21,15 @@ #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Utilities.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" -using namespace std; using namespace psr; RTAResolver::RTAResolver(const LLVMProjectIRDB *IRDB, diff --git a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp index 69c0624d21..ee3c3aa466 100644 --- a/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp +++ b/lib/PhasarLLVM/ControlFlow/Resolver/Resolver.cpp @@ -43,6 +43,8 @@ #include #include +using namespace psr; + std::optional psr::getVFTIndex(const llvm::CallBase *CallSite) { // deal with a virtual member function // retrieve the vtable entry that is called @@ -222,8 +224,6 @@ bool psr::isAddressTakenFunction(const llvm::Function *F) { return isAddressTakenImpl(F); } -namespace psr { - Resolver::Resolver(const LLVMProjectIRDB *IRDB, const LLVMVFTableProvider *VTP) : IRDB(IRDB), VTP(VTP) { assert(IRDB != nullptr); @@ -310,5 +310,3 @@ std::unique_ptr Resolver::create(CallGraphAnalysisType Ty, llvm_unreachable("All possible callgraph algorithms should be handled in the " "above switch"); } - -} // namespace psr From e21602716d03d0858e16c2d99be2b58e19df2fd9 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 30 Jun 2025 18:32:50 +0200 Subject: [PATCH 6/7] pre-commit --- lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 51d39b0d14..8e6755d309 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -111,8 +111,8 @@ static bool fillPossibleTargets( PossibleTargets.insert(StaticCallee); PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found static call-site: " - << " " << llvmIRToString(CS)); + "Found static call-site: " << " " + << llvmIRToString(CS)); return true; } @@ -122,8 +122,8 @@ static bool fillPossibleTargets( // the function call must be resolved dynamically PHASAR_LOG_LEVEL_CAT(DEBUG, "LLVMBasedICFG", - "Found dynamic call-site: " - << " " << llvmIRToString(CS)); + "Found dynamic call-site: " << " " + << llvmIRToString(CS)); PossibleTargets = Res.resolveIndirectCall(CS); From dfb72fbd01ffa33a8ed6566860d0d52d9cf72f41 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 1 Jul 2025 20:32:18 +0200 Subject: [PATCH 7/7] Fix bug in the overloads of buildLLVMBasedCallGraph() that takes a CallGraphAnalysisType --- lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp index 8e6755d309..c759de9361 100644 --- a/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp +++ b/lib/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.cpp @@ -268,7 +268,7 @@ auto psr::buildLLVMBasedCallGraph( PT = PTOwn.asRef(); } - auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH); + auto Res = Resolver::create(CGType, &IRDB, &VTP, &TH, PT); return buildLLVMBasedCallGraph(IRDB, *Res, EntryPoints, S); }