diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 497e0d6d8b0c7709d8b4a70d3cc36ee0601a1df8..5db49af45b47a70a4b93869cd8d9caf34c263f26 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -26,6 +26,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScaledNumber.h"
 #include <algorithm>
 #include <array>
 #include <cassert>
@@ -59,7 +60,11 @@ struct CalleeInfo {
   // The size of the bit-field might need to be adjusted if more values are
   // added to HotnessType enum.
   uint32_t Hotness : 3;
+
+  /// The value stored in RelBlockFreq has to be interpreted as the digits of
+  /// a scaled number with a scale of \p -ScaleShift.
   uint32_t RelBlockFreq : 29;
+  static constexpr int32_t ScaleShift = 8;
   static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1;
 
   CalleeInfo()
@@ -73,10 +78,20 @@ struct CalleeInfo {
 
   HotnessType getHotness() const { return HotnessType(Hotness); }
 
-  // When there are multiple edges between the same (caller, callee) pair, the
-  // relative block frequencies are summed up.
-  void updateRelBlockFreq(uint64_t RBF) {
-    uint64_t Sum = SaturatingAdd<uint64_t>(RelBlockFreq, RBF);
+  /// Update \p RelBlockFreq from \p BlockFreq and \p EntryFreq.
+  ///
+  /// BlockFreq is divided by EntryFreq and added to RelBlockFreq as a fixed
+  /// point number with a scale of -ScaleShift, so fractional values are kept.
+  /// The sum is clamped to MaxRelBlockFreq; a zero EntryFreq changes nothing.
+  void updateRelBlockFreq(uint64_t BlockFreq, uint64_t EntryFreq) {
+    if (EntryFreq == 0)
+      return;
+    using Scaled64 = ScaledNumber<uint64_t>;
+    Scaled64 Temp(BlockFreq, ScaleShift);
+    Temp /= Scaled64::get(EntryFreq);
+
+    uint64_t Sum =
+        SaturatingAdd<uint64_t>(Temp.toInt<uint64_t>(), RelBlockFreq);
     Sum = std::min(Sum, uint64_t(MaxRelBlockFreq));
     RelBlockFreq = static_cast<uint32_t>(Sum);
   }
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 9293f6034798dea7c38054df1325fcab9938c320..4a84816f616c7791d3d0a83ee3ff2cb6876724cf 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -279,17 +279,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
         // Add the relative block frequency to CalleeInfo if there is no profile
         // information.
         if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
-          auto BBFreq = BFI->getBlockFreq(&BB).getFrequency();
-          // FIXME: This might need some scaling to prevent BBFreq values from
-          // being rounded down to 0.
-          auto EntryFreq = BFI->getEntryFreq();
-          // Block frequencies can be directly set for a block and so we need to
-          // handle the case of entry frequency being 0.
-          if (EntryFreq)
-            BBFreq /= EntryFreq;
-          else
-            BBFreq = 0;
-          ValueInfo.updateRelBlockFreq(BBFreq);
+          uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
+          uint64_t EntryFreq = BFI->getEntryFreq();
+          ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
         }
       } else {
         // Skip inline assembly calls.
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
index 779acada52087548d503b23f8aab93ad6bdf38ff..9955de6d958b58372a430d29f9bea297d07f8cab 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
@@ -11,7 +11,7 @@
 ; CHECK: <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT: <VERSION
 ; See if the call to func is registered.
-; CHECK-NEXT: <PERMODULE_RELBF {{.*}} op4=1 {{.*}} op7=1
+; CHECK-NEXT: <PERMODULE_RELBF {{.*}} op4=1 {{.*}} op7=256
 ; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
 ; CHECK: <STRTAB_BLOCK
 ; CHECK-NEXT: blob data = 'undefinedglobmainfunc{{.*}}'