diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ff00900a1466a..110e697702641 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t { Version0 = 0, // Version 1: Added a version field to the header. Version1 = 1, + // Version 2: Added a call stack table. Under development. + Version2 = 2, }; constexpr uint64_t MinimumSupportedVersion = Version0; @@ -289,23 +291,14 @@ struct IndexedAllocationInfo { : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {} // Returns the size in bytes when this allocation info struct is serialized. - size_t serializedSize() const { - return sizeof(uint64_t) + // The number of frames to serialize. - sizeof(FrameId) * CallStack.size() + // The callstack frame ids. - PortableMemInfoBlock::serializedSize(); // The size of the payload. - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedAllocationInfo &Other) const { if (Other.Info != Info) return false; - if (Other.CallStack.size() != CallStack.size()) + if (Other.CSId != CSId) return false; - - for (size_t J = 0; J < Other.CallStack.size(); J++) { - if (Other.CallStack[J] != CallStack[J]) - return false; - } return true; } @@ -357,6 +350,9 @@ struct IndexedMemProfRecord { // inline location list may include additional entries, users should pick // the last entry in the list with the same function GUID. llvm::SmallVector> CallSites; + // Conceptually the same as above. We are going to keep both CallSites and + // CallSiteIds while we are transitioning from CallSites to CallSiteIds. 
+ llvm::SmallVector CallSiteIds; void clear() { AllocSites.clear(); @@ -370,47 +366,31 @@ struct IndexedMemProfRecord { CallSites.append(Other.CallSites); } - size_t serializedSize() const { - size_t Result = sizeof(GlobalValue::GUID); - for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(); - - // The number of callsites we have information for. - Result += sizeof(uint64_t); - for (const auto &Frames : CallSites) { - // The number of frame ids to serialize. - Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); - } - return Result; - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedMemProfRecord &Other) const { if (Other.AllocSites.size() != AllocSites.size()) return false; - if (Other.CallSites.size() != CallSites.size()) - return false; - for (size_t I = 0; I < AllocSites.size(); I++) { if (AllocSites[I] != Other.AllocSites[I]) return false; } - for (size_t I = 0; I < CallSites.size(); I++) { - if (CallSites[I] != Other.CallSites[I]) - return false; - } + if (Other.CallSiteIds != CallSiteIds) + return false; return true; } // Serializes the memprof records in \p Records to the ostream \p OS based // on the schema provided in \p Schema. - void serialize(const MemProfSchema &Schema, raw_ostream &OS); + void serialize(const MemProfSchema &Schema, raw_ostream &OS, + IndexedVersion Version); // Deserializes memprof records from the Buffer. static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, - const unsigned char *Buffer); + const unsigned char *Buffer, + IndexedVersion Version); // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. 
MemProfRecords are @@ -480,7 +460,8 @@ class RecordLookupTrait { using offset_type = uint64_t; RecordLookupTrait() = delete; - RecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) + : Version(V), Schema(S) {} static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } static uint64_t GetInternalKey(uint64_t K) { return K; } @@ -507,11 +488,13 @@ class RecordLookupTrait { data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Record = IndexedMemProfRecord::deserialize(Schema, D); + Record = IndexedMemProfRecord::deserialize(Schema, D, Version); return Record; } private: + // Holds the MemProf version. + IndexedVersion Version; // Holds the memprof schema used to deserialize records. MemProfSchema Schema; // Holds the records from one function deserialized from the indexed format. @@ -519,7 +502,7 @@ class RecordLookupTrait { }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. -class RecordWriterTrait { +template class RecordWriterTrait { public: using key_type = uint64_t; using key_type_ref = uint64_t; @@ -546,7 +529,7 @@ class RecordWriterTrait { endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); - offset_type M = V.serializedSize(); + offset_type M = V.serializedSize(Version); LE.write(M); return std::make_pair(N, M); } @@ -560,7 +543,7 @@ class RecordWriterTrait { void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - V.serialize(*Schema, Out); + V.serialize(*Schema, Out, Version); // Clear the IndexedMemProfRecord which results in clearing/freeing its // vectors of allocs and callsites. This is owned by the associated on-disk // hash table, but unused after this point. 
See also the comment added to diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7ac5c561dc080..884334ed070e8 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() { MemProfRecordTable.reset(MemProfRecordHashTable::Create( /*Buckets=*/Start + RecordTableOffset, /*Payload=*/Ptr, - /*Base=*/Start, memprof::RecordLookupTrait(Schema))); + /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema))); // Initialize the frame table reader with the payload and bucket offsets. MemProfFrameTable.reset(MemProfFrameHashTable::Create( diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index c2c94ba30c658..a1bc180a53ca3 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(static_cast(Id)); } - auto RecordWriter = std::make_unique(); + auto RecordWriter = + std::make_unique>(); RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator + OnDiskChainedHashTableGenerator< + memprof::RecordWriterTrait> RecordTableGenerator; for (auto &I : MemProfRecordData) { // Insert the key (func hash) and value (memprof record). diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 6c419811d59e2..ac0a8702c3f9c 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,15 +10,88 @@ namespace llvm { namespace memprof { +namespace { +size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The number of frames to serialize. + Size += sizeof(uint64_t); + // The callstack frame ids. + Size += sizeof(FrameId) * IAI.CallStack.size(); + // The size of the payload. 
+ Size += PortableMemInfoBlock::serializedSize(); + return Size; +} -void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, - raw_ostream &OS) { +size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The CallStackId + Size += sizeof(CallStackId); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; +} +} // namespace + +size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const { + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(*this); + case Version2: + return serializedSizeV2(*this); + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +size_t serializedSizeV0(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version0); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : Record.CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + return Result; +} + +size_t serializedSizeV2(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version2); + + // The number of callsites we have information for. 
+ Result += sizeof(uint64_t); + // The CallStackId + Result += Record.CallSiteIds.size() * sizeof(CallStackId); + return Result; +} +} // namespace + +size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const { + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(*this); + case Version2: + return serializedSizeV2(*this); + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +void serializeV0(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); - LE.write(AllocSites.size()); - for (const IndexedAllocationInfo &N : AllocSites) { + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { LE.write(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write(Id); @@ -26,17 +99,50 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, } // Related contexts. - LE.write(CallSites.size()); - for (const auto &Frames : CallSites) { + LE.write(Record.CallSites.size()); + for (const auto &Frames : Record.CallSites) { LE.write(Frames.size()); for (const FrameId &Id : Frames) LE.write(Id); } } -IndexedMemProfRecord -IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, - const unsigned char *Ptr) { +void serializeV2(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { + using namespace support; + + endian::Writer LE(OS, llvm::endianness::little); + + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { + LE.write(N.CSId); + N.Info.serialize(Schema, OS); + } + + // Related contexts. 
+ LE.write(Record.CallSiteIds.size()); + for (const auto &CSId : Record.CallSiteIds) + LE.write(CSId); +} +} // namespace + +void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, + raw_ostream &OS, IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + serializeV0(*this, Schema, OS); + return; + case Version2: + serializeV2(*this, Schema, OS); + return; + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, Frames.push_back(Id); } Record.CallSites.push_back(Frames); + Record.CallSiteIds.push_back(hashCallStack(Frames)); } return Record; } +IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, + const unsigned char *Ptr) { + using namespace support; + + IndexedMemProfRecord Record; + + // Read the meminfo nodes. + const uint64_t NumNodes = + endian::readNext(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + IndexedAllocationInfo Node; + Node.CSId = + endian::readNext(Ptr); + Node.Info.deserialize(Schema, Ptr); + Ptr += PortableMemInfoBlock::serializedSize(); + Record.AllocSites.push_back(Node); + } + + // Read the callsite information. 
+ const uint64_t NumCtxs = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + CallStackId CSId = + endian::readNext(Ptr); + Record.CallSiteIds.push_back(CSId); + } + + return Record; +} +} // namespace + +IndexedMemProfRecord +IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr, + IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + return deserializeV0(Schema, Ptr); + case Version2: + return deserializeV2(Schema, Ptr); + } + llvm_unreachable("unsupported MemProf version"); +} + GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 1cca44e9b0370..f1aa6f37aa399 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) { EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } -TEST(MemProf, RecordSerializationRoundTrip) { +// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so +// we share one test. 
+TEST(MemProf, RecordSerializationRoundTripVersion0And1) { const MemProfSchema Schema = getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, @@ -284,14 +286,47 @@ TEST(MemProf, RecordSerializationRoundTrip) { Info); } Record.CallSites.assign(CallSites); + for (const auto &CS : CallSites) + Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS)); std::string Buffer; llvm::raw_string_ostream OS(Buffer); - Record.serialize(Schema, OS); + Record.serialize(Schema, OS, llvm::memprof::Version0); OS.flush(); const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( - Schema, reinterpret_cast(Buffer.data())); + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version0); + + EXPECT_EQ(Record, GotRecord); +} + +TEST(MemProf, RecordSerializationRoundTripVersion2) { + const MemProfSchema Schema = getFullSchema(); + + MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, + /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, + /*dealloc_cpu=*/4); + + llvm::SmallVector CallStackIds = {0x123, 0x456}; + + llvm::SmallVector CallSiteIds = {0x333, 0x444}; + + IndexedMemProfRecord Record; + for (const auto &CSId : CallStackIds) { + // Use the same info block for both allocation sites. + Record.AllocSites.emplace_back(llvm::SmallVector(), CSId, Info); + } + Record.CallSiteIds.assign(CallSiteIds); + + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + Record.serialize(Schema, OS, llvm::memprof::Version2); + OS.flush(); + + const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version2); EXPECT_EQ(Record, GotRecord); }