llvm-for-llvmta/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp

//===- GSIStreamBuilder.cpp - PDB GSI Stream Creation -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The data structures defined in this file are based on the reference
// implementation which is available at
// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <vector>

using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::codeview;

// Helper class for building the public and global PDB hash table buckets.
// GSIStreamBuilder owns two instances: one for the publics hash (PSH) and one
// for the globals hash (GSH).
struct llvm::pdb::GSIHashStreamBuilder {
  // Sum of the size of all public or global records.
  uint32_t RecordByteSize = 0;

  // One entry per hashed record, grouped by bucket. Populated by
  // finalizeBuckets().
  std::vector<PSHashRecord> HashRecords;

  // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The
  // reference implementation builds a hash table with IPHR_HASH buckets in it.
  // The last bucket is used to link together free hash table cells in a linked
  // list, but it is always empty in the compressed, on-disk format. However,
  // the bitmap must have a bit for it.
  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;

  // One entry per non-empty bucket, appended in bucket order by
  // finalizeBuckets().
  std::vector<support::ulittle32_t> HashBuckets;

  // Number of bytes commit() writes: the header plus the hash records, the
  // bitmap and the buckets.
  uint32_t calculateSerializedLength() const;
  // Writes the header followed by HashRecords, HashBitmap and HashBuckets.
  Error commit(BinaryStreamWriter &Writer);

  // NOTE(review): no definitions of these two members are visible in this
  // file; the identically named GSIStreamBuilder members are defined instead.
  // Confirm whether these declarations are still needed.
  void finalizePublicBuckets();
  void finalizeGlobalBuckets(uint32_t RecordZeroOffset);

  // Assign public and global symbol records into hash table buckets.
  // Modifies the list of records to store the bucket index, but does not
  // change the order.
  void finalizeBuckets(uint32_t RecordZeroOffset,
                       MutableArrayRef<BulkPublic> Globals);
};

// DenseMapInfo traits that key symbol records on their serialized bytes, used
// to deduplicate records.
struct llvm::pdb::SymbolDenseMapInfo {
  // Sentinel for an unused slot: a default-constructed record.
  static inline CVSymbol getEmptyKey() {
    static CVSymbol EmptyKey;
    return EmptyKey;
  }

  // Sentinel for an erased slot, built from the ArrayRef tombstone key.
  static inline CVSymbol getTombstoneKey() {
    using BytesInfo = DenseMapInfo<ArrayRef<uint8_t>>;
    static CVSymbol TombstoneKey(BytesInfo::getTombstoneKey());
    return TombstoneKey;
  }

  // Hash of the raw record bytes, converted to `unsigned`.
  static unsigned getHashValue(const CVSymbol &Val) {
    return static_cast<unsigned>(xxHash64(Val.RecordData));
  }

  // Two records are equal when their record data compares equal.
  static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
    return LHS.RecordData == RHS.RecordData;
  }
};

namespace {
// Fixed-size leading part of a serialized S_PUB32 record as written by
// serializePublic(): the record prefix followed by the public symbol header.
// The name bytes and their null terminator are stored directly after this
// struct. The struct is packed; sizeOfPublic() and serializePublic() use its
// sizeof() as the serialized size of the fixed part.
LLVM_PACKED_START
struct PublicSym32Layout {
  RecordPrefix Prefix;
  PublicSym32Header Pub;
  // char Name[];
};
LLVM_PACKED_END
} // namespace

// Returns the number of bytes this public occupies once serialized: the fixed
// S_PUB32 layout, the name (clamped so the record stays within
// MaxRecordLength) and its null terminator, rounded up to a multiple of 4.
static uint32_t sizeOfPublic(const BulkPublic &Pub) {
  // Longest name that still leaves room for the fixed layout and the
  // terminating null.
  const uint32_t MaxNameLen =
      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1);
  const uint32_t NameBytes = std::min<uint32_t>(Pub.NameLen, MaxNameLen);
  return alignTo(sizeof(PublicSym32Layout) + NameBytes + 1, 4);
}

// Serializes Pub as an S_PUB32 record into Mem and returns a CVSymbol that
// views the written bytes. The caller must have allocated sizeOfPublic(Pub)
// bytes at Mem.
static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {
  // Clamp the name exactly the way sizeOfPublic() does.
  const uint32_t MaxNameLen =
      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1);
  const uint32_t NameLen = std::min(Pub.NameLen, MaxNameLen);
  const size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);
  assert(Size == sizeOfPublic(Pub));

  // Fixed-size part: record prefix followed by the public symbol header.
  auto *Layout = reinterpret_cast<PublicSym32Layout *>(Mem);
  Layout->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);
  // The stored record length is the total size minus two bytes.
  Layout->Prefix.RecordLen = static_cast<uint16_t>(Size - 2);
  Layout->Pub.Flags = Pub.Flags;
  Layout->Pub.Offset = Pub.Offset;
  Layout->Pub.Segment = Pub.Segment;

  // Variable-size part: the name, then zeros covering the null terminator and
  // any alignment padding.
  char *NameDst = reinterpret_cast<char *>(Layout + 1);
  memcpy(NameDst, Pub.Name, NameLen);
  const size_t ZeroBytes = Size - sizeof(PublicSym32Layout) - NameLen;
  memset(NameDst + NameLen, 0, ZeroBytes);

  return CVSymbol(makeArrayRef(Mem, Size));
}

// Returns the number of bytes commit() writes for this hash table: the
// GSIHashHeader, the hash records, the bitmap words and the bucket offsets.
uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
  const uint32_t HeaderBytes = sizeof(GSIHashHeader);
  const uint32_t RecordBytes = HashRecords.size() * sizeof(PSHashRecord);
  const uint32_t BitmapBytes = HashBitmap.size() * sizeof(uint32_t);
  const uint32_t BucketBytes = HashBuckets.size() * sizeof(uint32_t);
  return HeaderBytes + RecordBytes + BitmapBytes + BucketBytes;
}

// Writes the serialized hash table to Writer: the header, then the hash
// records, the bitmap and the bucket offsets, in that order. Returns the first
// write error encountered, or success.
Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
  GSIHashHeader Hdr;
  Hdr.VerSignature = GSIHashHeader::HdrSignature;
  Hdr.VerHdr = GSIHashHeader::HdrVersion;
  // Byte size of the hash record array.
  Hdr.HrSize = HashRecords.size() * sizeof(PSHashRecord);
  // Combined byte size of the bitmap and the bucket array (4 bytes per entry).
  Hdr.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;

  if (Error Err = Writer.writeObject(Hdr))
    return Err;
  if (Error Err = Writer.writeArray(makeArrayRef(HashRecords)))
    return Err;
  if (Error Err = Writer.writeArray(makeArrayRef(HashBitmap)))
    return Err;
  return Writer.writeArray(makeArrayRef(HashBuckets));
}

// Returns true when every character of S is below 0x80; an empty string
// qualifies.
static bool isAsciiString(StringRef S) {
  for (char C : S) {
    if (unsigned(C) >= 0x80)
      return false;
  }
  return true;
}

// Three-way comparison used to order the records inside a hash bucket.
// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
//
// Returns a negative value when S1 orders before S2, zero when they are
// equivalent, and a positive value otherwise. Strings are ordered by length
// first; equal-length strings are compared case-insensitively when both are
// pure ASCII and byte-wise otherwise.
static int gsiRecordCmp(StringRef S1, StringRef S2) {
  size_t LS = S1.size();
  size_t RS = S2.size();
  // Shorter strings always compare less than longer strings. Derive the sign
  // from the comparison itself: narrowing the unsigned difference `LS - RS`
  // to int gives the wrong sign (or zero) for large length differences.
  if (LS != RS)
    return (LS > RS) - (LS < RS);

  // If either string contains non ascii characters, memcmp them.
  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
    return memcmp(S1.data(), S2.data(), LS);

  // Both strings are ascii, perform a case-insensitive comparison. Compare
  // against S2 itself rather than S2.data(): turning the raw pointer back into
  // a StringRef re-measures it with strlen, which ignores S2's length and
  // relies on a null terminator being present.
  return S1.compare_lower(S2);
}

// Buckets the public symbol records. Publics are written first in the symbol
// record stream, so their record-zero offset is 0.
void GSIStreamBuilder::finalizePublicBuckets() {
  const uint32_t PublicsRecordZeroOffset = 0;
  PSH->finalizeBuckets(PublicsRecordZeroOffset, Publics);
}

void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
  // Build up a list of globals to be bucketed. Use the BulkPublic data
  // structure for this purpose, even though these are global records, not
  // public records. Most of the same fields are required:
  // - Name
  // - NameLen
  // - SymOffset
  // - BucketIdx
  // The dead fields are Offset, Segment, and Flags.
  std::vector<BulkPublic> Records;
  Records.resize(Globals.size());
  uint32_t SymOffset = RecordZeroOffset;
  for (size_t I = 0, E = Globals.size(); I < E; ++I) {
    StringRef Name = getSymbolName(Globals[I]);
    Records[I].Name = Name.data();
    Records[I].NameLen = Name.size();
    Records[I].SymOffset = SymOffset;
    SymOffset += Globals[I].length();
  }

  GSH->finalizeBuckets(RecordZeroOffset, Records);
}

// Builds HashRecords, HashBitmap and HashBuckets for the given records.
//
// Records holds one entry per symbol with Name, NameLen and SymOffset filled
// in; this function stores each entry's bucket index in it but leaves the
// order of Records unchanged. RecordZeroOffset is not referenced in the body.
//
// HashBuckets is appended to, never cleared, so this is written to be called
// once per builder.
void GSIHashStreamBuilder::finalizeBuckets(
    uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
  // Hash every name in parallel.
  // NOTE(review): the hash is computed from the raw Name pointer while the
  // sort below uses getName(); presumably Name is null-terminated exactly at
  // NameLen - confirm against callers.
  parallelForEachN(0, Records.size(), [&](size_t I) {
    Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
  });

  // Count up the size of each bucket. Then, use an exclusive prefix sum to
  // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but
  // we can't use it yet.
  uint32_t BucketStarts[IPHR_HASH] = {0};
  for (const BulkPublic &P : Records)
    ++BucketStarts[P.BucketIdx];
  uint32_t Sum = 0;
  for (uint32_t &B : BucketStarts) {
    uint32_t Size = B;
    B = Sum;
    Sum += Size;
  }

  // Place globals into the hash table in bucket order. When placing a global,
  // update the bucket start. Every hash table slot should be filled. Always use
  // a refcount of one for now.
  // BucketCursors starts as a copy of BucketStarts; once this loop finishes,
  // BucketCursors[B] is one past the last slot of bucket B, so
  // [BucketStarts[B], BucketCursors[B]) is that bucket's range in HashRecords.
  HashRecords.resize(Records.size());
  uint32_t BucketCursors[IPHR_HASH];
  memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));
  for (int I = 0, E = Records.size(); I < E; ++I) {
    uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;
    // Off temporarily holds the index into Records; it is rewritten to a
    // stream offset after sorting.
    HashRecords[HashIdx].Off = I;
    HashRecords[HashIdx].CRef = 1;
  }

  // Within the buckets, sort each bucket by memcmp of the symbol's name.  It's
  // important that we use the same sorting algorithm as is used by the
  // reference implementation to ensure that the search for a record within a
  // bucket can properly early-out when it detects the record won't be found.
  // The algorithm used here corresponds to the function
  // caseInsensitiveComparePchPchCchCch in the reference implementation.
  // Each bucket covers a disjoint range of HashRecords, and the buckets are
  // processed in parallel.
  parallelForEachN(0, IPHR_HASH, [&](size_t I) {
    auto B = HashRecords.begin() + BucketStarts[I];
    auto E = HashRecords.begin() + BucketCursors[I];
    // Nothing to sort or rewrite for an empty bucket.
    if (B == E)
      return;
    auto BucketCmp = [Records](const PSHashRecord &LHash,
                               const PSHashRecord &RHash) {
      const BulkPublic &L = Records[uint32_t(LHash.Off)];
      const BulkPublic &R = Records[uint32_t(RHash.Off)];
      assert(L.BucketIdx == R.BucketIdx);
      int Cmp = gsiRecordCmp(L.getName(), R.getName());
      if (Cmp != 0)
        return Cmp < 0;
      // This comparison is necessary to make the sorting stable in the presence
      // of two static globals with the same name. The easiest way to observe
      // this is with S_LDATA32 records.
      return L.SymOffset < R.SymOffset;
    };
    llvm::sort(B, E, BucketCmp);

    // After we are done sorting, replace the global indices with the stream
    // offsets of each global. Add one when writing symbol offsets to disk.
    // See GSI1::fixSymRecs.
    for (PSHashRecord &HRec : make_range(B, E))
      HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;
  });

  // For each non-empty bucket, push the bucket start offset into HashBuckets
  // and set a bit in the hash bitmap.
  for (uint32_t I = 0; I < HashBitmap.size(); ++I) {
    uint32_t Word = 0;
    for (uint32_t J = 0; J < 32; ++J) {
      // Skip empty buckets.
      // The bitmap has more bits than there are buckets, so bit positions at
      // or beyond IPHR_HASH are skipped as well and stay zero.
      uint32_t BucketIdx = I * 32 + J;
      if (BucketIdx >= IPHR_HASH ||
          BucketStarts[BucketIdx] == BucketCursors[BucketIdx])
        continue;
      Word |= (1U << J);

      // Calculate what the offset of the first hash record in the chain would
      // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
      // each record would be 12 bytes. See HROffsetCalc in gsi.h.
      const int SizeOfHROffsetCalc = 12;
      ulittle32_t ChainStartOff =
          ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);
      HashBuckets.push_back(ChainStartOff);
    }
    HashBitmap[I] = Word;
  }
}

// Binds the builder to Msf and creates both hash table builders up front: PSH
// for the publics hash and GSH for the globals hash.
GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
    : Msf(Msf),
      PSH(std::make_unique<GSIHashStreamBuilder>()),
      GSH(std::make_unique<GSIHashStreamBuilder>()) {}

// Defined out of line, in the file that defines GSIHashStreamBuilder.
GSIStreamBuilder::~GSIStreamBuilder() = default;

// Returns the byte size of the publics hash stream: the publics stream header,
// the serialized publics hash table and the address map (one 32-bit entry per
// public).
uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
  const uint32_t HeaderBytes = sizeof(PublicsStreamHeader);
  const uint32_t HashBytes = PSH->calculateSerializedLength();
  const uint32_t AddrMapBytes = Publics.size() * sizeof(uint32_t);
  // FIXME: Add thunk map and section offsets for incremental linking.
  return HeaderBytes + HashBytes + AddrMapBytes;
}

// Returns the byte size of the globals hash stream, which consists solely of
// the serialized globals hash table.
uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
  const uint32_t HashBytes = GSH->calculateSerializedLength();
  return HashBytes;
}

// Finalizes both hash tables and reserves the three MSF streams this builder
// writes, recording their indices. Streams are added in the order globals
// hash, publics hash, symbol records. Returns the first error reported by
// MSFBuilder::addStream, or success.
Error GSIStreamBuilder::finalizeMsfLayout() {
  // Public records are written first and global records follow them, so the
  // first global record starts where the public records end.
  finalizePublicBuckets();
  finalizeGlobalBuckets(PSH->RecordByteSize);

  Expected<uint32_t> GlobalsIdx =
      Msf.addStream(calculateGlobalsHashStreamSize());
  if (!GlobalsIdx)
    return GlobalsIdx.takeError();
  GlobalsStreamIndex = *GlobalsIdx;

  Expected<uint32_t> PublicsIdx =
      Msf.addStream(calculatePublicsHashStreamSize());
  if (!PublicsIdx)
    return PublicsIdx.takeError();
  PublicsStreamIndex = *PublicsIdx;

  // The symbol record stream holds every public record followed by every
  // global record.
  const uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;
  Expected<uint32_t> RecordsIdx = Msf.addStream(RecordBytes);
  if (!RecordsIdx)
    return RecordsIdx.takeError();
  RecordStreamIndex = *RecordsIdx;

  return Error::success();
}

void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {
  assert(Publics.empty() && PSH->RecordByteSize == 0 &&
         "publics can only be added once");
  Publics = std::move(PublicsIn);

  // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism.
  parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {
    return L.getName() < R.getName();
  });

  // Assign offsets and calculate the length of the public symbol records.
  uint32_t SymOffset = 0;
  for (BulkPublic &Pub : Publics) {
    Pub.SymOffset = SymOffset;
    SymOffset += sizeOfPublic(Pub);
  }

  // Remember the length of the public stream records.
  PSH->RecordByteSize = SymOffset;
}

// Typed overloads of addGlobalSymbol. Each one forwards to
// serializeAndAddGlobal(), which serializes the symbol and hands the result to
// the CVSymbol overload.

// Adds a procedure reference symbol.
void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
  serializeAndAddGlobal(Sym);
}

// Adds a data symbol.
void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
  serializeAndAddGlobal(Sym);
}

// Adds a constant symbol.
void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
  serializeAndAddGlobal(Sym);
}

template <typename T>
void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {
  T Copy(Symbol);
  addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
                                                   CodeViewContainer::Pdb));
}

// Appends an already serialized global record and accounts for its size.
// Typedef (S_UDT) and constant (S_CONSTANT) records are deduplicated: a record
// already present in GlobalsSeen is dropped.
void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
  const bool IsDedupKind =
      Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT;
  // Only deduplicated kinds are recorded in GlobalsSeen; insert() reports
  // through `.second` whether the record was new.
  if (IsDedupKind && !GlobalsSeen.insert(Symbol).second)
    return;

  GSH->RecordByteSize += Symbol.length();
  Globals.push_back(Symbol);
}

// Serializes every public, in order, and writes it to Writer. Stops at and
// returns the first write error.
static Error writePublics(BinaryStreamWriter &Writer,
                          ArrayRef<BulkPublic> Publics) {
  // Scratch buffer reused for every record; resized to each record's size.
  std::vector<uint8_t> Scratch;
  for (size_t I = 0, E = Publics.size(); I != E; ++I) {
    const BulkPublic &Pub = Publics[I];
    Scratch.resize(sizeOfPublic(Pub));
    serializePublic(Scratch.data(), Pub);
    if (Error Err = Writer.writeBytes(Scratch))
      return Err;
  }
  return Error::success();
}

// Writes the given records to Writer by exposing them as a single
// little-endian item stream.
static Error writeRecords(BinaryStreamWriter &Writer,
                          ArrayRef<CVSymbol> Records) {
  BinaryItemStream<CVSymbol> Items(support::endianness::little);
  Items.setItems(Records);
  return Writer.writeStreamRef(BinaryStreamRef(Items));
}

// Writes the symbol record stream: every public record followed by every
// global record.
Error GSIStreamBuilder::commitSymbolRecordStream(
    WritableBinaryStreamRef Stream) {
  BinaryStreamWriter Writer(Stream);

  // The order must match the record offsets assigned earlier: publics start at
  // offset zero and globals start right after the last public (see
  // finalizeMsfLayout).
  if (Error Err = writePublics(Writer, Publics))
    return Err;
  return writeRecords(Writer, Globals);
}

// Builds the publics address map: the SymOffset of every public, ordered by
// (Segment, Offset, name).
static std::vector<support::ulittle32_t>
computeAddrMap(ArrayRef<BulkPublic> Publics) {
  // Start with the identity permutation of indices into Publics.
  const int NumPublics = Publics.size();
  std::vector<ulittle32_t> AddrMap(NumPublics);
  for (int I = 0; I < NumPublics; ++I)
    AddrMap[I] = ulittle32_t(I);

  // Order the indices by the address of the public they refer to.
  auto AddrLess = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) {
    const BulkPublic &L = Publics[LIdx];
    const BulkPublic &R = Publics[RIdx];
    if (L.Segment < R.Segment)
      return true;
    if (R.Segment < L.Segment)
      return false;
    if (L.Offset < R.Offset)
      return true;
    if (R.Offset < L.Offset)
      return false;
    // parallelSort is unstable, so fall back to the name to keep two names for
    // the same location in a deterministic order.
    return L.getName() < R.getName();
  };
  parallelSort(AddrMap, AddrLess);

  // Replace each index with the symbol offset of the public it refers to.
  for (size_t I = 0, E = AddrMap.size(); I != E; ++I)
    AddrMap[I] = Publics[AddrMap[I]].SymOffset;
  return AddrMap;
}

// Writes the publics hash stream: the publics stream header, the serialized
// publics hash table, then the address map. Returns the first write error, or
// success.
Error GSIStreamBuilder::commitPublicsHashStream(
    WritableBinaryStreamRef Stream) {
  BinaryStreamWriter Writer(Stream);

  PublicsStreamHeader Hdr;
  // Byte sizes of the two sections that follow the header.
  Hdr.SymHash = PSH->calculateSerializedLength();
  Hdr.AddrMap = Publics.size() * 4;
  // FIXME: Fill these in. They are for incremental linking.
  Hdr.NumThunks = 0;
  Hdr.SizeOfThunk = 0;
  Hdr.ISectThunkTable = 0;
  memset(Hdr.Padding, 0, sizeof(Hdr.Padding));
  Hdr.OffThunkTable = 0;
  Hdr.NumSections = 0;

  if (Error Err = Writer.writeObject(Hdr))
    return Err;
  if (Error Err = PSH->commit(Writer))
    return Err;

  const std::vector<support::ulittle32_t> AddrMap = computeAddrMap(Publics);
  assert(AddrMap.size() == Publics.size());
  return Writer.writeArray(makeArrayRef(AddrMap));
}

// Writes the globals hash stream, which consists solely of the serialized
// globals hash table.
Error GSIStreamBuilder::commitGlobalsHashStream(
    WritableBinaryStreamRef Stream) {
  BinaryStreamWriter GlobalsWriter(Stream);
  return GSH->commit(GlobalsWriter);
}

// Writes all three streams owned by this builder into Buffer according to
// Layout: the symbol records first, then the globals hash, then the publics
// hash. Returns the first error encountered, or success.
Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
                               WritableBinaryStreamRef Buffer) {
  // Maps one of this builder's stream indices onto a writable stream.
  auto OpenStream = [&](uint32_t StreamIndex) {
    return WritableMappedBlockStream::createIndexedStream(
        Layout, Buffer, StreamIndex, Msf.getAllocator());
  };
  auto GlobalsStream = OpenStream(getGlobalsStreamIndex());
  auto PublicsStream = OpenStream(getPublicsStreamIndex());
  auto RecordsStream = OpenStream(getRecordStreamIndex());

  if (Error Err = commitSymbolRecordStream(*RecordsStream))
    return Err;
  if (Error Err = commitGlobalsHashStream(*GlobalsStream))
    return Err;
  return commitPublicsHashStream(*PublicsStream);
}
first commit 2022-04-25 10:02:23 +02:00			`//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation ------------ C++ --===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// The data structures defined in this file are based on the reference`
			`// implementation which is available at`
			`// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"`
			`#include "llvm/DebugInfo/CodeView/RecordName.h"`
			`#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"`
			`#include "llvm/DebugInfo/CodeView/SymbolRecord.h"`
			`#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"`
			`#include "llvm/DebugInfo/MSF/MSFBuilder.h"`
			`#include "llvm/DebugInfo/MSF/MSFCommon.h"`
			`#include "llvm/DebugInfo/MSF/MappedBlockStream.h"`
			`#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"`
			`#include "llvm/DebugInfo/PDB/Native/Hash.h"`
			`#include "llvm/Support/BinaryItemStream.h"`
			`#include "llvm/Support/BinaryStreamWriter.h"`
			`#include "llvm/Support/Parallel.h"`
			`#include "llvm/Support/xxhash.h"`
			`#include <algorithm>`
			`#include <vector>`

			`using namespace llvm;`
			`using namespace llvm::msf;`
			`using namespace llvm::pdb;`
			`using namespace llvm::codeview;`

			`// Helper class for building the public and global PDB hash table buckets.`
			`struct llvm::pdb::GSIHashStreamBuilder {`
			`// Sum of the size of all public or global records.`
			`uint32_t RecordByteSize = 0;`

			`std::vector<PSHashRecord> HashRecords;`

			// The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The
			`// reference implementation builds a hash table with IPHR_HASH buckets in it.`
			`// The last bucket is used to link together free hash table cells in a linked`
			`// list, but it is always empty in the compressed, on-disk format. However,`
			`// the bitmap must have a bit for it.`
			`std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;`

			`std::vector<support::ulittle32_t> HashBuckets;`

			`uint32_t calculateSerializedLength() const;`
			`Error commit(BinaryStreamWriter &Writer);`

			`void finalizePublicBuckets();`
			`void finalizeGlobalBuckets(uint32_t RecordZeroOffset);`

			`// Assign public and global symbol records into hash table buckets.`
			`// Modifies the list of records to store the bucket index, but does not`
			`// change the order.`
			`void finalizeBuckets(uint32_t RecordZeroOffset,`
			`MutableArrayRef<BulkPublic> Globals);`
			`};`

			`// DenseMapInfo implementation for deduplicating symbol records.`
			`struct llvm::pdb::SymbolDenseMapInfo {`
			`static inline CVSymbol getEmptyKey() {`
			`static CVSymbol Empty;`
			`return Empty;`
			`}`
			`static inline CVSymbol getTombstoneKey() {`
			`static CVSymbol Tombstone(`
			`DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());`
			`return Tombstone;`
			`}`
			`static unsigned getHashValue(const CVSymbol &Val) {`
			`return xxHash64(Val.RecordData);`
			`}`
			`static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {`
			`return LHS.RecordData == RHS.RecordData;`
			`}`
			`};`

			`namespace {`
			`LLVM_PACKED_START`
			`struct PublicSym32Layout {`
			`RecordPrefix Prefix;`
			`PublicSym32Header Pub;`
			`// char Name[];`
			`};`
			`LLVM_PACKED_END`
			`} // namespace`

			`// Calculate how much memory this public needs when serialized.`
			`static uint32_t sizeOfPublic(const BulkPublic &Pub) {`
			`uint32_t NameLen = Pub.NameLen;`
			`NameLen = std::min(NameLen,`
			`uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));`
			`return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);`
			`}`

			`static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {`
			`// Assume the caller has allocated sizeOfPublic bytes.`
			`uint32_t NameLen = std::min(`
			`Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1));`
			`size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4);`
			`assert(Size == sizeOfPublic(Pub));`
			`auto FixedMem = reinterpret_cast<PublicSym32Layout >(Mem);`
			`FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);`
			`FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2);`
			`FixedMem->Pub.Flags = Pub.Flags;`
			`FixedMem->Pub.Offset = Pub.Offset;`
			`FixedMem->Pub.Segment = Pub.Segment;`
			`char NameMem = reinterpret_cast<char >(FixedMem + 1);`
			`memcpy(NameMem, Pub.Name, NameLen);`
			`// Zero the null terminator and remaining bytes.`
			`memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen);`
			`return CVSymbol(makeArrayRef(reinterpret_cast<uint8_t *>(Mem), Size));`
			`}`

			`uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {`
			`uint32_t Size = sizeof(GSIHashHeader);`
			`Size += HashRecords.size() * sizeof(PSHashRecord);`
			`Size += HashBitmap.size() * sizeof(uint32_t);`
			`Size += HashBuckets.size() * sizeof(uint32_t);`
			`return Size;`
			`}`

			`Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {`
			`GSIHashHeader Header;`
			`Header.VerSignature = GSIHashHeader::HdrSignature;`
			`Header.VerHdr = GSIHashHeader::HdrVersion;`
			`Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);`
			`Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;`

			`if (auto EC = Writer.writeObject(Header))`
			`return EC;`

			`if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))`
			`return EC;`
			`if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))`
			`return EC;`
			`if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))`
			`return EC;`
			`return Error::success();`
			`}`

			`static bool isAsciiString(StringRef S) {`
			`return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });`
			`}`

			// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
			`static int gsiRecordCmp(StringRef S1, StringRef S2) {`
			`size_t LS = S1.size();`
			`size_t RS = S2.size();`
			`// Shorter strings always compare less than longer strings.`
			`if (LS != RS)`
			`return LS - RS;`

			`// If either string contains non ascii characters, memcmp them.`
			`if (LLVM_UNLIKELY(!isAsciiString(S1) \|\| !isAsciiString(S2)))`
			`return memcmp(S1.data(), S2.data(), LS);`

			`// Both strings are ascii, perform a case-insensitive comparison.`
			`return S1.compare_lower(S2.data());`
			`}`

			`void GSIStreamBuilder::finalizePublicBuckets() {`
			`PSH->finalizeBuckets(0, Publics);`
			`}`

			`void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {`
			`// Build up a list of globals to be bucketed. Use the BulkPublic data`
			`// structure for this purpose, even though these are global records, not`
			`// public records. Most of the same fields are required:`
			`// - Name`
			`// - NameLen`
			`// - SymOffset`
			`// - BucketIdx`
			`// The dead fields are Offset, Segment, and Flags.`
			`std::vector<BulkPublic> Records;`
			`Records.resize(Globals.size());`
			`uint32_t SymOffset = RecordZeroOffset;`
			`for (size_t I = 0, E = Globals.size(); I < E; ++I) {`
			`StringRef Name = getSymbolName(Globals[I]);`
			`Records[I].Name = Name.data();`
			`Records[I].NameLen = Name.size();`
			`Records[I].SymOffset = SymOffset;`
			`SymOffset += Globals[I].length();`
			`}`

			`GSH->finalizeBuckets(RecordZeroOffset, Records);`
			`}`

			`void GSIHashStreamBuilder::finalizeBuckets(`
			`uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {`
			`// Hash every name in parallel.`
			`parallelForEachN(0, Records.size(), [&](size_t I) {`
			`Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);`
			`});`

			`// Count up the size of each bucket. Then, use an exclusive prefix sum to`
			`// calculate the bucket start offsets. This is C++17 std::exclusive_scan, but`
			`// we can't use it yet.`
			`uint32_t BucketStarts[IPHR_HASH] = {0};`
			`for (const BulkPublic &P : Records)`
			`++BucketStarts[P.BucketIdx];`
			`uint32_t Sum = 0;`
			`for (uint32_t &B : BucketStarts) {`
			`uint32_t Size = B;`
			`B = Sum;`
			`Sum += Size;`
			`}`

			`// Place globals into the hash table in bucket order. When placing a global,`
			`// update the bucket start. Every hash table slot should be filled. Always use`
			`// a refcount of one for now.`
			`HashRecords.resize(Records.size());`
			`uint32_t BucketCursors[IPHR_HASH];`
			`memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));`
			`for (int I = 0, E = Records.size(); I < E; ++I) {`
			`uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;`
			`HashRecords[HashIdx].Off = I;`
			`HashRecords[HashIdx].CRef = 1;`
			`}`

			`// Within the buckets, sort each bucket by memcmp of the symbol's name. It's`
			`// important that we use the same sorting algorithm as is used by the`
			`// reference implementation to ensure that the search for a record within a`
			`// bucket can properly early-out when it detects the record won't be found.`
			`// The algorithm used here corresponds to the function`
			`// caseInsensitiveComparePchPchCchCch in the reference implementation.`
			`parallelForEachN(0, IPHR_HASH, [&](size_t I) {`
			`auto B = HashRecords.begin() + BucketStarts[I];`
			`auto E = HashRecords.begin() + BucketCursors[I];`
			`if (B == E)`
			`return;`
			`auto BucketCmp = [Records](const PSHashRecord &LHash,`
			`const PSHashRecord &RHash) {`
			`const BulkPublic &L = Records[uint32_t(LHash.Off)];`
			`const BulkPublic &R = Records[uint32_t(RHash.Off)];`
			`assert(L.BucketIdx == R.BucketIdx);`
			`int Cmp = gsiRecordCmp(L.getName(), R.getName());`
			`if (Cmp != 0)`
			`return Cmp < 0;`
			`// This comparison is necessary to make the sorting stable in the presence`
			`// of two static globals with the same name. The easiest way to observe`
			`// this is with S_LDATA32 records.`
			`return L.SymOffset < R.SymOffset;`
			`};`
			`llvm::sort(B, E, BucketCmp);`

			`// After we are done sorting, replace the global indices with the stream`
			`// offsets of each global. Add one when writing symbol offsets to disk.`
			`// See GSI1::fixSymRecs.`
			`for (PSHashRecord &HRec : make_range(B, E))`
			`HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;`
			`});`

			`// For each non-empty bucket, push the bucket start offset into HashBuckets`
			`// and set a bit in the hash bitmap.`
			`for (uint32_t I = 0; I < HashBitmap.size(); ++I) {`
			`uint32_t Word = 0;`
			`for (uint32_t J = 0; J < 32; ++J) {`
			`// Skip empty buckets.`
			`uint32_t BucketIdx = I * 32 + J;`
			`if (BucketIdx >= IPHR_HASH \|\|`
			`BucketStarts[BucketIdx] == BucketCursors[BucketIdx])`
			`continue;`
			`Word \|= (1U << J);`

			`// Calculate what the offset of the first hash record in the chain would`
			`// be if it were inflated to contain 32-bit pointers. On a 32-bit system,`
			`// each record would be 12 bytes. See HROffsetCalc in gsi.h.`
			`const int SizeOfHROffsetCalc = 12;`
			`ulittle32_t ChainStartOff =`
			`ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);`
			`HashBuckets.push_back(ChainStartOff);`
			`}`
			`HashBitmap[I] = Word;`
			`}`
			`}`

			`GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)`
			`: Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),`
			`GSH(std::make_unique<GSIHashStreamBuilder>()) {}`

			`GSIStreamBuilder::~GSIStreamBuilder() {}`

			`uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {`
			`uint32_t Size = 0;`
			`Size += sizeof(PublicsStreamHeader);`
			`Size += PSH->calculateSerializedLength();`
			`Size += Publics.size() * sizeof(uint32_t); // AddrMap`
			`// FIXME: Add thunk map and section offsets for incremental linking.`

			`return Size;`
			`}`

			`uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {`
			`return GSH->calculateSerializedLength();`
			`}`

			`Error GSIStreamBuilder::finalizeMsfLayout() {`
			`// First we write public symbol records, then we write global symbol records.`
			`finalizePublicBuckets();`
			`finalizeGlobalBuckets(PSH->RecordByteSize);`

			`Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());`
			`if (!Idx)`
			`return Idx.takeError();`
			`GlobalsStreamIndex = *Idx;`

			`Idx = Msf.addStream(calculatePublicsHashStreamSize());`
			`if (!Idx)`
			`return Idx.takeError();`
			`PublicsStreamIndex = *Idx;`

			`uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;`

			`Idx = Msf.addStream(RecordBytes);`
			`if (!Idx)`
			`return Idx.takeError();`
			`RecordStreamIndex = *Idx;`
			`return Error::success();`
			`}`

			`void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {`
			`assert(Publics.empty() && PSH->RecordByteSize == 0 &&`
			`"publics can only be added once");`
			`Publics = std::move(PublicsIn);`

			`// Sort the symbols by name. PDBs contain lots of symbols, so use parallelism.`
			`parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {`
			`return L.getName() < R.getName();`
			`});`

			`// Assign offsets and calculate the length of the public symbol records.`
			`uint32_t SymOffset = 0;`
			`for (BulkPublic &Pub : Publics) {`
			`Pub.SymOffset = SymOffset;`
			`SymOffset += sizeOfPublic(Pub);`
			`}`

			`// Remember the length of the public stream records.`
			`PSH->RecordByteSize = SymOffset;`
			`}`

			`void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {`
			`serializeAndAddGlobal(Sym);`
			`}`

			`void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {`
			`serializeAndAddGlobal(Sym);`
			`}`

			`void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {`
			`serializeAndAddGlobal(Sym);`
			`}`

			`template <typename T>`
			`void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {`
			`T Copy(Symbol);`
			`addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),`
			`CodeViewContainer::Pdb));`
			`}`

			// Appends an already-serialized symbol to the list of globals and accounts
			// for its length in the globals record byte size.
			void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
			  // Typedefs (S_UDT) and constants (S_CONSTANT) are de-duplicated: a record
			  // that has already been seen is dropped.
			  const auto Kind = Symbol.kind();
			  const bool IsDedupKind = Kind == S_UDT || Kind == S_CONSTANT;
			  if (IsDedupKind && !GlobalsSeen.insert(Symbol).second)
			    return;

			  GSH->RecordByteSize += Symbol.length();
			  Globals.push_back(Symbol);
			}

			`// Serialize each public and write it.`
			`static Error writePublics(BinaryStreamWriter &Writer,`
			`ArrayRef<BulkPublic> Publics) {`
			`std::vector<uint8_t> Storage;`
			`for (const BulkPublic &Pub : Publics) {`
			`Storage.resize(sizeOfPublic(Pub));`
			`serializePublic(Storage.data(), Pub);`
			`if (Error E = Writer.writeBytes(Storage))`
			`return E;`
			`}`
			`return Error::success();`
			`}`

			`static Error writeRecords(BinaryStreamWriter &Writer,`
			`ArrayRef<CVSymbol> Records) {`
			`BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);`
			`ItemStream.setItems(Records);`
			`BinaryStreamRef RecordsRef(ItemStream);`
			`return Writer.writeStreamRef(RecordsRef);`
			`}`

			`Error GSIStreamBuilder::commitSymbolRecordStream(`
			`WritableBinaryStreamRef Stream) {`
			`BinaryStreamWriter Writer(Stream);`

			`// Write public symbol records first, followed by global symbol records. This`
			`// must match the order that we assume in finalizeMsfLayout when computing`
			`// PSHZero and GSHZero.`
			`if (auto EC = writePublics(Writer, Publics))`
			`return EC;`
			`if (auto EC = writeRecords(Writer, Globals))`
			`return EC;`

			`return Error::success();`
			`}`

			// Builds the address map for the publics stream.
			//
			// Returns one entry per public symbol, holding that symbol's SymOffset. The
			// entries are ordered by (Segment, Offset, name) of the symbol they refer to.
			static std::vector<support::ulittle32_t>
			computeAddrMap(ArrayRef<BulkPublic> Publics) {
			  // Build a parallel vector of indices into the Publics vector, and sort it by
			  // address. The indices are stored as unsigned 32-bit values, so count with
			  // an unsigned 32-bit index instead of narrowing the size to a signed int.
			  std::vector<ulittle32_t> PubAddrMap;
			  PubAddrMap.reserve(Publics.size());
			  for (uint32_t I = 0, E = Publics.size(); I < E; ++I)
			    PubAddrMap.push_back(ulittle32_t(I));

			  auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) {
			    const BulkPublic &L = Publics[LIdx];
			    const BulkPublic &R = Publics[RIdx];
			    if (L.Segment != R.Segment)
			      return L.Segment < R.Segment;
			    if (L.Offset != R.Offset)
			      return L.Offset < R.Offset;
			    // parallelSort is unstable, so we have to do name comparison to ensure
			    // that two names for the same location come out in a deterministic order.
			    return L.getName() < R.getName();
			  };
			  parallelSort(PubAddrMap, AddrCmp);

			  // Rewrite the public symbol indices into symbol offsets.
			  for (ulittle32_t &Entry : PubAddrMap)
			    Entry = Publics[Entry].SymOffset;
			  return PubAddrMap;
			}

			`Error GSIStreamBuilder::commitPublicsHashStream(`
			`WritableBinaryStreamRef Stream) {`
			`BinaryStreamWriter Writer(Stream);`
			`PublicsStreamHeader Header;`

			`// FIXME: Fill these in. They are for incremental linking.`
			`Header.SymHash = PSH->calculateSerializedLength();`
			`Header.AddrMap = Publics.size() * 4;`
			`Header.NumThunks = 0;`
			`Header.SizeOfThunk = 0;`
			`Header.ISectThunkTable = 0;`
			`memset(Header.Padding, 0, sizeof(Header.Padding));`
			`Header.OffThunkTable = 0;`
			`Header.NumSections = 0;`
			`if (auto EC = Writer.writeObject(Header))`
			`return EC;`

			`if (auto EC = PSH->commit(Writer))`
			`return EC;`

			`std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics);`
			`assert(PubAddrMap.size() == Publics.size());`
			`if (auto EC = Writer.writeArray(makeArrayRef(PubAddrMap)))`
			`return EC;`

			`return Error::success();`
			`}`

			`Error GSIStreamBuilder::commitGlobalsHashStream(`
			`WritableBinaryStreamRef Stream) {`
			`BinaryStreamWriter Writer(Stream);`
			`return GSH->commit(Writer);`
			`}`

			`Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,`
			`WritableBinaryStreamRef Buffer) {`
			`auto GS = WritableMappedBlockStream::createIndexedStream(`
			`Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());`
			`auto PS = WritableMappedBlockStream::createIndexedStream(`
			`Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());`
			`auto PRS = WritableMappedBlockStream::createIndexedStream(`
			`Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator());`

			`if (auto EC = commitSymbolRecordStream(*PRS))`
			`return EC;`
			`if (auto EC = commitGlobalsHashStream(*GS))`
			`return EC;`
			`if (auto EC = commitPublicsHashStream(*PS))`
			`return EC;`
			`return Error::success();`
			`}`