Skip to content

[WASM] wasm-ld: split up __wasm_apply_data_relocs #129007

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions lld/wasm/InputChunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,12 +361,11 @@ uint64_t InputChunk::getVA(uint64_t offset) const {
// Generate code to apply relocations to the data section at runtime.
// This is only called when generating shared libraries (PIC) where addresses are
// not known at static link time.
bool InputChunk::generateRelocationCode(raw_ostream &os) const {
void InputChunk::generateRelocationCode(std::vector<std::string> &funcs) const {
LLVM_DEBUG(dbgs() << "generating runtime relocations: " << name
<< " count=" << relocations.size() << "\n");

bool is64 = ctx.arg.is64.value_or(false);
bool generated = false;
unsigned opcode_ptr_const = is64 ? WASM_OPCODE_I64_CONST
: WASM_OPCODE_I32_CONST;
unsigned opcode_ptr_add = is64 ? WASM_OPCODE_I64_ADD
Expand All @@ -385,6 +384,16 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const {
if (!requiresRuntimeReloc)
continue;

// https://www.w3.org/TR/wasm-js-api-2/#limits
// The maximum size of a function body, including locals declarations, is 7,654,321 bytes.
if (funcs.empty() || funcs.back().size() >= 7654321) {
funcs.emplace_back(std::string());
raw_string_ostream os(funcs.back());
writeUleb128(os, 0, "num locals");
}

raw_string_ostream os(funcs.back());

LLVM_DEBUG(dbgs() << "gen reloc: type=" << relocTypeToString(rel.Type)
<< " addend=" << rel.Addend << " index=" << rel.Index
<< " output offset=" << offset << "\n");
Expand Down Expand Up @@ -439,9 +448,7 @@ bool InputChunk::generateRelocationCode(raw_ostream &os) const {
writeU8(os, opcode_reloc_store, "I32_STORE");
writeUleb128(os, 2, "align");
writeUleb128(os, 0, "offset");
generated = true;
}
return generated;
}

// Split WASM_SEG_FLAG_STRINGS section. Such a section is a sequence of
Expand Down
2 changes: 1 addition & 1 deletion lld/wasm/InputChunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class InputChunk {

size_t getNumRelocations() const { return relocations.size(); }
void writeRelocations(llvm::raw_ostream &os) const;
bool generateRelocationCode(raw_ostream &os) const;
void generateRelocationCode(std::vector<std::string> &funcs) const;

bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
bool isRetained() const { return flags & llvm::wasm::WASM_SEG_FLAG_RETAIN; }
Expand Down
2 changes: 2 additions & 0 deletions lld/wasm/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ void FunctionSection::writeBody() {
void FunctionSection::addFunction(InputFunction *func) {
if (!func->live)
return;
if (func->hasFunctionIndex())
return;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why this was needed?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe `assert(!hasFunctionIndex());` was being triggered in `setFunctionIndex`, and `if (func->hasFunctionIndex()) return;` was introduced to keep the program from aborting.

// Stores `index` as this function's index. The assert enforces that an index
// is assigned at most once: calling this on a function that already has an
// index trips the assertion in assertion-enabled builds.
void InputFunction::setFunctionIndex(uint32_t index) {
  // Debug-only trace of the name -> index assignment.
  LLVM_DEBUG(dbgs() << "InputFunction::setFunctionIndex: " << name << " -> "
                    << index << "\n");
  assert(!hasFunctionIndex());
  functionIndex = index;
}

Copy link
Author

@dmjio dmjio Feb 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it's described below

https://github.com/llvm/llvm-project/pull/129007/files#diff-e826be2acc8b58c5d040525dc8a509e90810d3edcd93190d4810e476919ef9aaR1509-R1513

It seems `addFunction` or `markLive` is already setting the index, which explains the `if (func->hasFunctionIndex()) return;` guard (the assert's invariant no longer holds).

uint32_t functionIndex =
out.importSec->getNumImportedFunctions() + inputFunctions.size();
inputFunctions.emplace_back(func);
Expand Down
74 changes: 59 additions & 15 deletions lld/wasm/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1459,20 +1459,21 @@ void Writer::createStartFunction() {
void Writer::createApplyDataRelocationsFunction() {
LLVM_DEBUG(dbgs() << "createApplyDataRelocationsFunction\n");
// First write the body's contents to a string.
std::string bodyContent;
std::vector<std::string> funcs;
{
raw_string_ostream os(bodyContent);
writeUleb128(os, 0, "num locals");
bool generated = false;
for (const OutputSegment *seg : segments)
if (!ctx.arg.sharedMemory || !seg->isTLS())
for (const InputChunk *inSeg : seg->inputSegments)
generated |= inSeg->generateRelocationCode(os);
inSeg->generateRelocationCode(funcs);
}

if (!generated) {
LLVM_DEBUG(dbgs() << "skipping empty __wasm_apply_data_relocs\n");
return;
}
if (funcs.empty()) {
LLVM_DEBUG(dbgs() << "skipping empty __wasm_apply_data_relocs\n");
return;
}

for (auto &func : funcs) {
raw_string_ostream os(func);
writeU8(os, WASM_OPCODE_END, "END");
}

Expand All @@ -1485,24 +1486,67 @@ void Writer::createApplyDataRelocationsFunction() {
make<SyntheticFunction>(nullSignature, "__wasm_apply_data_relocs"));
def->markLive();

createFunction(def, bodyContent);
if (funcs.size() == 1) {
createFunction(def, funcs.back());
return;
}

std::string body;
{
raw_string_ostream os(body);
writeUleb128(os, 0, "num locals");

for (std::size_t i = 0; i < funcs.size(); ++i) {
auto &name =
*make<std::string>("__wasm_apply_data_relocs_" + std::to_string(i));
auto *func = make<SyntheticFunction>(nullSignature, name);
auto *def = symtab->addSyntheticFunction(
name, WASM_SYMBOL_VISIBILITY_HIDDEN, func);
def->markLive();
// Normally this shouldn't be called manually for a synthetic
// function, since the function indices in
// ctx.syntheticFunctions will be calculated later (check
// functionSec->addFunction call hierarchy for details).
// However, at this point we already need the correct index. The
// solution is to place the new synthetic function eagerly, and
// also make addFunction idempotent by skipping when there's
// already a function index.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess an alternative to this would be to somehow generate relocations here.

I wonder how this works for other synthetic functions that can call each other, such as __wasm_init_memory?

out.functionSec->addFunction(func);
createFunction(def, funcs[i]);

writeU8(os, WASM_OPCODE_CALL, "CALL");
writeUleb128(os, def->getFunctionIndex(), "function index");
}

writeU8(os, WASM_OPCODE_END, "END");
}
createFunction(def, body);
}

void Writer::createApplyTLSRelocationsFunction() {
LLVM_DEBUG(dbgs() << "createApplyTLSRelocationsFunction\n");
std::string bodyContent;
std::vector<std::string> funcs;
{
raw_string_ostream os(bodyContent);
writeUleb128(os, 0, "num locals");
for (const OutputSegment *seg : segments)
if (seg->isTLS())
for (const InputChunk *inSeg : seg->inputSegments)
inSeg->generateRelocationCode(os);
inSeg->generateRelocationCode(funcs);
}

if (funcs.empty()) {
funcs.emplace_back(std::string());
raw_string_ostream os(funcs.back());
writeUleb128(os, 0, "num locals");
}

for (auto &func : funcs) {
raw_string_ostream os(func);
writeU8(os, WASM_OPCODE_END, "END");
}

createFunction(WasmSym::applyTLSRelocs, bodyContent);
assert(funcs.size() == 1);

createFunction(WasmSym::applyTLSRelocs, funcs.back());
}

// Similar to createApplyDataRelocationsFunction but generates relocation code
Expand Down