From 028600d4a34f062625fa3306d17e1b5fa4957fd8 Mon Sep 17 00:00:00 2001 From: "Ertl, Stefan (DI FA SEA R&D-AT S7P)" Date: Thu, 21 May 2026 09:13:46 +0200 Subject: [PATCH 1/2] build-scripts/build_llvm.py: bump to llvm 22 --- build-scripts/build_llvm.py | 4 ++-- core/iwasm/compilation/aot_llvm.c | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/build-scripts/build_llvm.py b/build-scripts/build_llvm.py index e2221b8e54..29d7b91977 100755 --- a/build-scripts/build_llvm.py +++ b/build-scripts/build_llvm.py @@ -299,7 +299,7 @@ def main(): "arc": { "repo": "https://github.com/llvm/llvm-project.git", "repo_ssh": "git@github.com:llvm/llvm-project.git", - "branch": "release/18.x", + "branch": "release/22.x", }, "xtensa": { "repo": "https://github.com/espressif/llvm-project.git", @@ -309,7 +309,7 @@ def main(): "default": { "repo": "https://github.com/llvm/llvm-project.git", "repo_ssh": "git@github.com:llvm/llvm-project.git", - "branch": "llvmorg-18.1.8", + "branch": "llvmorg-22.1.6", }, } diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index e9f86817f0..b9d9aa56f2 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -2705,8 +2705,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) #if WASM_ENABLE_LINUX_PERF != 0 if (wasm_runtime_get_linux_perf()) { /* FramePointerKind.All */ - LLVMMetadataRef val = - LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 2, false)); + LLVMMetadataRef val = LLVMValueAsMetadata( + LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 2, false)); const char *key = "frame-pointer"; LLVMAddModuleFlag(comp_ctx->module, LLVMModuleFlagBehaviorWarning, key, strlen(key), val); @@ -2728,7 +2728,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) LLVMAddModuleFlag( comp_ctx->module, LLVMModuleFlagBehaviorWarning, "Debug Info Version", strlen("Debug Info Version"), - 
LLVMValueAsMetadata(LLVMConstInt(LLVMInt32Type(), 3, false))); + LLVMValueAsMetadata( + LLVMConstInt(LLVMInt32TypeInContext(comp_ctx->context), 3, false))); comp_ctx->debug_file = dwarf_gen_file_info(comp_ctx); if (!comp_ctx->debug_file) { From 8793961b2dea785b38bb87872db13163d39e3795 Mon Sep 17 00:00:00 2001 From: Stefan Ertl Date: Wed, 10 Jun 2026 06:39:18 +0000 Subject: [PATCH 2/2] fix(aot): correctly pair RISC-V PCREL_HI20/LO12 relocations on LLVM 22 The RISC-V AOT relocation loader assumed that the R_RISCV_PCREL_LO12_I/S instruction always immediately follows its R_RISCV_PCREL_HI20 (AUIPC) at AUIPC+4. The HI20 handler patched both the AUIPC high 20 bits and the instruction at AUIPC+4, while the LO12 handler did nothing unless it sat exactly 4 bytes after the AUIPC (otherwise it took the unimplemented goto fail_addr_out_of_range path). This assumption holds for older toolchains but breaks with LLVM 22, whose instruction scheduler may place unrelated instructions between the AUIPC and its %pcrel_lo instruction (and may emit several %pcrel_lo accesses sharing a single AUIPC). The result was corrupted code: the HI20 handler clobbered the unrelated instruction at AUIPC+4, and the LO12 patch was skipped, leading to illegal-instruction faults or hangs at runtime (e.g. i32.ctz de Bruijn tables, switch/br_table jump tables, and any PC-relative rodata access). Pair the two relocations by address, the way a linker does: - Split R_RISCV_CALL/CALL_PLT (whose AUIPC+JALR pair really is adjacent) into its own case, leaving its AUIPC+4 patching unchanged. - R_RISCV_PCREL_HI20 now patches only the AUIPC high 20 bits and records the resolved PC-relative value keyed by the AUIPC address in a small thread-local most-recently-used cache. - R_RISCV_PCREL_LO12_I/S reconstructs the AUIPC address from symbol+addend, looks up the cached value, and patches its own instruction with the correct I-type (load/ADDI) or S-type (store) immediate. 
This works regardless of instruction placement and when multiple LO12 accesses share one AUIPC. The cache is thread-local on platforms that define os_thread_local_attribute (it falls back to a plain static otherwise), so concurrent module loads do not interfere there. Fixes the RISC-V64 AOT spec-test suite under QEMU/NuttX. --- core/iwasm/aot/arch/aot_reloc_riscv.c | 137 ++++++++++++++++++++------ 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/core/iwasm/aot/arch/aot_reloc_riscv.c b/core/iwasm/aot/arch/aot_reloc_riscv.c index 8df9f9f8ed..6339e125cc 100644 --- a/core/iwasm/aot/arch/aot_reloc_riscv.c +++ b/core/iwasm/aot/arch/aot_reloc_riscv.c @@ -366,6 +366,40 @@ check_reloc_offset(uint32 target_section_size, uint64 reloc_offset, return true; } +/** + * Cache used to pair an R_RISCV_PCREL_HI20 (AUIPC) relocation with its + * R_RISCV_PCREL_LO12_I/S relocation(s). + * + * The %pcrel_lo relocation references the label of the AUIPC instruction + * instead of the final target symbol, so its low 12 bits can only be computed + * from the PC-relative value that was resolved for the corresponding AUIPC. + * Older compilers always emitted the paired load/store/ADDI instruction + * immediately after the AUIPC, but newer LLVM versions may schedule unrelated + * instructions in between (and may emit several %pcrel_lo accesses for a single + * AUIPC). We therefore record the value resolved for each AUIPC here so that + * the LO12 relocation can recover the low 12 bits regardless of placement. + * + * Relocations of a section are applied sequentially and an AUIPC is always + * relocated before the LO12 relocation(s) that reference it (lower offset), so + * a small most-recently-used cache is sufficient. 
+ */ +#define PCREL_HI20_CACHE_SIZE 8 + +typedef struct PcrelHi20Entry { + uint8 *auipc_addr; + int32 val; +} PcrelHi20Entry; + +#ifdef os_thread_local_attribute +#define RELOC_THREAD_LOCAL os_thread_local_attribute +#else +#define RELOC_THREAD_LOCAL +#endif + +static RELOC_THREAD_LOCAL PcrelHi20Entry + pcrel_hi20_cache[PCREL_HI20_CACHE_SIZE]; +static RELOC_THREAD_LOCAL uint32 pcrel_hi20_cache_pos; + bool apply_relocation(AOTModule *module, uint8 *target_section_addr, uint32 target_section_size, uint64 reloc_offset, @@ -414,8 +448,7 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, #endif case R_RISCV_CALL: - case R_RISCV_CALL_PLT: - case R_RISCV_PCREL_HI20: /* S + A - P */ + case R_RISCV_CALL_PLT: /* S + A - P */ { val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr); @@ -437,17 +470,46 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, rv_calc_imm(val, &imm_hi, &imm_lo); rv_add_val((uint16 *)addr, (imm_hi << 12)); - if ((rv_get_val((uint16 *)(addr + 4)) & 0x7f) == RV_OPCODE_SW) { - /* Adjust imm for SW : S-type */ - val = (((int32)imm_lo >> 5) << 25) - + (((int32)imm_lo & 0x1f) << 7); + /* The JALR paired with the AUIPC of a call always immediately + * follows it and uses an I-type immediate. 
*/ + rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20)); + break; + } + + case R_RISCV_PCREL_HI20: /* S + A - P */ + { + uint32 cache_idx; - rv_add_val((uint16 *)(addr + 4), val); + val = (int32)(intptr_t)((uint8 *)symbol_addr + reloc_addend - addr); + + CHECK_RELOC_OFFSET(sizeof(uint32)); + if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) { + if (symbol_index >= 0) { + /* Reach the target by plt code */ + symbol_addr = (uint8 *)module->code + module->code_size + - get_plt_table_size() + + get_plt_item_size() * symbol_index; + val = (int32)(intptr_t)((uint8 *)symbol_addr - addr); + } } - else { - /* Adjust imm for MV(ADDI)/JALR : I-type */ - rv_add_val((uint16 *)(addr + 4), ((int32)imm_lo << 20)); + + if (val != (intptr_t)((uint8 *)symbol_addr + reloc_addend - addr)) { + goto fail_addr_out_of_range; } + + /* Patch the high 20 bits of the AUIPC only. The low 12 bits are + * patched separately by the R_RISCV_PCREL_LO12_I/S relocation, + * whose instruction is not guaranteed to immediately follow this + * AUIPC. */ + rv_calc_imm(val, &imm_hi, &imm_lo); + rv_add_val((uint16 *)addr, (imm_hi << 12)); + + /* Record the resolved PC-relative value so the matching LO12 + * relocation(s) can recover the low 12 bits. */ + cache_idx = pcrel_hi20_cache_pos % PCREL_HI20_CACHE_SIZE; + pcrel_hi20_cache[cache_idx].auipc_addr = addr; + pcrel_hi20_cache[cache_idx].val = val; + pcrel_hi20_cache_pos++; break; } @@ -470,29 +532,42 @@ apply_relocation(AOTModule *module, uint8 *target_section_addr, case R_RISCV_PCREL_LO12_I: /* S - P */ case R_RISCV_PCREL_LO12_S: /* S - P */ { - /* Already handled in R_RISCV_PCREL_HI20, it should be skipped for - * most cases. But it is still needed for some special cases, e.g. 
- * ``` - * label: - * auipc t0, %pcrel_hi(symbol) # R_RISCV_PCREL_HI20 (symbol) - * lui t1, 1 - * lw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_I (label) - * add t2, t2, t1 - * sw t2, t0, %pcrel_lo(label) # R_RISCV_PCREL_LO12_S (label) - * ``` - * In this case, the R_RISCV_PCREL_LO12_I/S relocation should be - * handled after R_RISCV_PCREL_HI20 relocation. - * - * So, if the R_RISCV_PCREL_LO12_I/S relocation is not followed by - * R_RISCV_PCREL_HI20 relocation, it should be handled here but - * not implemented yet. - */ - - if ((uintptr_t)addr - (uintptr_t)symbol_addr - - (uintptr_t)reloc_addend - != 4) { + /* A %pcrel_lo relocation references the label of its AUIPC + * (%pcrel_hi) instruction, transformed by the AOT compiler into + * the section symbol plus the AUIPC offset as the addend. The low + * 12 bits must be derived from the PC-relative value that was + * resolved when the AUIPC's R_RISCV_PCREL_HI20 relocation was + * applied. Look that value up by the AUIPC address; this works + * whether or not the LO12 instruction immediately follows the + * AUIPC and when several LO12 accesses share one AUIPC. */ + uint8 *auipc_addr = (uint8 *)symbol_addr + (intptr_t)reloc_addend; + uint32 k; + bool found = false; + + CHECK_RELOC_OFFSET(sizeof(uint32)); + + for (k = 0; k < PCREL_HI20_CACHE_SIZE; k++) { + if (pcrel_hi20_cache[k].auipc_addr == auipc_addr) { + val = pcrel_hi20_cache[k].val; + found = true; + break; + } + } + if (!found) { goto fail_addr_out_of_range; } + + rv_calc_imm(val, &imm_hi, &imm_lo); + if (reloc_type == R_RISCV_PCREL_LO12_S) { + /* Adjust imm for store : S-type */ + val = (((int32)imm_lo >> 5) << 25) + + (((int32)imm_lo & 0x1f) << 7); + rv_add_val((uint16 *)addr, val); + } + else { + /* Adjust imm for load/MV(ADDI) : I-type */ + rv_add_val((uint16 *)addr, ((int32)imm_lo << 20)); + } break; }