diff --git a/Makefile b/Makefile index d21f1cada..ab23518e5 100644 --- a/Makefile +++ b/Makefile @@ -114,6 +114,7 @@ DEF-arm64-FreeBSD = $(DEF-arm64) -DTARGETOS_FreeBSD DEF-arm64-NetBSD = $(DEF-arm64) -DTARGETOS_NetBSD DEF-arm64-OpenBSD = $(DEF-arm64) -DTARGETOS_OpenBSD DEF-riscv64 = -DTCC_TARGET_RISCV64 +DEF-riscv32 = -DTCC_TARGET_RISCV32 DEF-c67 = -DTCC_TARGET_C67 -w # disable warnigs DEF-x86_64-FreeBSD = $(DEF-x86_64) -DTARGETOS_FreeBSD DEF-x86_64-NetBSD = $(DEF-x86_64) -DTARGETOS_NetBSD @@ -131,7 +132,7 @@ all: $(PROGS) $(TCCLIBS) $(TCCDOCS) # cross compiler targets to build TCC_X = i386 x86_64 i386-win32 x86_64-win32 x86_64-osx arm arm64 arm-wince c67 -TCC_X += riscv64 arm64-osx +TCC_X += riscv64 riscv32 arm64-osx # TCC_X += arm-fpa arm-fpa-ld arm-vfp arm-eabi # cross libtcc1.a targets to build @@ -189,6 +190,7 @@ TRIPLET-x86_64 ?= x86_64-linux-gnu TRIPLET-arm ?= arm-linux-gnueabi TRIPLET-arm64 ?= aarch64-linux-gnu TRIPLET-riscv64 ?= riscv64-linux-gnu +TRIPLET-riscv32 ?= riscv32-linux-gnu MARCH-i386 ?= i386-linux-gnu MARCH-$T ?= $(TRIPLET-$T) TR = $(if $(TRIPLET-$T),$T,ignored) @@ -216,6 +218,7 @@ arm64_FILES = $(CORE_FILES) arm64-gen.c arm64-link.c arm64-asm.c arm64-osx_FILES = $(arm64_FILES) tccmacho.c c67_FILES = $(CORE_FILES) c67-gen.c c67-link.c tcccoff.c riscv64_FILES = $(CORE_FILES) riscv64-gen.c riscv64-link.c riscv64-asm.c +riscv32_FILES = $(CORE_FILES) riscv32-gen.c riscv32-link.c riscv32-asm.c TCCDEFS_H$(subst yes,,$(CONFIG_predefs)) = tccdefs_.h diff --git a/configure b/configure index c1abffc93..030db59ab 100755 --- a/configure +++ b/configure @@ -348,6 +348,9 @@ case "$cpu" in riscv64) cpu="riscv64" ;; + riscv32) + cpu="riscv32" + ;; *) echo "Unsupported CPU" exit 1 @@ -636,7 +639,7 @@ cat >$TMPH <> 3) #define _tcc_align(addr,type) (((unsigned long)addr + __alignof__(type) - 1) \ diff --git a/lib/Makefile b/lib/Makefile index 5357e25fd..515f0ac4a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,6 +39,7 @@ I386_O = libtcc1.o $(COMMON_O) 
X86_64_O = libtcc1.o $(COMMON_O) ARM_O = libtcc1.o armeabi.o armflush.o $(COMMON_O) ARM64_O = lib-arm64.o $(COMMON_O) +RISCV32_O = libtcc1.o stdatomic.o builtin.o alloca.o RISCV64_O = lib-arm64.o $(COMMON_O) COMMON_O = stdatomic.o atomic.o builtin.o alloca.o alloca-bt.o WIN_O = crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o @@ -72,6 +73,7 @@ OBJ-arm-vfp = $(OBJ-arm) OBJ-arm-eabi = $(OBJ-arm) OBJ-arm-eabihf = $(OBJ-arm) OBJ-arm-wince = $(ARM_O) $(WIN_O) +OBJ-riscv32 = $(RISCV32_O) $(LIN_O) OBJ-riscv64 = $(RISCV64_O) $(LIN_O) OBJ-extra = $(filter $(EXTRA_O),$(OBJ-$T)) diff --git a/libtcc.c b/libtcc.c index 171e36226..b7b1f90cc 100644 --- a/libtcc.c +++ b/libtcc.c @@ -53,6 +53,10 @@ #include "riscv64-gen.c" #include "riscv64-link.c" #include "riscv64-asm.c" +#elif defined(TCC_TARGET_RISCV32) +#include "riscv32-gen.c" +#include "riscv32-link.c" +#include "riscv32-asm.c" #else #error unknown target #endif @@ -1731,6 +1735,9 @@ static const FlagDef options_m[] = { { offsetof(TCCState, ms_bitfields), 0, "ms-bitfields" }, #ifdef TCC_TARGET_X86_64 { offsetof(TCCState, nosse), FD_INVERT, "sse" }, +#endif +#ifdef TCC_TARGET_RISCV32 + { offsetof(TCCState, fpu), 0, "fpu" }, #endif { 0, 0, NULL } }; @@ -1783,6 +1790,8 @@ static const char dumpmachine_str[] = "aarch64" #elif defined TCC_TARGET_RISCV64 "riscv64" +#elif defined TCC_TARGET_RISCV32 + "riscv32" #endif "-" #ifdef TCC_TARGET_PE diff --git a/riscv32-asm.c b/riscv32-asm.c new file mode 100644 index 000000000..7a5bdb348 --- /dev/null +++ b/riscv32-asm.c @@ -0,0 +1,2628 @@ +/*************************************************************/ +/* + * RISCV32 assembler (based on RISCV64) for TCC + * + */ + +#ifdef TARGET_DEFS_ONLY + +#define CONFIG_TCC_ASM +/* 32 general purpose + 32 floating point registers */ +#define NB_ASM_REGS 64 + +ST_FUNC void g(int c); +ST_FUNC void gen_le16(int c); +ST_FUNC void gen_le32(int c); + +/*************************************************************/ +#else 
+/*************************************************************/ +#define USING_GLOBALS +#include "tcc.h" + +enum { + OPT_REG, + OPT_IM12S, + OPT_IM32, +}; +// Registers go from 0 to 31. We use next bit to choose general/float +#define REG_FLOAT_MASK 0x20 +#define REG_IS_FLOAT(register_index) ((register_index) & REG_FLOAT_MASK) +#define REG_VALUE(register_index) ((register_index) & (REG_FLOAT_MASK-1)) +#define C_ENCODE_RS1(register_index) (REG_VALUE(register_index) << 7) +#define C_ENCODE_RS2(register_index) (REG_VALUE(register_index) << 2) +#define ENCODE_RD(register_index) (REG_VALUE(register_index) << 7) +#define ENCODE_RS1(register_index) (REG_VALUE(register_index) << 15) +#define ENCODE_RS2(register_index) (REG_VALUE(register_index) << 20) +#define NTH_BIT(b, n) ((b >> n) & 1) +#define OP_IM12S (1 << OPT_IM12S) +#define OP_IM32 (1 << OPT_IM32) +#define OP_REG (1 << OPT_REG) + +typedef struct Operand { + uint32_t type; + union { + uint8_t reg; + uint16_t regset; + ExprValue e; + }; +} Operand; + +static const Operand zero = { OP_REG, { 0 }}; +static const Operand ra = { OP_REG, { 1 }}; +static const Operand zimm = { OP_IM12S }; + +static void asm_binary_opcode(TCCState* s1, int token); +ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str); +ST_FUNC void asm_compute_constraints(ASMOperand *operands, int nb_operands, int nb_outputs, const uint8_t *clobber_regs, int *pout_reg); +static void asm_emit_a(int token, uint32_t opcode, const Operand *rs1, const Operand *rs2, const Operand *rd1, int aq, int rl); +static void asm_emit_b(int token, uint32_t opcode, const Operand *rs1, const Operand *rs2, const Operand *imm); +static void asm_emit_i(int token, uint32_t opcode, const Operand *rd, const Operand *rs1, const Operand *rs2); +static void asm_emit_j(int token, uint32_t opcode, const Operand *rd, const Operand *rs2); +static void asm_emit_opcode(uint32_t opcode); +static void asm_emit_r(int token, uint32_t opcode, const Operand *rd, const Operand *rs1, 
const Operand *rs2); +static void asm_emit_s(int token, uint32_t opcode, const Operand *rs1, const Operand *rs2, const Operand *imm); +static void asm_emit_u(int token, uint32_t opcode, const Operand *rd, const Operand *rs2); +static void asm_emit_f(int token, uint32_t opcode, const Operand *rd, const Operand *rs1, const Operand *rs2); +static void asm_emit_fb(int token, uint32_t opcode, const Operand *rd, const Operand *rs); +static void asm_emit_fq(int token, uint32_t opcode, const Operand *rd, const Operand *rs1, const Operand *rs2, const Operand *rs3); +ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, int nb_outputs, int is_output, uint8_t *clobber_regs, int out_reg); +static void asm_nullary_opcode(TCCState *s1, int token); +ST_FUNC void asm_opcode(TCCState *s1, int token); +static int asm_parse_csrvar(int t); +ST_FUNC int asm_parse_regvar(int t); +static void asm_ternary_opcode(TCCState *s1, int token); +static void asm_unary_opcode(TCCState *s1, int token); +static void asm_branch_opcode(TCCState *s1, int token, int argc); +ST_FUNC void gen_expr32(ExprValue *pe); +static void parse_operand(TCCState *s1, Operand *op); +static void parse_branch_offset_operand(TCCState *s1, Operand *op); +static void parse_operands(TCCState *s1, Operand *ops, int count); +static void parse_mem_access_operands(TCCState *s1, Operand* ops); +ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier); +/* C extension */ +static void asm_emit_ca(int token, uint16_t opcode, const Operand *rd, const Operand *rs2); +static void asm_emit_cb(int token, uint16_t opcode, const Operand *rs1, const Operand *imm); +static void asm_emit_ci(int token, uint16_t opcode, const Operand *rd, const Operand *imm); +static void asm_emit_ciw(int token, uint16_t opcode, const Operand *rd, const Operand *imm); +static void asm_emit_cj(int token, uint16_t opcode, const Operand *imm); +static void asm_emit_cl(int token, uint16_t opcode, const Operand *rd, const Operand *rs1, 
const Operand *imm); +static void asm_emit_cr(int token, uint16_t opcode, const Operand *rd, const Operand *rs2); +static void asm_emit_cs(int token, uint16_t opcode, const Operand *rs2, const Operand *rs1, const Operand *imm); +static void asm_emit_css(int token, uint16_t opcode, const Operand *rs2, const Operand *imm); + +/* XXX: make it faster ? */ +ST_FUNC void g(int c) +{ + int ind1; + if (nocode_wanted) + return; + ind1 = ind + 1; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + cur_text_section->data[ind] = c; + ind = ind1; +} + +ST_FUNC void gen_le16 (int i) +{ + g(i); + g(i>>8); +} + +ST_FUNC void gen_le32 (int i) +{ + int ind1; + if (nocode_wanted) + return; + ind1 = ind + 4; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + cur_text_section->data[ind++] = i & 0xFF; + cur_text_section->data[ind++] = (i >> 8) & 0xFF; + cur_text_section->data[ind++] = (i >> 16) & 0xFF; + cur_text_section->data[ind++] = (i >> 24) & 0xFF; +} + +ST_FUNC void gen_expr32(ExprValue *pe) +{ + gen_le32(pe->v); +} + +static void asm_emit_opcode(uint32_t opcode) { + gen_le32(opcode); +} + +static void asm_nullary_opcode(TCCState *s1, int token) +{ + switch (token) { + // Sync instructions + + case TOK_ASM_fence_i: // I + asm_emit_opcode((0x3 << 2) | 3| (1 << 12)); + return; + + // System calls + + case TOK_ASM_ecall: // I (pseudo) + asm_emit_opcode((0x1C << 2) | 3 | (0 << 12)); + return; + case TOK_ASM_ebreak: // I (pseudo) + asm_emit_opcode((0x1C << 2) | 3 | (0 << 12) | (1 << 20)); + return; + + // Other + + case TOK_ASM_nop: + asm_emit_i(token, (4 << 2) | 3, &zero, &zero, &zimm); + return; + + case TOK_ASM_wfi: + asm_emit_opcode((0x1C << 2) | 3 | (0x105 << 20)); + return; + + /* Pseudoinstructions */ + case TOK_ASM_ret: + /* jalr zero, x1, 0 */ + asm_emit_opcode( 0x67 | (0 << 12) | ENCODE_RS1(1) ); + return; + + /* C extension */ + case TOK_ASM_c_ebreak: + asm_emit_cr(token, 2 | (9 << 12), &zero, 
&zero); + return; + case TOK_ASM_c_nop: + asm_emit_ci(token, 1, &zero, &zimm); + return; + + default: + expect("nullary instruction"); + } +} + +/* Parse a text containing operand and store the result in OP */ +static void parse_operand(TCCState *s1, Operand *op) +{ + ExprValue e = {0}; + Sym label = {0}; + int8_t reg; + + op->type = 0; + + if ((reg = asm_parse_regvar(tok)) != -1) { + next(); // skip register name + op->type = OP_REG; + op->reg = (uint8_t) reg; + return; + } else if (tok == '$') { + /* constant value */ + next(); // skip '#' or '$' + } else if ((e.v = asm_parse_csrvar(tok)) != -1) { + next(); + } else { + asm_expr(s1, &e); + } + op->type = OP_IM32; + op->e = e; + /* compare against unsigned 12-bit maximum */ + if (!op->e.sym) { + if ((int) op->e.v >= -0x1000 && (int) op->e.v < 0x1000) + op->type = OP_IM12S; + } else if (op->e.sym->type.t & (VT_EXTERN | VT_STATIC)) { + /* see also: "RISC-V ABIs Specification" V1.0 + + section 5.2 recommends using a GOT for + "possibly-undefined weak symbols" + + section 5.3: "Medium position independent code model" + if this is a non-local symbol: use a GOT + non-local: outside of a pc-relative +- 2 GiB range + */ + + label.type.t = VT_VOID | VT_STATIC; + + /* use the medium PIC model: GOT, auipc, lw */ + if (op->e.sym->type.t & VT_STATIC) + greloca(cur_text_section, op->e.sym, ind, R_RISCV_PCREL_HI20, 0); + else + greloca(cur_text_section, op->e.sym, ind, R_RISCV_GOT_HI20, 0); + put_extern_sym(&label, cur_text_section, ind, 0); + greloca(cur_text_section, &label, ind+4, R_RISCV_PCREL_LO12_I, 0); + + op->type = OP_IM12S; + op->e.v = 0; + } else { + expect("operand"); + } +} + +static void parse_branch_offset_operand(TCCState *s1, Operand *op){ + ExprValue e = {0}; + + asm_expr(s1, &e); + op->type = OP_IM32; + op->e = e; + /* compare against unsigned 12-bit maximum */ + if (!op->e.sym) { + if ((int) op->e.v >= -0x1000 && (int) op->e.v < 0x1000) + op->type = OP_IM12S; + } else if (op->e.sym->type.t & (VT_EXTERN | 
VT_STATIC)) { + greloca(cur_text_section, op->e.sym, ind, R_RISCV_BRANCH, 0); + + /* XXX: Implement far branches */ + + op->type = OP_IM12S; + op->e.v = 0; + } else { + expect("operand"); + } +} + +static void parse_jump_offset_operand(TCCState *s1, Operand *op){ + ExprValue e = {0}; + + asm_expr(s1, &e); + op->type = OP_IM32; + op->e = e; + /* compare against unsigned 12-bit maximum */ + if (!op->e.sym) { + if ((int) op->e.v >= -0x1000 && (int) op->e.v < 0x1000) + op->type = OP_IM12S; + } else if (op->e.sym->type.t & (VT_EXTERN | VT_STATIC)) { + greloca(cur_text_section, op->e.sym, ind, R_RISCV_JAL, 0); + op->type = OP_IM12S; + op->e.v = 0; + } else { + expect("operand"); + } +} + +static void parse_operands(TCCState *s1, Operand* ops, int count){ + int i; + for (i = 0; i < count; i++) { + if ( i != 0 ) + skip(','); + parse_operand(s1, &ops[i]); + } +} + +/* parse `X, imm(Y)` to {X, Y, imm} operands */ +static void parse_mem_access_operands(TCCState *s1, Operand* ops){ + + Operand op; + + parse_operand(s1, &ops[0]); + skip(','); + if ( tok == '(') { + /* `X, (Y)` case*/ + next(); + parse_operand(s1, &ops[1]); + skip(')'); + ops[2] = zimm; + } else { + parse_operand(s1, &ops[2]); + if ( tok == '('){ + /* `X, imm(Y)` case*/ + next(); + parse_operand(s1, &ops[1]); + skip(')'); + } else { + /* `X, Y` case*/ + /* we parsed Y thinking it was imm, swap and default imm to zero */ + op = ops[2]; + ops[1] = ops[2]; + ops[2] = op; + ops[2] = zimm; + } + } +} + +/* This is special: First operand is optional */ +static void asm_jal_opcode(TCCState *s1, int token){ + Operand ops[2]; + + if (token == TOK_ASM_j ){ + ops[0] = zero; // j offset + } else if (asm_parse_regvar(tok) == -1) { + ops[0] = ra; // jal offset + } else { + // jal reg, offset + parse_operand(s1, &ops[0]); + if ( tok == ',') next(); else expect("','"); + } + parse_jump_offset_operand(s1, &ops[1]); + asm_emit_j(token, 0x6f, &ops[0], &ops[1]); +} + +/* This is special: It can be a pseudointruction or a 
instruction */ +static void asm_jalr_opcode(TCCState *s1, int token){ + Operand ops[3]; + Operand op; + + parse_operand(s1, &ops[0]); + if ( tok == ',') + next(); + else { + /* no more operands, it's the pseudoinstruction: + * jalr rs + * Expand to: + * jalr ra, 0(rs) + */ + asm_emit_i(token, 0x67 | (0 << 12), &ra, &ops[0], &zimm); + return; + } + + if ( tok == '(') { + /* `X, (Y)` case*/ + next(); + parse_operand(s1, &ops[1]); + skip(')'); + ops[2] = zimm; + } else { + parse_operand(s1, &ops[2]); + if ( tok == '('){ + /* `X, imm(Y)` case*/ + next(); + parse_operand(s1, &ops[1]); + skip(')'); + } else { + /* `X, Y` case*/ + /* we parsed Y thinking it was imm, swap and default imm to zero */ + op = ops[2]; + ops[1] = ops[2]; + ops[2] = op; + ops[2] = zimm; + } + } + /* jalr(RD, RS1, IMM); I-format */ + asm_emit_i(token, 0x67 | (0 << 12), &ops[0], &ops[1], &ops[2]); +} + + +static void asm_unary_opcode(TCCState *s1, int token) +{ + uint32_t opcode = (0x1C << 2) | 3 | (2 << 12); + Operand op; + + parse_operands(s1, &op, 1); + /* Note: Those all map to CSR--so they are pseudo-instructions. 
*/ + opcode |= ENCODE_RD(op.reg); + + switch (token) { + /* pseudoinstructions */ + case TOK_ASM_rdcycle: + asm_emit_opcode(opcode | (0xC00 << 20)); + return; + case TOK_ASM_rdcycleh: + asm_emit_opcode(opcode | (0xC80 << 20)); + return; + case TOK_ASM_rdtime: + asm_emit_opcode(opcode | (0xC01 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_rdtimeh: + asm_emit_opcode(opcode | (0xC81 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_rdinstret: + asm_emit_opcode(opcode | (0xC02 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_rdinstreth: + asm_emit_opcode(opcode | (0xC82 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_frflags: + asm_emit_opcode(opcode | (0x001 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_frrm: + asm_emit_opcode(opcode | (0x002 << 20) | ENCODE_RD(op.reg)); + return; + case TOK_ASM_frcsr: + asm_emit_opcode(opcode | (0x003 << 20) | ENCODE_RD(op.reg)); + return; + + case TOK_ASM_jr: + /* jalr zero, 0(rs)*/ + asm_emit_i(token, 0x67 | (0 << 12), &zero, &op, &zimm); + return; + case TOK_ASM_call: + /* auipc ra, 0 */ + greloca(cur_text_section, op.e.sym, ind, R_RISCV_CALL, 0); + asm_emit_opcode(3 | (5 << 2) | ENCODE_RD(1)); + /* jalr zero, 0(ra) */ + asm_emit_opcode(0x67 | (0 << 12) | ENCODE_RS1(1)); + return; + case TOK_ASM_tail: + /* auipc x6, 0 */ + greloca(cur_text_section, op.e.sym, ind, R_RISCV_CALL, 0); + asm_emit_opcode(3 | (5 << 2) | ENCODE_RD(6)); + /* jalr zero, 0(x6) */ + asm_emit_opcode(0x67 | (0 << 12) | ENCODE_RS1(6)); + return; + + /* C extension */ + case TOK_ASM_c_j: + asm_emit_cj(token, 1 | (5 << 13), &op); + return; + case TOK_ASM_c_jal: /* RV32C-only */ + asm_emit_cj(token, 1 | (1 << 13), &op); + return; + case TOK_ASM_c_jalr: + asm_emit_cr(token, 2 | (9 << 12), &op, &zero); + return; + case TOK_ASM_c_jr: + asm_emit_cr(token, 2 | (8 << 12), &op, &zero); + return; + + default: + expect("unary instruction"); + } +} + +static void asm_emit_u(int token, uint32_t opcode, const Operand* rd, const Operand* rs2) +{ + 
if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_IM12S && rs2->type != OP_IM32) { + tcc_error("'%s': Expected second source operand that is an immediate value", get_tok_str(token, NULL)); + } else if (rs2->e.v >= 0x100000) { + tcc_error("'%s': Expected second source operand that is an immediate value between 0 and 0xfffff", get_tok_str(token, NULL)); + } + /* U-type instruction: + 31...12 imm[31:12] + 11...7 rd + 6...0 opcode */ + gen_le32(opcode | ENCODE_RD(rd->reg) | (rs2->e.v << 12)); +} + +static int parse_fence_operand(){ + int t = tok; + if ( tok == TOK_ASM_or ){ + // we are in a fence instruction, parse as output read + t = TOK_ASM_or_fence; + } + next(); + return t - (TOK_ASM_w_fence - 1); +} + +static void asm_fence_opcode(TCCState *s1, int token){ + // `fence` is both an instruction and a pseudoinstruction: + // `fence` expands to `fence iorw, iorw` + int succ = 0xF, pred = 0xF; + if (tok != TOK_LINEFEED && tok != ';' && tok != CH_EOF){ + pred = parse_fence_operand(); + if ( pred > 0xF || pred < 0) { + tcc_error("'%s': Expected first operand that is a valid predecessor operand", get_tok_str(token, NULL)); + } + skip(','); + succ = parse_fence_operand(); + if ( succ > 0xF || succ < 0) { + tcc_error("'%s': Expected second operand that is a valid successor operand", get_tok_str(token, NULL)); + } + } + asm_emit_opcode((0x3 << 2) | 3 | (0 << 12) | succ<<20 | pred<<24); +} + +static void asm_binary_opcode(TCCState* s1, int token) +{ + Operand imm = { OP_IM12S }; + Operand ops[2]; + int32_t lo; + uint32_t hi; + + parse_operands(s1, &ops[0], 2); + switch (token) { + case TOK_ASM_lui: + asm_emit_u(token, (0xD << 2) | 3, &ops[0], &ops[1]); + return; + case TOK_ASM_auipc: + asm_emit_u(token, (0x05 << 2) | 3, &ops[0], &ops[1]); + return; + + /* C extension */ + case TOK_ASM_c_add: + asm_emit_cr(token, 2 | (9 << 12), ops, ops + 1); + return; + case TOK_ASM_c_mv: + 
asm_emit_cr(token, 2 | (8 << 12), ops, ops + 1); + return; + + case TOK_ASM_c_addi16sp: + asm_emit_ci(token, 1 | (3 << 13), ops, ops + 1); + return; + case TOK_ASM_c_addi: + asm_emit_ci(token, 1, ops, ops + 1); + return; + case TOK_ASM_c_addiw: + asm_emit_ci(token, 1 | (1 << 13), ops, ops + 1); + return; + case TOK_ASM_c_fldsp: + asm_emit_ci(token, 2 | (1 << 13), ops, ops + 1); + return; + case TOK_ASM_c_flwsp: /* RV32FC-only */ + asm_emit_ci(token, 2 | (3 << 13), ops, ops + 1); + return; + case TOK_ASM_c_ldsp: + asm_emit_ci(token, 2 | (3 << 13), ops, ops + 1); + return; + case TOK_ASM_c_li: + asm_emit_ci(token, 1 | (2 << 13), ops, ops + 1); + return; + case TOK_ASM_c_lui: + asm_emit_ci(token, 1 | (3 << 13), ops, ops + 1); + return; + case TOK_ASM_c_lwsp: + asm_emit_ci(token, 2 | (2 << 13), ops, ops + 1); + return; + case TOK_ASM_c_slli: + asm_emit_ci(token, 2, ops, ops + 1); + return; + + case TOK_ASM_c_addi4spn: + asm_emit_ciw(token, 0, ops, ops + 1); + return; + +#define CA (1 | (3 << 10) | (4 << 13)) + case TOK_ASM_c_addw: + asm_emit_ca(token, CA | (1 << 5) | (1 << 12), ops, ops + 1); + return; + case TOK_ASM_c_and: + asm_emit_ca(token, CA | (3 << 5), ops, ops + 1); + return; + case TOK_ASM_c_or: + asm_emit_ca(token, CA | (2 << 5), ops, ops + 1); + return; + case TOK_ASM_c_sub: + asm_emit_ca(token, CA, ops, ops + 1); + return; + case TOK_ASM_c_subw: + asm_emit_ca(token, CA | (1 << 12), ops, ops + 1); + return; + case TOK_ASM_c_xor: + asm_emit_ca(token, CA | (1 << 5), ops, ops + 1); + return; +#undef CA + + case TOK_ASM_c_andi: + asm_emit_cb(token, 1 | (2 << 10) | (4 << 13), ops, ops + 1); + return; + case TOK_ASM_c_beqz: + asm_emit_cb(token, 1 | (6 << 13), ops, ops + 1); + return; + case TOK_ASM_c_bnez: + asm_emit_cb(token, 1 | (7 << 13), ops, ops + 1); + return; + case TOK_ASM_c_srai: + asm_emit_cb(token, 1 | (1 << 10) | (4 << 13), ops, ops + 1); + return; + case TOK_ASM_c_srli: + asm_emit_cb(token, 1 | (4 << 13), ops, ops + 1); + return; + + case 
TOK_ASM_c_sdsp: + asm_emit_css(token, 2 | (7 << 13), ops, ops + 1); + return; + case TOK_ASM_c_swsp: + asm_emit_css(token, 2 | (6 << 13), ops, ops + 1); + return; + case TOK_ASM_c_fswsp: /* RV32FC-only */ + asm_emit_css(token, 2 | (7 << 13), ops, ops + 1); + return; + case TOK_ASM_c_fsdsp: + asm_emit_css(token, 2 | (5 << 13), ops, ops + 1); + return; + + /* F/D extension */ + case TOK_ASM_fsqrt_d: + asm_emit_fb(token, 0x53 | (11 << 27) | (1 << 25) | (7 << 12), ops, ops + 1); + return; + case TOK_ASM_fsqrt_s: + asm_emit_fb(token, 0x53 | (11 << 27) | (0 << 25) | (7 << 12), ops, ops + 1); + return; + + /* pseudoinstructions */ + /* rd, sym */ + case TOK_ASM_la: + /* auipc rd, 0 */ + asm_emit_u(token, 3 | (5 << 2), ops, ops + 1); + /* lw rd, rd, 0 */ + asm_emit_i(token, 3 | (2 << 12), ops, ops, ops + 1); + return; + case TOK_ASM_lla: + /* auipc rd, 0 */ + asm_emit_u(token, 3 | (5 << 2), ops, ops + 1); + /* addi rd, rd, 0 */ + asm_emit_i(token, 3 | (4 << 2), ops, ops, ops + 1); + return; + case TOK_ASM_li: + if(ops[1].type != OP_IM32 && ops[1].type != OP_IM12S){ + tcc_error("'%s': Expected first source operand that is an immediate value between 0 and 0xFFFFFFFFFFFFFFFF", get_tok_str(token, NULL)); + } + lo = ops[1].e.v; + hi = (int64_t)ops[1].e.v >> 32; + if(lo < 0){ + hi += 1; + } + imm.e.v = ((hi + 0x800) & 0xfffff000) >> 12; + /* lui rd, HI_20(HI_32(imm)) */ + asm_emit_u(token, (0xD << 2) | 3, &ops[0], &imm); + /* addi rd, rd, LO_12(HI_32(imm)) */ + imm.e.v = (int32_t)hi<<20>>20; + asm_emit_i(token, 3 | (4 << 2), &ops[0], &ops[0], &imm); + /* slli rd, rd, 12 */ + imm.e.v = 12; + asm_emit_i(token, (4 << 2) | 3 | (1 << 12), &ops[0], &ops[0], &imm); + /* addi rd, rd, HI_12(LO_32(imm)) */ + imm.e.v = (lo + (1<<19)) >> 20; + asm_emit_i(token, 3 | (4 << 2), &ops[0], &ops[0], &imm); + /* slli rd, rd, 12 */ + imm.e.v = 12; + asm_emit_i(token, (4 << 2) | 3 | (1 << 12), &ops[0], &ops[0], &imm); + /* addi rd, rd, HI_12(LO_20(LO_32imm)) */ + lo = lo << 12 >> 12; + imm.e.v = lo 
>> 8; + asm_emit_i(token, 3 | (4 << 2), &ops[0], &ops[0], &imm); + /* slli rd, rd, 8 */ + imm.e.v = 8; + asm_emit_i(token, (4 << 2) | 3 | (1 << 12), &ops[0], &ops[0], &imm); + /* addi rd, rd, LO_8(LO_20(LO_32imm)) */ + lo &= 0xff; + imm.e.v = lo << 20 >> 20; + asm_emit_i(token, 3 | (4 << 2), &ops[0], &ops[0], &imm); + return; + case TOK_ASM_mv: + /* addi rd, rs, 0 */ + asm_emit_i(token, 3 | (4 << 2), &ops[0], &ops[1], &imm); + return; + case TOK_ASM_not: + /* xori rd, rs, -1 */ + imm.e.v = -1; + asm_emit_i(token, (0x4 << 2) | 3 | (4 << 12), &ops[0], &ops[1], &imm); + return; + case TOK_ASM_neg: + /* sub rd, x0, rs */ + asm_emit_r(token, (0xC << 2) | 3 | (0 << 12) | (0x20 << 25), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_negw: + /* subw rd, x0, rs */ + asm_emit_r(token, (0xE << 2) | 3 | (0 << 12) | (0x20 << 25), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_jump: + greloca(cur_text_section, ops->e.sym, ind, R_RISCV_CALL, 0); + /* auipc x5, 0 */ + asm_emit_opcode(3 | (5 << 2) | ENCODE_RD(5)); + /* jalr zero, 0(x5) */ + asm_emit_opcode(0x67 | (0 << 12) | ENCODE_RS1(5)); + return; + case TOK_ASM_seqz: + /* sltiu rd, rs, 1 */ + imm.e.v = 1; + asm_emit_i(token, (0x4 << 2) | 3 | (3 << 12), &ops[0], &ops[1], &imm); + return; + case TOK_ASM_snez: + /* sltu rd, zero, rs */ + asm_emit_r(token, (0xC << 2) | 3 | (3 << 12), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_sltz: + /* slt rd, rs, zero */ + asm_emit_r(token, (0xC << 2) | 3 | (2 << 12), &ops[0], &ops[1], &zero); + return; + case TOK_ASM_sgtz: + /* slt rd, zero, rs */ + asm_emit_r(token, (0xC << 2) | 3 | (2 << 12), &ops[0], &zero, &ops[1]); + return; + + case TOK_ASM_fabs_d: + /* fsgnjx.d rd, rs, rs */ + asm_emit_f(token, 0x53 | (4 << 27) | (1 << 25) | (2 << 12), &ops[0], &ops[1], &ops[1]); + return; + case TOK_ASM_fabs_s: + /* fsgnjx.s rd, rs, rs */ + asm_emit_f(token, 0x53 | (4 << 27) | (0 << 25) | (2 << 12), &ops[0], &ops[1], &ops[1]); + return; + + case TOK_ASM_csrs: + /* csrrs x0, csr, rs
*/ + asm_emit_opcode(0x73 | (2 << 12) | (ops[0].e.v << 20) | ENCODE_RS1(ops[1].reg)); + return; + case TOK_ASM_csrc: + /* csrrc x0, csr, rs */ + asm_emit_opcode(0x73 | (3 << 12) | (ops[0].e.v << 20) | ENCODE_RS1(ops[1].reg)); + return; + case TOK_ASM_fsrm: + /* csrrw rd, frm, rs */ + asm_emit_opcode(0x73 | (1 << 12) | (2 << 20) | ENCODE_RD(ops[0].reg) | ENCODE_RS1(ops[1].reg)); + return; + case TOK_ASM_fscsr: + /* csrrw rd, fcsr, rs */ + asm_emit_opcode(0x73 | (1 << 12) | (3 << 20) | ENCODE_RD(ops[0].reg) | ENCODE_RS1(ops[1].reg)); + return; + default: + expect("binary instruction"); + } +} + +/* caller: Add funct3, funct7 into opcode */ +static void asm_emit_r(int token, uint32_t opcode, const Operand* rd, const Operand* rs1, const Operand* rs2) +{ + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected first source operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected second source operand that is a register or immediate", get_tok_str(token, NULL)); + } + /* R-type instruction: + 31...25 funct7 + 24...20 rs2 + 19...15 rs1 + 14...12 funct3 + 11...7 rd + 6...0 opcode */ + gen_le32(opcode | ENCODE_RD(rd->reg) | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg)); +} + +/* caller: Add rounding mode, fmt, funct5 to opcode */ +static void asm_emit_f(int token, uint32_t opcode, const Operand* rd, const Operand* rs1, const Operand* rs2) +{ + if (rd->type != OP_REG || !REG_IS_FLOAT(rd->reg)) { + tcc_error("'%s': Expected destination operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs1->type != OP_REG || !REG_IS_FLOAT(rs1->reg)) { + tcc_error("'%s': Expected first source operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_REG || !REG_IS_FLOAT(rs2->reg)) { + tcc_error("'%s': Expected second source operand that is a 
floating-point register", get_tok_str(token, NULL)); + } + /* F-type instruction: + 31...27 funct5 + 26...25 fmt + 24...20 rs2 + 19...15 rs1 + 14...12 rm + 11...7 rd + 6...0 opcode = OP-FP */ + gen_le32(opcode | ENCODE_RD(rd->reg) | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg)); +} +/* caller: Add rounding mode, fmt, funct5 to opcode */ +static void asm_emit_fb(int token, uint32_t opcode, const Operand* rd, const Operand* rs) +{ + if (rd->type != OP_REG || !REG_IS_FLOAT(rd->reg)) { + tcc_error("'%s': Expected destination operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs->type != OP_REG || !REG_IS_FLOAT(rs->reg)) { + tcc_error("'%s': Expected source operand that is a floating-point register", get_tok_str(token, NULL)); + } + /* F-type instruction: + 31...27 funct5 + 26...25 fmt + 24...20 rs2 = 0 + 19...15 rs1 = rs + 14...12 rm + 11...7 rd + 6...0 opcode = OP-FP */ + gen_le32(opcode | ENCODE_RD(rd->reg) | ENCODE_RS1(rs->reg) | ENCODE_RS2(0)); +} +/* caller: Add rounding mode, fmt to opcode */ +static void asm_emit_fq(int token, uint32_t opcode, const Operand* rd, const Operand* rs1, const Operand* rs2, const Operand* rs3) +{ + if (rd->type != OP_REG || !REG_IS_FLOAT(rd->reg)) { + tcc_error("'%s': Expected destination operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs1->type != OP_REG || !REG_IS_FLOAT(rs1->reg)) { + tcc_error("'%s': Expected first source operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_REG || !REG_IS_FLOAT(rs2->reg)) { + tcc_error("'%s': Expected second source operand that is a floating-point register", get_tok_str(token, NULL)); + } + if (rs3->type != OP_REG || !REG_IS_FLOAT(rs3->reg)) { + tcc_error("'%s': Expected third source operand that is a floating-point register", get_tok_str(token, NULL)); + } + /* F-type instruction: + 31...27 rs3 + 26...25 fmt + 24...20 rs2 + 19...15 rs1 + 14...12 rm + 11...7 rd + 6...0 opcode */ + 
gen_le32(opcode | ENCODE_RD(rd->reg) | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg) | (REG_VALUE(rs3->reg) << 27)); +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_i(int token, uint32_t opcode, const Operand* rd, const Operand* rs1, const Operand* rs2) +{ + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected first source operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_IM12S) { + tcc_error("'%s': Expected second source operand that is an immediate value between 0 and 8191", get_tok_str(token, NULL)); + } + /* I-type instruction: + 31...20 imm[11:0] + 19...15 rs1 + 14...12 funct3 + 11...7 rd + 6...0 opcode */ + + gen_le32(opcode | ENCODE_RD(rd->reg) | ENCODE_RS1(rs1->reg) | (rs2->e.v << 20)); +} + +static void asm_emit_j(int token, uint32_t opcode, const Operand* rd, const Operand* rs2) +{ + uint32_t imm; + + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_IM12S && rs2->type != OP_IM32) { + tcc_error("'%s': Expected second source operand that is an immediate value", get_tok_str(token, NULL)); + } + + imm = rs2->e.v; + + /* even offsets in a +- 1 MiB range */ + if ((int)imm > (1 << 20) -1 || (int)imm <= -1 * ((1 << 20) -1)) { + tcc_error("'%s': Expected second source operand that is an immediate value between 0 and 0x1fffff", get_tok_str(token, NULL)); + } + + if (imm & 1) { + tcc_error("'%s': Expected second source operand that is an even immediate value", get_tok_str(token, NULL)); + } + /* J-type instruction: + 31 imm[20] + 30...21 imm[10:1] + 20 imm[11] + 19...12 imm[19:12] + 11...7 rd + 6...0 opcode */ + gen_le32(opcode | ENCODE_RD(rd->reg) | (((imm >> 20) & 1) << 31) | (((imm >> 1) & 0x3ff) << 21) | (((imm >> 11) & 1) << 20) | (((imm >> 12) & 0xff) << 12)); +} + +static void 
asm_mem_access_opcode(TCCState *s1, int token) +{ + + Operand ops[3]; + parse_mem_access_operands(s1, &ops[0]); + + /* Pseudoinstruction: inst reg, label + * expand to: + * auipc reg, 0 + * inst reg, 0(reg) + * And with the proper relocation to label + */ + if (ops[1].type == OP_IM32 && ops[1].e.sym && ops[1].e.sym->type.t & VT_STATIC){ + ops[1] = ops[0]; + /* set the offset to zero */ + ops[2].type = OP_IM12S; + ops[2].e.v = 0; + /* auipc reg, 0 */ + asm_emit_u(token, (0x05 << 2) | 3, &ops[0], &ops[2]); + } + + switch (token) { + // l{b|h|w|d}[u] rd, imm(rs1); I-format + case TOK_ASM_lb: + asm_emit_i(token, (0x0 << 2) | 3, &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_lh: + asm_emit_i(token, (0x0 << 2) | 3 | (1 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_lw: + asm_emit_i(token, (0x0 << 2) | 3 | (2 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_ld: + asm_emit_i(token, (0x0 << 2) | 3 | (3 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_lbu: + asm_emit_i(token, (0x0 << 2) | 3 | (4 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_lhu: + asm_emit_i(token, (0x0 << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_lwu: + asm_emit_i(token, (0x0 << 2) | 3 | (6 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_fld: + asm_emit_i(token, (0x1 << 2) | 3 | (3 << 12), &ops[0], &ops[1], &ops[2]); + return; + + // s{b|h|w|d} rs2, imm(rs1); S-format (with rsX swapped) + case TOK_ASM_sb: + asm_emit_s(token, (0x8 << 2) | 3 | (0 << 12), &ops[1], &ops[0], &ops[2]); + return; + case TOK_ASM_sh: + asm_emit_s(token, (0x8 << 2) | 3 | (1 << 12), &ops[1], &ops[0], &ops[2]); + return; + case TOK_ASM_sw: + asm_emit_s(token, (0x8 << 2) | 3 | (2 << 12), &ops[1], &ops[0], &ops[2]); + return; + case TOK_ASM_sd: + asm_emit_s(token, (0x8 << 2) | 3 | (3 << 12), &ops[1], &ops[0], &ops[2]); + return; + case TOK_ASM_fsd: + asm_emit_s(token, (0x9 << 2) | 3 | (3 << 12), &ops[1], &ops[0], &ops[2]); + return; + } 
+} + +static void asm_branch_opcode(TCCState *s1, int token, int argc) +{ + Operand ops[3]; + parse_operands(s1, &ops[0], argc-1); + skip(','); + parse_branch_offset_operand(s1, &ops[argc-1]); + + switch(token){ + /* branch (RS1, RS2, IMM); B-format */ + case TOK_ASM_beq: + asm_emit_b(token, 0x63 | (0 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_bne: + asm_emit_b(token, 0x63 | (1 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_blt: + asm_emit_b(token, 0x63 | (4 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_bge: + asm_emit_b(token, 0x63 | (5 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_bltu: + asm_emit_b(token, 0x63 | (6 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_bgeu: + asm_emit_b(token, 0x63 | (7 << 12), ops, ops + 1, ops + 2); + return; + /* related pseudoinstructions */ + case TOK_ASM_bgt: + asm_emit_b(token, 0x63 | (4 << 12), ops + 1, ops, ops + 2); + return; + case TOK_ASM_ble: + asm_emit_b(token, 0x63 | (5 << 12), ops + 1, ops, ops + 2); + return; + case TOK_ASM_bgtu: + asm_emit_b(token, 0x63 | (6 << 12), ops + 1, ops, ops + 2); + return; + case TOK_ASM_bleu: + asm_emit_b(token, 0x63 | (7 << 12), ops + 1, ops, ops + 2); + return; + /* shorter pseudoinstructions */ + case TOK_ASM_bnez: + /* bne rs, zero, offset */ + asm_emit_b(token, 0x63 | (1 << 12), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_beqz: + /* bne rs, zero, offset */ + asm_emit_b(token, 0x63 | (0 << 12), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_blez: + /* bge rs, zero, offset */ + asm_emit_b(token, 0x63 | (5 << 12), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_bgez: + /* bge zero, rs, offset */ + asm_emit_b(token, 0x63 | (5 << 12), &zero, &ops[0], &ops[1]); + return; + case TOK_ASM_bltz: + /* blt rs, zero, offset */ + asm_emit_b(token, 0x63 | (4 << 12), &ops[0], &zero, &ops[1]); + return; + case TOK_ASM_bgtz: + /* blt zero, rs, offset */ + asm_emit_b(token, 0x63 | (4 << 12), &zero, &ops[0], &ops[1]); + return; + } +} + 
+static void asm_ternary_opcode(TCCState *s1, int token) +{ + Operand ops[3]; + parse_operands(s1, &ops[0], 3); + + switch (token) { + case TOK_ASM_sll: + asm_emit_r(token, (0xC << 2) | 3 | (1 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_slli: + asm_emit_i(token, (4 << 2) | 3 | (1 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_srl: + asm_emit_r(token, (0xC << 2) | 3 | (4 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_srli: + asm_emit_i(token, (0x4 << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sra: + asm_emit_r(token, (0xC << 2) | 3 | (5 << 12) | (32 << 25), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_srai: + asm_emit_i(token, (0x4 << 2) | 3 | (5 << 12) | (16 << 26), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sllw: + asm_emit_r(token, (0xE << 2) | 3 | (1 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_slliw: + asm_emit_i(token, (6 << 2) | 3 | (1 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_srlw: + asm_emit_r(token, (0xE << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_srliw: + asm_emit_i(token, (0x6 << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sraw: + asm_emit_r(token, (0xE << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sraiw: + asm_emit_i(token, (0x6 << 2) | 3 | (5 << 12), &ops[0], &ops[1], &ops[2]); + return; + + // Arithmetic (RD,RS1,(RS2|IMM)); R-format, I-format or U-format + + case TOK_ASM_add: + asm_emit_r(token, (0xC << 2) | 3, &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_addi: + asm_emit_i(token, (4 << 2) | 3, &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sub: + asm_emit_r(token, (0xC << 2) | 3 | (32 << 25), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_addw: + asm_emit_r(token, (0xE << 2) | 3 | (0 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_addiw: // 64 bit + asm_emit_i(token, (0x6 << 2) | 3 | (0 << 12), &ops[0], 
&ops[1], &ops[2]); + return; + case TOK_ASM_subw: + asm_emit_r(token, (0xE << 2) | 3 | (0 << 12) | (32 << 25), &ops[0], &ops[1], &ops[2]); + return; + + // Logical (RD,RS1,(RS2|IMM)); R-format or I-format + + case TOK_ASM_xor: + asm_emit_r(token, (0xC << 2) | 3 | (4 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_xori: + asm_emit_i(token, (0x4 << 2) | 3 | (4 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_or: + asm_emit_r(token, (0xC << 2) | 3 | (6 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_ori: + asm_emit_i(token, (0x4 << 2) | 3 | (6 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_and: + asm_emit_r(token, (0xC << 2) | 3 | (7 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_andi: + asm_emit_i(token, (0x4 << 2) | 3 | (7 << 12), &ops[0], &ops[1], &ops[2]); + return; + + // Compare (RD,RS1,(RS2|IMM)); R-format or I-format + + case TOK_ASM_slt: + asm_emit_r(token, (0xC << 2) | 3 | (2 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_slti: + asm_emit_i(token, (0x4 << 2) | 3 | (2 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sltu: + asm_emit_r(token, (0xC << 2) | 3 | (3 << 12), &ops[0], &ops[1], &ops[2]); + return; + case TOK_ASM_sltiu: + asm_emit_i(token, (0x4 << 2) | 3 | (3 << 12), &ops[0], &ops[1], &ops[2]); + return; + + /* M extension */ + case TOK_ASM_div: + asm_emit_r(token, 0x33 | (4 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_divu: + asm_emit_r(token, 0x33 | (5 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_divuw: + asm_emit_r(token, 0x3b | (5 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_divw: + asm_emit_r(token, 0x3b | (4 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_mul: + asm_emit_r(token, 0x33 | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_mulh: + asm_emit_r(token, 0x33 | (1 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_mulhsu: + 
asm_emit_r(token, 0x33 | (2 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_mulhu: + asm_emit_r(token, 0x33 | (3 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_mulw: + asm_emit_r(token, 0x3b | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_rem: + asm_emit_r(token, 0x33 | (6 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_remu: + asm_emit_r(token, 0x33 | (7 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_remuw: + asm_emit_r(token, 0x3b | (7 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + case TOK_ASM_remw: + asm_emit_r(token, 0x3b | (6 << 12) | (1 << 25), ops, ops + 1, ops + 2); + return; + + /* Zicsr extension; (rd, csr, rs/uimm) */ + case TOK_ASM_csrrc: + asm_emit_i(token, 0x73 | (3 << 12), ops, ops + 2, ops + 1); + return; + case TOK_ASM_csrrci: + /* using rs1 field for uimmm */ + ops[2].type = OP_REG; + asm_emit_i(token, 0x73 | (7 << 12), ops, ops + 2, ops + 1); + return; + case TOK_ASM_csrrs: + asm_emit_i(token, 0x73 | (2 << 12), ops, ops + 2, ops + 1); + return; + case TOK_ASM_csrrsi: + ops[2].type = OP_REG; + asm_emit_i(token, 0x73 | (6 << 12), ops, ops + 2, ops + 1); + return; + case TOK_ASM_csrrw: + asm_emit_i(token, 0x73 | (1 << 12), ops, ops + 2, ops + 1); + return; + case TOK_ASM_csrrwi: + ops[2].type = OP_REG; + asm_emit_i(token, 0x73 | (5 << 12), ops, ops + 2, ops + 1); + return; + + /* C extension */ + /* register-based loads and stores (RD, RS1, IMM); CL-format */ + case TOK_ASM_c_fld: + asm_emit_cl(token, 1 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_flw: /* RV32FC-only */ + asm_emit_cl(token, 3 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_fsd: + asm_emit_cs(token, 5 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_fsw: /* RV32FC-only */ + asm_emit_cs(token, 7 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_ld: + asm_emit_cl(token, 3 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_lw: + 
asm_emit_cl(token, 2 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_sd: + asm_emit_cs(token, 7 << 13, ops, ops + 1, ops + 2); + return; + case TOK_ASM_c_sw: + asm_emit_cs(token, 6 << 13, ops, ops + 1, ops + 2); + return; + + /* F/D extension */ + case TOK_ASM_fsgnj_d: + asm_emit_f(token, 0x53 | (4 << 27) | (1 << 25) | (0 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_fsgnj_s: + asm_emit_f(token, 0x53 | (4 << 27) | (0 << 25) | (0 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_fmax_d: + asm_emit_f(token, 0x53 | (5 << 27) | (1 << 25) | (1 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_fmax_s: + asm_emit_f(token, 0x53 | (5 << 27) | (0 << 25) | (1 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_fmin_d: + asm_emit_f(token, 0x53 | (5 << 27) | (1 << 25) | (0 << 12), ops, ops + 1, ops + 2); + return; + case TOK_ASM_fmin_s: + asm_emit_f(token, 0x53 | (5 << 27) | (0 << 25) | (0 << 12), ops, ops + 1, ops + 2); + return; + + default: + expect("ternary instruction"); + } +} + +static void asm_quaternary_opcode(TCCState *s1, int token) +{ + Operand ops[4]; + parse_operands(s1, &ops[0], 4); + + switch (token) { + case TOK_ASM_fmadd_d: + asm_emit_fq(token, 0x43 | (1 << 25) | (7 << 12), ops, ops + 1, ops + 2, ops + 3); + return; + case TOK_ASM_fmadd_s: + asm_emit_fq(token, 0x43 | (0 << 25) | (7 << 12), ops, ops + 1, ops + 2, ops + 3); + return; + + default: + expect("quaternary instruction"); + } +} + +static void asm_atomic_opcode(TCCState *s1, int token) +{ + Operand ops[3]; + + parse_operand(s1, &ops[0]); + skip(','); + + if ( token <= TOK_ASM_lr_d_aqrl && token >= TOK_ASM_lr_w ) { + ops[1] = zero; + } else { + parse_operand(s1, &ops[1]); + skip(','); + } + + skip('('); + parse_operand(s1, &ops[2]); + skip(')'); + + switch(token){ + case TOK_ASM_lr_w: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 0); + break; + case TOK_ASM_lr_w_aq: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], 
&ops[2], 1, 0); + break; + case TOK_ASM_lr_w_rl: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 1); + break; + case TOK_ASM_lr_w_aqrl: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 1); + break; + + case TOK_ASM_lr_d: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 0); + break; + case TOK_ASM_lr_d_aq: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 0); + break; + case TOK_ASM_lr_d_rl: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 1); + break; + case TOK_ASM_lr_d_aqrl: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 1); + break; + + case TOK_ASM_sc_w: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 0); + break; + case TOK_ASM_sc_w_aq: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 0); + break; + case TOK_ASM_sc_w_rl: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 1); + break; + case TOK_ASM_sc_w_aqrl: + asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 1); + break; + + case TOK_ASM_sc_d: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 0); + break; + case TOK_ASM_sc_d_aq: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 0); + break; + case TOK_ASM_sc_d_rl: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 1); + break; + case TOK_ASM_sc_d_aqrl: + asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 1); + break; + } +} + +/* caller: Add funct3 and func5 to opcode */ +static void asm_emit_a(int token, uint32_t opcode, const Operand *rd1, const Operand *rs2, const Operand *rs1, int aq, int rl) +{ + if (rd1->type != OP_REG) + tcc_error("'%s': Expected first destination operand that is a register", get_tok_str(token, NULL)); + if (rs2->type != OP_REG) + tcc_error("'%s': Expected second source 
operand that is a register", get_tok_str(token, NULL)); + if (rs1->type != OP_REG) + tcc_error("'%s': Expected third source operand that is a register", get_tok_str(token, NULL)); + /* A-type instruction: + 31...27 funct5 + 26 aq + 25 rl + 24...20 rs2 + 19...15 rs1 + 14...11 funct3 + 11...7 rd + 6...0 opcode + opcode always fixed pos. */ + gen_le32(opcode | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg) | ENCODE_RD(rd1->reg) | aq << 26 | rl << 25); +} + +/* caller: Add funct3 to opcode */ +static void asm_emit_s(int token, uint32_t opcode, const Operand* rs1, const Operand* rs2, const Operand* imm) +{ + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected first source operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected second source operand that is a register", get_tok_str(token, NULL)); + } + if (imm->type != OP_IM12S) { + tcc_error("'%s': Expected third operand that is an immediate value between 0 and 8191", get_tok_str(token, NULL)); + } + { + uint16_t v = imm->e.v; + /* S-type instruction: + 31...25 imm[11:5] + 24...20 rs2 + 19...15 rs1 + 14...12 funct3 + 11...7 imm[4:0] + 6...0 opcode + opcode always fixed pos. 
*/ + gen_le32(opcode | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg) | ((v & 0x1F) << 7) | ((v >> 5) << 25)); + } +} + +static void asm_emit_b(int token, uint32_t opcode, const Operand *rs1, const Operand *rs2, const Operand *imm) +{ + uint32_t offset; + + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected first source operand that is a register", get_tok_str(token, NULL)); + } + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + if (imm->type != OP_IM12S) { + tcc_error("'%s': Expected second source operand that is an immediate value between 0 and 8191", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + /* B-type instruction: + 31 imm[12] + 30...25 imm[10:5] + 24...20 rs2 + 19...15 rs1 + 14...12 funct3 + 8...11 imm[4:1] + 7 imm[11] + 6...0 opcode */ + asm_emit_opcode(opcode | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg) | (((offset >> 1) & 0xF) << 8) | (((offset >> 5) & 0x1f) << 25) | (((offset >> 11) & 1) << 7) | (((offset >> 12) & 1) << 31)); +} + +ST_FUNC void asm_opcode(TCCState *s1, int token) +{ + switch (token) { + case TOK_ASM_ebreak: + case TOK_ASM_ecall: + case TOK_ASM_fence_i: + case TOK_ASM_hrts: + case TOK_ASM_mrth: + case TOK_ASM_mrts: + case TOK_ASM_wfi: + asm_nullary_opcode(s1, token); + return; + + case TOK_ASM_fence: + asm_fence_opcode(s1, token); + return; + + case TOK_ASM_rdcycle: + case TOK_ASM_rdcycleh: + case TOK_ASM_rdtime: + case TOK_ASM_rdtimeh: + case TOK_ASM_rdinstret: + case TOK_ASM_rdinstreth: + asm_unary_opcode(s1, token); + return; + + case TOK_ASM_lui: + case TOK_ASM_auipc: + case TOK_ASM_fsqrt_s: + case TOK_ASM_fsqrt_d: + asm_binary_opcode(s1, token); + return; + + case TOK_ASM_lb: + case TOK_ASM_lh: + case TOK_ASM_lw: + case TOK_ASM_ld: + case TOK_ASM_fld: + case TOK_ASM_lbu: + case TOK_ASM_lhu: + case TOK_ASM_lwu: + case TOK_ASM_sb: + case TOK_ASM_sh: + case TOK_ASM_sw: + case TOK_ASM_sd: + case TOK_ASM_fsd: + asm_mem_access_opcode(s1, token); 
+ break; + + case TOK_ASM_jalr: + asm_jalr_opcode(s1, token); /* it can be a pseudo instruction too*/ + break; + case TOK_ASM_j: + asm_jal_opcode(s1, token); /* jal zero, offset*/ + return; + case TOK_ASM_jal: + asm_jal_opcode(s1, token); /* it can be a pseudo instruction too*/ + break; + + case TOK_ASM_add: + case TOK_ASM_addi: + case TOK_ASM_addiw: + case TOK_ASM_addw: + case TOK_ASM_and: + case TOK_ASM_andi: + case TOK_ASM_or: + case TOK_ASM_ori: + case TOK_ASM_sll: + case TOK_ASM_slli: + case TOK_ASM_slliw: + case TOK_ASM_sllw: + case TOK_ASM_slt: + case TOK_ASM_slti: + case TOK_ASM_sltiu: + case TOK_ASM_sltu: + case TOK_ASM_sra: + case TOK_ASM_srai: + case TOK_ASM_sraiw: + case TOK_ASM_sraw: + case TOK_ASM_srl: + case TOK_ASM_srli: + case TOK_ASM_srliw: + case TOK_ASM_srlw: + case TOK_ASM_sub: + case TOK_ASM_subw: + case TOK_ASM_xor: + case TOK_ASM_xori: + /* M extension */ + case TOK_ASM_div: + case TOK_ASM_divu: + case TOK_ASM_divuw: + case TOK_ASM_divw: + case TOK_ASM_mul: + case TOK_ASM_mulh: + case TOK_ASM_mulhsu: + case TOK_ASM_mulhu: + case TOK_ASM_mulw: + case TOK_ASM_rem: + case TOK_ASM_remu: + case TOK_ASM_remuw: + case TOK_ASM_remw: + /* Zicsr extension */ + case TOK_ASM_csrrc: + case TOK_ASM_csrrci: + case TOK_ASM_csrrs: + case TOK_ASM_csrrsi: + case TOK_ASM_csrrw: + case TOK_ASM_csrrwi: + /* F/D extension */ + case TOK_ASM_fsgnj_d: + case TOK_ASM_fsgnj_s: + case TOK_ASM_fmax_s: + case TOK_ASM_fmax_d: + case TOK_ASM_fmin_s: + case TOK_ASM_fmin_d: + asm_ternary_opcode(s1, token); + return; + case TOK_ASM_fmadd_d: + case TOK_ASM_fmadd_s: + asm_quaternary_opcode(s1, token); + return; + + /* Branches */ + case TOK_ASM_beq: + case TOK_ASM_bge: + case TOK_ASM_bgeu: + case TOK_ASM_blt: + case TOK_ASM_bltu: + case TOK_ASM_bne: + asm_branch_opcode(s1, token, 3); + break; + + /* C extension */ + case TOK_ASM_c_ebreak: + case TOK_ASM_c_nop: + asm_nullary_opcode(s1, token); + return; + + case TOK_ASM_c_j: + case TOK_ASM_c_jal: + case TOK_ASM_c_jalr: + case 
TOK_ASM_c_jr: + asm_unary_opcode(s1, token); + return; + + case TOK_ASM_c_add: + case TOK_ASM_c_addi16sp: + case TOK_ASM_c_addi4spn: + case TOK_ASM_c_addi: + case TOK_ASM_c_addiw: + case TOK_ASM_c_addw: + case TOK_ASM_c_and: + case TOK_ASM_c_andi: + case TOK_ASM_c_beqz: + case TOK_ASM_c_bnez: + case TOK_ASM_c_fldsp: + case TOK_ASM_c_flwsp: + case TOK_ASM_c_fsdsp: + case TOK_ASM_c_fswsp: + case TOK_ASM_c_ldsp: + case TOK_ASM_c_li: + case TOK_ASM_c_lui: + case TOK_ASM_c_lwsp: + case TOK_ASM_c_mv: + case TOK_ASM_c_or: + case TOK_ASM_c_sdsp: + case TOK_ASM_c_slli: + case TOK_ASM_c_srai: + case TOK_ASM_c_srli: + case TOK_ASM_c_sub: + case TOK_ASM_c_subw: + case TOK_ASM_c_swsp: + case TOK_ASM_c_xor: + asm_binary_opcode(s1, token); + return; + + case TOK_ASM_c_fld: + case TOK_ASM_c_flw: + case TOK_ASM_c_fsd: + case TOK_ASM_c_fsw: + case TOK_ASM_c_ld: + case TOK_ASM_c_lw: + case TOK_ASM_c_sd: + case TOK_ASM_c_sw: + asm_ternary_opcode(s1, token); + return; + + /* pseudoinstructions */ + case TOK_ASM_nop: + case TOK_ASM_ret: + asm_nullary_opcode(s1, token); + return; + + case TOK_ASM_jr: + case TOK_ASM_call: + case TOK_ASM_tail: + case TOK_ASM_frflags: + case TOK_ASM_frrm: + case TOK_ASM_frcsr: + asm_unary_opcode(s1, token); + return; + + case TOK_ASM_la: + case TOK_ASM_lla: + case TOK_ASM_li: + case TOK_ASM_jump: + case TOK_ASM_seqz: + case TOK_ASM_snez: + case TOK_ASM_sltz: + case TOK_ASM_sgtz: + case TOK_ASM_mv: + case TOK_ASM_not: + case TOK_ASM_neg: + case TOK_ASM_negw: + case TOK_ASM_fabs_s: + case TOK_ASM_fabs_d: + case TOK_ASM_csrc: + case TOK_ASM_csrs: + case TOK_ASM_fsrm: + case TOK_ASM_fscsr: + asm_binary_opcode(s1, token); + return; + + case TOK_ASM_bnez: + case TOK_ASM_beqz: + case TOK_ASM_blez: + case TOK_ASM_bgez: + case TOK_ASM_bltz: + case TOK_ASM_bgtz: + asm_branch_opcode(s1, token, 2); + return; + + case TOK_ASM_bgt: + case TOK_ASM_bgtu: + case TOK_ASM_ble: + case TOK_ASM_bleu: + asm_branch_opcode(s1, token, 3); + return; + + /* Atomic operations */ + case 
TOK_ASM_lr_w: + case TOK_ASM_lr_w_aq: + case TOK_ASM_lr_w_rl: + case TOK_ASM_lr_w_aqrl: + case TOK_ASM_lr_d: + case TOK_ASM_lr_d_aq: + case TOK_ASM_lr_d_rl: + case TOK_ASM_lr_d_aqrl: + case TOK_ASM_sc_w: + case TOK_ASM_sc_w_aq: + case TOK_ASM_sc_w_rl: + case TOK_ASM_sc_w_aqrl: + case TOK_ASM_sc_d: + case TOK_ASM_sc_d_aq: + case TOK_ASM_sc_d_rl: + case TOK_ASM_sc_d_aqrl: + asm_atomic_opcode(s1, token); + break; + + default: + expect("known instruction"); + } +} + +static int asm_parse_csrvar(int t) +{ + switch (t) { + case TOK_ASM_cycle: + return 0xc00; + case TOK_ASM_fcsr: + return 3; + case TOK_ASM_fflags: + return 1; + case TOK_ASM_frm: + return 2; + case TOK_ASM_instret: + return 0xc02; + case TOK_ASM_time: + return 0xc01; + case TOK_ASM_cycleh: + return 0xc80; + case TOK_ASM_instreth: + return 0xc82; + case TOK_ASM_timeh: + return 0xc81; + default: + return -1; + } +} + +ST_FUNC void subst_asm_operand(CString *add_str, SValue *sv, int modifier) +{ + int r, reg, val; + + r = sv->r; + if ((r & VT_VALMASK) == VT_CONST) { + if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' && + modifier != 'P') { + //cstr_ccat(add_str, '#'); + } + if (r & VT_SYM) { + const char *name = get_tok_str(sv->sym->v, NULL); + if (sv->sym->v >= SYM_FIRST_ANOM) { + /* In case of anonymous symbols ("L.42", used + for static data labels) we can't find them + in the C symbol table when later looking up + this name. So enter them now into the asm label + list when we still know the symbol. 
*/ + get_asm_sym(tok_alloc(name, strlen(name))->tok, sv->sym); + } + if (tcc_state->leading_underscore) + cstr_ccat(add_str, '_'); + cstr_cat(add_str, name, -1); + if ((uint32_t) sv->c.i == 0) + goto no_offset; + cstr_ccat(add_str, '+'); + } + val = sv->c.i; + if (modifier == 'n') + val = -val; + if (modifier == 'z' && sv->c.i == 0) { + cstr_cat(add_str, "zero", -1); + } else { + cstr_printf(add_str, "%d", (int) sv->c.i); + } + no_offset:; + } else if ((r & VT_VALMASK) == VT_LOCAL) { + cstr_printf(add_str, "%d", (int) sv->c.i); + } else if (r & VT_LVAL) { + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + if ((sv->type.t & VT_BTYPE) == VT_FLOAT || + (sv->type.t & VT_BTYPE) == VT_DOUBLE) { + /* floating point register */ + reg = TOK_ASM_f0 + REG_VALUE(reg); + } else { + /* general purpose register */ + reg = TOK_ASM_x0 + reg; + } + cstr_cat(add_str, get_tok_str(reg, NULL), -1); + } else { + /* register case */ + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + if ((sv->type.t & VT_BTYPE) == VT_FLOAT || + (sv->type.t & VT_BTYPE) == VT_DOUBLE) { + /* floating point register */ + reg = TOK_ASM_f0 + REG_VALUE(reg); + } else { + /* general purpose register */ + reg = TOK_ASM_x0 + reg; + } + cstr_cat(add_str, get_tok_str(reg, NULL), -1); + } +} + +/* TCC does not use RISC-V register numbers internally, it uses 0-8 for + * integers and 8-16 for floats instead */ +static int tcc_ireg(int r){ + return REG_VALUE(r) - 10; +} +static int tcc_freg(int r){ + return REG_VALUE(r) - 10 + 8; +} + +/* generate prolog and epilog code for asm statement */ +ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, + int nb_outputs, int is_output, + uint8_t *clobber_regs, + int out_reg) +{ + uint8_t regs_allocated[NB_ASM_REGS]; + ASMOperand *op; + int i, reg; + + static const uint8_t reg_saved[] = { + // General purpose regs + 8, 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + // Float regs + 40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 
59 + }; + + /* mark all used registers */ + memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated)); + for(i = 0; i < nb_operands; i++) { + op = &operands[i]; + if (op->reg >= 0) { + regs_allocated[op->reg] = 1; + } + } + + if(!is_output) { + /* generate reg save code */ + for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) { + reg = reg_saved[i]; + if (regs_allocated[reg]) { + /* push */ + /* addi sp, sp, -offset */ + gen_le32((4 << 2) | 3 | + ENCODE_RD(2) | ENCODE_RS1(2) | (unsigned)-8 << 20); + if (REG_IS_FLOAT(reg)){ + /* fsd reg, offset(sp) */ + gen_le32( 0x27 | (3 << 12) | + ENCODE_RS2(reg) | ENCODE_RS1(2) ); + } else { + /* sd reg, offset(sp) */ + gen_le32((0x8 << 2) | 3 | (3 << 12) | + ENCODE_RS2(reg) | ENCODE_RS1(2) ); + } + } + } + + /* generate load code */ + for(i = 0; i < nb_operands; i++) { + op = &operands[i]; + if (op->reg >= 0) { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL && + op->is_memory) { + /* memory reference case (for both input and + output cases) */ + SValue sv; + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL; + sv.type.t = VT_PTR; + load(tcc_ireg(op->reg), &sv); + } else if (i >= nb_outputs || op->is_rw) { + /* load value in register */ + if ((op->vt->type.t & VT_BTYPE) == VT_FLOAT || + (op->vt->type.t & VT_BTYPE) == VT_DOUBLE) { + load(tcc_freg(op->reg), op->vt); + } else { + load(tcc_ireg(op->reg), op->vt); + } + if (op->is_llong) { + tcc_error("long long not implemented"); + } + } + } + } + } else { + /* generate save code */ + for(i = 0 ; i < nb_outputs; i++) { + op = &operands[i]; + if (op->reg >= 0) { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) { + if (!op->is_memory) { + SValue sv; + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL; + sv.type.t = VT_PTR; + load(tcc_ireg(out_reg), &sv); + + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | out_reg; + store(tcc_ireg(op->reg), &sv); + } + } else { + if ((op->vt->type.t & VT_BTYPE) == VT_FLOAT || + (op->vt->type.t & VT_BTYPE) == VT_DOUBLE) { + 
store(tcc_freg(op->reg), op->vt); + } else { + store(tcc_ireg(op->reg), op->vt); + } + if (op->is_llong) { + tcc_error("long long not implemented"); + } + } + } + } + /* generate reg restore code for floating point registers */ + for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) { + reg = reg_saved[i]; + if (regs_allocated[reg]) { + /* pop */ + if (REG_IS_FLOAT(reg)){ + /* fld reg, offset(sp) */ + gen_le32(7 | (3 << 12) | + ENCODE_RD(reg) | ENCODE_RS1(2) | 0); + } else { + /* ld reg, offset(sp) */ + gen_le32(3 | (3 << 12) | + ENCODE_RD(reg) | ENCODE_RS1(2) | 0); + } + /* addi sp, sp, offset */ + gen_le32((4 << 2) | 3 | + ENCODE_RD(2) | ENCODE_RS1(2) | 8 << 20); + } + } + } +} + +/* return the constraint priority (we allocate first the lowest + numbered constraints) */ +static inline int constraint_priority(const char *str) +{ + // TODO: How is this chosen?? + int priority, c, pr; + + /* we take the lowest priority */ + priority = 0; + for(;;) { + c = *str; + if (c == '\0') + break; + str++; + switch(c) { + case 'A': // address that is held in a general-purpose register. + case 'S': // constraint that matches an absolute symbolic address. 
+ case 'f': // register [float] + case 'r': // register [general] + case 'p': // valid memory address for load,store [general] + pr = 3; + break; + case 'I': // 12 bit signed immedate + case 'i': // immediate integer operand, including symbolic constants [general] + case 'm': // memory operand [general] + case 'g': // general-purpose-register, memory, immediate integer [general] + pr = 4; + break; + case 'v': + tcc_error("unimp: constraint '%c'", c); + default: + tcc_error("unknown constraint '%d'", c); + } + if (pr > priority) + priority = pr; + } + return priority; +} + +static const char *skip_constraint_modifiers(const char *p) +{ + /* Constraint modifier: + = Operand is written to by this instruction + + Operand is both read and written to by this instruction + % Instruction is commutative for this operand and the following operand. + + Per-alternative constraint modifier: + & Operand is clobbered before the instruction is done using the input operands + */ + while (*p == '=' || *p == '&' || *p == '+' || *p == '%') + p++; + return p; +} + +#define REG_OUT_MASK 0x01 +#define REG_IN_MASK 0x02 + +#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask) + +ST_FUNC void asm_compute_constraints(ASMOperand *operands, + int nb_operands, int nb_outputs, + const uint8_t *clobber_regs, + int *pout_reg) +{ + /* TODO: Simple constraints + whitespace ignored + o memory operand that is offsetable + V memory but not offsetable + < memory operand with autodecrement addressing is allowed. Restrictions apply. + > memory operand with autoincrement addressing is allowed. Restrictions apply. 
+ n immediate integer operand with a known numeric value + E immediate floating operand (const_double) is allowed, but only if target=host + F immediate floating operand (const_double or const_vector) is allowed + s immediate integer operand whose value is not an explicit integer + X any operand whatsoever + 0...9 (postfix); (can also be more than 1 digit number); an operand that matches the specified operand number is allowed + */ + + /* TODO: RISCV constraints + J The integer 0. + K A 5-bit unsigned immediate for CSR access instructions. + A An address that is held in a general-purpose register. + S A constraint that matches an absolute symbolic address. + vr A vector register (if available).. + vd A vector register, excluding v0 (if available). + vm A vector register, only v0 (if available). + */ + ASMOperand *op; + int sorted_op[MAX_ASM_OPERANDS]; + int i, j, k, p1, p2, tmp, reg, c, reg_mask; + const char *str; + uint8_t regs_allocated[NB_ASM_REGS]; + + /* init fields */ + for (i = 0; i < nb_operands; i++) { + op = &operands[i]; + op->input_index = -1; + op->ref_index = -1; + op->reg = -1; + op->is_memory = 0; + op->is_rw = 0; + } + /* compute constraint priority and evaluate references to output + constraints if input constraints */ + for (i = 0; i < nb_operands; i++) { + op = &operands[i]; + str = op->constraint; + str = skip_constraint_modifiers(str); + if (isnum(*str) || *str == '[') { + /* this is a reference to another constraint */ + k = find_constraint(operands, nb_operands, str, NULL); + if ((unsigned) k >= i || i < nb_outputs) + tcc_error("invalid reference in constraint %d ('%s')", + i, str); + op->ref_index = k; + if (operands[k].input_index >= 0) + tcc_error("cannot reference twice the same operand"); + operands[k].input_index = i; + op->priority = 5; + } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL + && op->vt->sym + && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) { + op->priority = 1; + op->reg = reg; + } else { + op->priority = 
constraint_priority(str); + } + } + + /* sort operands according to their priority */ + for (i = 0; i < nb_operands; i++) + sorted_op[i] = i; + for (i = 0; i < nb_operands - 1; i++) { + for (j = i + 1; j < nb_operands; j++) { + p1 = operands[sorted_op[i]].priority; + p2 = operands[sorted_op[j]].priority; + if (p2 < p1) { + tmp = sorted_op[i]; + sorted_op[i] = sorted_op[j]; + sorted_op[j] = tmp; + } + } + } + + for (i = 0; i < NB_ASM_REGS; i++) { + if (clobber_regs[i]) + regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK; + else + regs_allocated[i] = 0; + } + + /* allocate registers and generate corresponding asm moves */ + for (i = 0; i < nb_operands; i++) { + j = sorted_op[i]; + op = &operands[j]; + str = op->constraint; + /* no need to allocate references */ + if (op->ref_index >= 0) + continue; + /* select if register is used for output, input or both */ + if (op->input_index >= 0) { + reg_mask = REG_IN_MASK | REG_OUT_MASK; + } else if (j < nb_outputs) { + reg_mask = REG_OUT_MASK; + } else { + reg_mask = REG_IN_MASK; + } + if (op->reg >= 0) { + if (is_reg_allocated(op->reg)) + tcc_error + ("asm regvar requests register that's taken already"); + reg = op->reg; + } + try_next: + c = *str++; + switch (c) { + case '=': // Operand is written-to + goto try_next; + case '+': // Operand is both READ and written-to + op->is_rw = 1; + /* FALL THRU */ + case '&': // Operand is clobbered before the instruction is done using the input operands + if (j >= nb_outputs) + tcc_error("'%c' modifier can only be applied to outputs", c); + reg_mask = REG_IN_MASK | REG_OUT_MASK; + goto try_next; + case 'r': // general-purpose register + case 'p': // loadable/storable address + /* any general register */ + /* From a0 to a7 */ + if ((reg = op->reg) >= 0) + goto reg_found; + else for (reg = 10; reg <= 18; reg++) { + if (!is_reg_allocated(reg)) + goto reg_found; + } + goto try_next; + reg_found: + /* now we can reload in the register */ + op->is_llong = 0; + op->reg = reg; + 
regs_allocated[reg] |= reg_mask; + break; + case 'f': // floating pont register + /* floating point register */ + /* From fa0 to fa7 */ + if ((reg = op->reg) >= 0) + goto reg_found; + else for (reg = 42; reg <= 50; reg++) { + if (!is_reg_allocated(reg)) + goto reg_found; + } + goto try_next; + case 'I': // I-Type 12 bit signed immediate + case 'i': // immediate integer operand, including symbolic constants + if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST)) + goto try_next; + break; + case 'm': // memory operand + case 'g': // any register + /* nothing special to do because the operand is already in + memory, except if the pointer itself is stored in a + memory variable (VT_LLOCAL case) */ + /* XXX: fix constant case */ + /* if it is a reference to a memory zone, it must lie + in a register, so we reserve the register in the + input registers and a load will be generated + later */ + if (j < nb_outputs || c == 'm') { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) { + /* any general register: from a0 to a7 */ + for (reg = 10; reg <= 18; reg++) { + if (!(regs_allocated[reg] & REG_IN_MASK)) + goto reg_found1; + } + goto try_next; + reg_found1: + /* now we can reload in the register */ + regs_allocated[reg] |= REG_IN_MASK; + op->reg = reg; + op->is_memory = 1; + } + } + break; + default: + tcc_error("asm constraint %d ('%s') could not be satisfied", + j, op->constraint); + break; + } + /* if a reference is present for that operand, we assign it too */ + if (op->input_index >= 0) { + operands[op->input_index].reg = op->reg; + operands[op->input_index].is_llong = op->is_llong; + } + } + + /* compute out_reg. 
It is used to store outputs registers to memory + locations references by pointers (VT_LLOCAL case) */ + *pout_reg = -1; + for (i = 0; i < nb_operands; i++) { + op = &operands[i]; + if (op->reg >= 0 && + (op->vt->r & VT_VALMASK) == VT_LLOCAL && !op->is_memory) { + if (REG_IS_FLOAT(op->reg)){ + /* From fa0 to fa7 */ + for (reg = 42; reg <= 50; reg++) { + if (!(regs_allocated[reg] & REG_OUT_MASK)) + goto reg_found2; + } + } else { + /* From a0 to a7 */ + for (reg = 10; reg <= 18; reg++) { + if (!(regs_allocated[reg] & REG_OUT_MASK)) + goto reg_found2; + } + } + tcc_error("could not find free output register for reloading"); + reg_found2: + *pout_reg = reg; + break; + } + } + + /* print sorted constraints */ +#ifdef ASM_DEBUG + for (i = 0; i < nb_operands; i++) { + j = sorted_op[i]; + op = &operands[j]; + printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", + j, + op->id ? get_tok_str(op->id, NULL) : "", + op->constraint, op->vt->r, op->reg); + } + if (*pout_reg >= 0) + printf("out_reg=%d\n", *pout_reg); +#endif +} + +ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) +{ + int reg; + TokenSym *ts; + + if (!strcmp(str, "memory") || + !strcmp(str, "cc") || + !strcmp(str, "flags")) + return; + ts = tok_alloc(str, strlen(str)); + reg = asm_parse_regvar(ts->tok); + if (reg == -1) { + tcc_error("invalid clobber register '%s'", str); + } + clobber_regs[reg] = 1; +} + +ST_FUNC int asm_parse_regvar (int t) +{ + /* PC register not implemented */ + if (t >= TOK_ASM_pc || t < TOK_ASM_x0) + return -1; + + if (t < TOK_ASM_f0) + return t - TOK_ASM_x0; + + if (t < TOK_ASM_zero) + return t - TOK_ASM_f0 + 32; // Use higher 32 for floating point + + /* ABI mnemonic */ + if (t < TOK_ASM_ft0) + return t - TOK_ASM_zero; + + return t - TOK_ASM_ft0 + 32; // Use higher 32 for floating point +} + +/*************************************************************/ +/* C extension */ + +/* caller: Add funct6, funct2 into opcode */ +static void asm_emit_ca(int token, uint16_t opcode, const 
Operand *rd, const Operand *rs2) +{ + uint8_t dst, src; + + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected source operand that is a register", get_tok_str(token, NULL)); + } + + /* subtract index of x8 */ + dst = rd->reg - 8; + src = rs2->reg - 8; + + /* only registers {x,f}8 to {x,f}15 are valid (3-bit) */ + if (dst > 7) { + tcc_error("'%s': Expected destination operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + if (src > 7) { + tcc_error("'%s': Expected source operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + /* CA-type instruction: + 15...10 funct6 + 9...7 rd'/rs1' + 6..5 funct2 + 4...2 rs2' + 1...0 opcode */ + + gen_le16(opcode | C_ENCODE_RS2(src) | C_ENCODE_RS1(dst)); +} + +static void asm_emit_cb(int token, uint16_t opcode, const Operand *rs1, const Operand *imm) +{ + uint32_t offset; + uint8_t src; + + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected source operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + if (offset & 1) { + tcc_error("'%s': Expected source operand that is an even immediate value", get_tok_str(token, NULL)); + } + + src = rs1->reg - 8; + + if (src > 7) { + tcc_error("'%s': Expected source operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + /* CB-type instruction: + 15...13 funct3 + 12...10 offset + 9..7 rs1' + 6...2 offset + 1...0 opcode */ + + /* non-branch also using CB: + 15...13 funct3 + 12 imm + 11..10 funct2 + 9...7 rd'/rs1' + 6..2 imm + 1...0 opcode */ + + switch (token) { + case TOK_ASM_c_beqz: + case TOK_ASM_c_bnez: + gen_le16(opcode | C_ENCODE_RS1(src) | ((NTH_BIT(offset, 5) | (((offset >> 1) & 3) << 1) | 
(((offset >> 6) & 3) << 3)) << 2) | ((((offset >> 3) & 3) | NTH_BIT(offset, 8)) << 10)); + return; + default: + gen_le16(opcode | C_ENCODE_RS1(src) | ((offset & 0x1f) << 2) | (NTH_BIT(offset, 5) << 12)); + return; + } +} + +static void asm_emit_ci(int token, uint16_t opcode, const Operand *rd, const Operand *imm) +{ + uint32_t immediate; + + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + immediate = imm->e.v; + + /* CI-type instruction: + 15...13 funct3 + 12 imm + 11...7 rd/rs1 + 6...2 imm + 1...0 opcode */ + + switch (token) { + case TOK_ASM_c_addi: + case TOK_ASM_c_addiw: + case TOK_ASM_c_li: + case TOK_ASM_c_slli: + gen_le16(opcode | ((immediate & 0x1f) << 2) | ENCODE_RD(rd->reg) | (NTH_BIT(immediate, 5) << 12)); + return; + case TOK_ASM_c_addi16sp: + gen_le16(opcode | NTH_BIT(immediate, 5) << 2 | (((immediate >> 7) & 3) << 3) | NTH_BIT(immediate, 6) << 5 | NTH_BIT(immediate, 4) << 6 | ENCODE_RD(rd->reg) | (NTH_BIT(immediate, 9) << 12)); + return; + case TOK_ASM_c_lui: + gen_le16(opcode | (((immediate >> 12) & 0x1f) << 2) | ENCODE_RD(rd->reg) | (NTH_BIT(immediate, 17) << 12)); + return; + case TOK_ASM_c_fldsp: + case TOK_ASM_c_ldsp: + gen_le16(opcode | (((immediate >> 6) & 7) << 2) | (((immediate >> 3) & 2) << 5) | ENCODE_RD(rd->reg) | (NTH_BIT(immediate, 5) << 12)); + return; + case TOK_ASM_c_flwsp: + case TOK_ASM_c_lwsp: + gen_le16(opcode | (((immediate >> 6) & 3) << 2) | (((immediate >> 2) & 7) << 4) | ENCODE_RD(rd->reg) | (NTH_BIT(immediate, 5) << 12)); + return; + case TOK_ASM_c_nop: + gen_le16(opcode); + return; + default: + expect("known instruction"); + } +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_ciw(int token, uint16_t opcode, const Operand *rd, const Operand *imm) +{ + uint32_t nzuimm; + 
uint8_t dst; + + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + dst = rd->reg - 8; + + if (dst > 7) { + tcc_error("'%s': Expected destination operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + nzuimm = imm->e.v; + + if (nzuimm > 0x3fc) { + tcc_error("'%s': Expected source operand that is an immediate value between 0 and 0x3ff", get_tok_str(token, NULL)); + } + + if (nzuimm & 3) { + tcc_error("'%s': Expected source operand that is a non-zero immediate value divisible by 4", get_tok_str(token, NULL)); + } + + /* CIW-type instruction: + 15...13 funct3 + 12...5 imm + 4...2 rd' + 1...0 opcode */ + + gen_le16(opcode | ENCODE_RS2(rd->reg) | ((NTH_BIT(nzuimm, 3) | (NTH_BIT(nzuimm, 2) << 1) | (((nzuimm >> 6) & 0xf) << 2) | (((nzuimm >> 4) & 3) << 6)) << 5)); +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_cj(int token, uint16_t opcode, const Operand *imm) +{ + uint32_t offset; + + /* +-2 KiB range */ + if (imm->type != OP_IM12S) { + tcc_error("'%s': Expected source operand that is a 12-bit immediate value", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + if (offset & 1) { + tcc_error("'%s': Expected source operand that is an even immediate value", get_tok_str(token, NULL)); + } + + /* CJ-type instruction: + 15...13 funct3 + 12...2 offset[11|4|9:8|10|6|7|3:1|5] + 1...0 opcode */ + + gen_le16(opcode | (NTH_BIT(offset, 5) << 2) | (((offset >> 1) & 7) << 3) | (NTH_BIT(offset, 7) << 6) | (NTH_BIT(offset, 6) << 7) | (NTH_BIT(offset, 10) << 8) | (((offset >> 8) & 3) << 9) | (NTH_BIT(offset, 4) << 11) | (NTH_BIT(offset, 11) << 12)); +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_cl(int token, uint16_t opcode, const Operand *rd, const Operand *rs1, const Operand *imm) 
+{ + uint32_t offset; + uint8_t dst, src; + + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected source operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + dst = rd->reg - 8; + src = rs1->reg - 8; + + if (dst > 7) { + tcc_error("'%s': Expected destination operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + if (src > 7) { + tcc_error("'%s': Expected source operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + if (offset > 0xff) { + tcc_error("'%s': Expected source operand that is an immediate value between 0 and 0xff", get_tok_str(token, NULL)); + } + + if (offset & 3) { + tcc_error("'%s': Expected source operand that is an immediate value divisible by 4", get_tok_str(token, NULL)); + } + + /* CL-type instruction: + 15...13 funct3 + 12...10 imm + 9...7 rs1' + 6...5 imm + 4...2 rd' + 1...0 opcode */ + + switch (token) { + /* imm variant 1 */ + case TOK_ASM_c_flw: + case TOK_ASM_c_lw: + gen_le16(opcode | C_ENCODE_RS2(dst) | C_ENCODE_RS1(src) | (NTH_BIT(offset, 6) << 5) | (NTH_BIT(offset, 2) << 6) | (((offset >> 3) & 7) << 10)); + return; + /* imm variant 2 */ + case TOK_ASM_c_fld: + case TOK_ASM_c_ld: + gen_le16(opcode | C_ENCODE_RS2(dst) | C_ENCODE_RS1(src) | (((offset >> 6) & 3) << 5) | (((offset >> 3) & 7) << 10)); + return; + default: + expect("known instruction"); + } +} + +/* caller: Add funct4 into opcode */ +static void asm_emit_cr(int token, uint16_t opcode, const Operand *rd, const Operand *rs2) +{ + if (rd->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (rs2->type != OP_REG) { + tcc_error("'%s': 
Expected source operand that is a register", get_tok_str(token, NULL)); + } + + /* CR-type instruction: + 15...12 funct4 + 11..7 rd/rs1 + 6...2 rs2 + 1...0 opcode */ + + gen_le16(opcode | C_ENCODE_RS1(rd->reg) | C_ENCODE_RS2(rs2->reg)); +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_cs(int token, uint16_t opcode, const Operand *rs2, const Operand *rs1, const Operand *imm) +{ + uint32_t offset; + uint8_t base, src; + + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (rs1->type != OP_REG) { + tcc_error("'%s': Expected source operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + base = rs1->reg - 8; + src = rs2->reg - 8; + + if (base > 7) { + tcc_error("'%s': Expected destination operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + if (src > 7) { + tcc_error("'%s': Expected source operand that is a valid C-extension register", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + if (offset > 0xff) { + tcc_error("'%s': Expected source operand that is an immediate value between 0 and 0xff", get_tok_str(token, NULL)); + } + + if (offset & 3) { + tcc_error("'%s': Expected source operand that is an immediate value divisible by 4", get_tok_str(token, NULL)); + } + + /* CS-type instruction: + 15...13 funct3 + 12...10 imm + 9...7 rs1' + 6...5 imm + 4...2 rs2' + 1...0 opcode */ + switch (token) { + /* imm variant 1 */ + case TOK_ASM_c_fsw: + case TOK_ASM_c_sw: + gen_le16(opcode | C_ENCODE_RS2(base) | C_ENCODE_RS1(src) | (NTH_BIT(offset, 6) << 5) | (NTH_BIT(offset, 2) << 6) | (((offset >> 3) & 7) << 10)); + return; + /* imm variant 2 */ + case TOK_ASM_c_fsd: + case TOK_ASM_c_sd: + gen_le16(opcode | C_ENCODE_RS2(base) | C_ENCODE_RS1(src) | (((offset >> 6) & 3) << 5) | 
(((offset >> 3) & 7) << 10)); + return; + default: + expect("known instruction"); + } +} + +/* caller: Add funct3 into opcode */ +static void asm_emit_css(int token, uint16_t opcode, const Operand *rs2, const Operand *imm) +{ + uint32_t offset; + + if (rs2->type != OP_REG) { + tcc_error("'%s': Expected destination operand that is a register", get_tok_str(token, NULL)); + } + + if (imm->type != OP_IM12S && imm->type != OP_IM32) { + tcc_error("'%s': Expected source operand that is an immediate value", get_tok_str(token, NULL)); + } + + offset = imm->e.v; + + if (offset > 0xff) { + tcc_error("'%s': Expected source operand that is an immediate value between 0 and 0xff", get_tok_str(token, NULL)); + } + + if (offset & 3) { + tcc_error("'%s': Expected source operand that is an immediate value divisible by 4", get_tok_str(token, NULL)); + } + + /* CSS-type instruction: + 15...13 funct3 + 12...7 imm + 6...2 rs2 + 1...0 opcode */ + + switch (token) { + /* imm variant 1 */ + case TOK_ASM_c_fswsp: + case TOK_ASM_c_swsp: + gen_le16(opcode | ENCODE_RS2(rs2->reg) | (((offset >> 6) & 3) << 7) | (((offset >> 2) & 0xf) << 9)); + return; + /* imm variant 2 */ + case TOK_ASM_c_fsdsp: + case TOK_ASM_c_sdsp: + gen_le16(opcode | ENCODE_RS2(rs2->reg) | (((offset >> 6) & 7) << 7) | (((offset >> 3) & 7) << 10)); + return; + default: + expect("known instruction"); + } +} + +/*************************************************************/ +#endif /* ndef TARGET_DEFS_ONLY */ diff --git a/riscv32-gen.c b/riscv32-gen.c new file mode 100644 index 000000000..aa64021b6 --- /dev/null +++ b/riscv32-gen.c @@ -0,0 +1,1736 @@ +#ifdef TARGET_DEFS_ONLY + +// Number of registers available to allocator: +// x10-x17 aka a0-a7, x28-x31 aka t3-t6, xxx, ra, sp +// No float registers (soft-float RV32IMA) +#define NB_REGS 15 +#define CONFIG_TCC_ASM + +#define TREG_R(x) (x) // x = 0..7 (a0-a7) +#define TREG_T(x) (8 + (x)) // x = 0..3 (t3-t6) + +// Register classes sorted from more general to more precise: +#define 
RC_INT (1 << 0) +#define RC_FLOAT (1 << 1) // defined but no regs in this class (soft-float) +#define RC_R(x) (1 << (2 + (x))) // x = 0..7 +#define RC_T(x) (1 << (10 + (x))) // x = 0..3 + +#define RC_IRET (RC_R(0)) // int return register class +#define RC_IRE2 (RC_R(1)) // int 2nd return register class +#define RC_FRET (RC_R(0)) // soft-float: float returns in int regs + +#define REG_IRET (TREG_R(0)) // int return register number +#define REG_IRE2 (TREG_R(1)) // int 2nd return register number +#define REG_FRET (TREG_R(0)) // soft-float: float returns in int regs + +#define PTR_SIZE 4 + +#define LDOUBLE_SIZE 8 +#define LDOUBLE_ALIGN 8 + +#define MAX_ALIGN 16 + +#define CHAR_IS_UNSIGNED + +#else +#define USING_GLOBALS +#include "tcc.h" +#include + +#define UPPER(x) (((unsigned)(x) + 0x800u) & 0xfffff000) +#define LOW_OVERFLOW(x) UPPER(x) +#define SIGN7(x) ((((x) & 0xff) ^ 0x80) - 0x80) +#define SIGN11(x) ((((x) & 0xfff) ^ 0x800) - 0x800) + +ST_DATA const char * const target_machine_defs = + "__riscv\0" + "__riscv_xlen 32\0" + "__riscv_div\0" + "__riscv_mul\0" + "__riscv_float_abi_soft\0" + ; + +#define XLEN 4 + +#define TREG_RA 13 +#define TREG_SP 14 + +ST_DATA const int reg_classes[NB_REGS] = { + RC_INT | RC_FLOAT | RC_R(0), /* a0 — soft-float: floats use int regs */ + RC_INT | RC_FLOAT | RC_R(1), /* a1 */ + RC_INT | RC_FLOAT | RC_R(2), /* a2 */ + RC_INT | RC_FLOAT | RC_R(3), /* a3 */ + RC_INT | RC_FLOAT | RC_R(4), /* a4 */ + RC_INT | RC_FLOAT | RC_R(5), /* a5 */ + RC_INT | RC_FLOAT | RC_R(6), /* a6 */ + RC_INT | RC_FLOAT | RC_R(7), /* a7 */ + RC_INT | RC_FLOAT | RC_T(0), /* t3 (x28) — caller-saved temporaries */ + RC_INT | RC_FLOAT | RC_T(1), /* t4 (x29) */ + RC_INT | RC_FLOAT | RC_T(2), /* t5 (x30) */ + RC_INT | RC_FLOAT | RC_T(3), /* t6 (x31) */ + 0, + 1 << TREG_RA, + 1 << TREG_SP +}; + +#if defined(CONFIG_TCC_BCHECK) +static addr_t func_bound_offset; +static unsigned long func_bound_ind; +ST_DATA int func_bound_add_epilog; +#endif + +static int ireg(int r) +{ + 
if (r == TREG_RA) + return 1; // ra + if (r == TREG_SP) + return 2; // sp + if (r >= 8 && r < 12) + return r + 20; // tccT0-T3 --> t3-t6 == x28-x31 + assert(r >= 0 && r < 8); + return r + 10; // tccrX --> aX == x(10+X) +} + +static int is_ireg(int r) +{ + return (unsigned)r < 12 || r == TREG_RA || r == TREG_SP; +} + +ST_FUNC void o(unsigned int c) +{ + int ind1 = ind + 4; + if (nocode_wanted) + return; + if (ind1 > cur_text_section->data_allocated) + section_realloc(cur_text_section, ind1); + write32le(cur_text_section->data + ind, c); + ind = ind1; +} + +static void EIu(uint32_t opcode, uint32_t func3, + uint32_t rd, uint32_t rs1, uint32_t imm) +{ + o(opcode | (func3 << 12) | (rd << 7) | (rs1 << 15) | (imm << 20)); +} + +static void ER(uint32_t opcode, uint32_t func3, + uint32_t rd, uint32_t rs1, uint32_t rs2, uint32_t func7) +{ + o(opcode | func3 << 12 | rd << 7 | rs1 << 15 | rs2 << 20 | func7 << 25); +} + +static void EI(uint32_t opcode, uint32_t func3, + uint32_t rd, uint32_t rs1, uint32_t imm) +{ + assert(! LOW_OVERFLOW(imm)); + EIu(opcode, func3, rd, rs1, imm); +} + +static void ES(uint32_t opcode, uint32_t func3, + uint32_t rs1, uint32_t rs2, uint32_t imm) +{ + assert(! LOW_OVERFLOW(imm)); + o(opcode | (func3 << 12) | ((imm & 0x1f) << 7) | (rs1 << 15) + | (rs2 << 20) | ((imm >> 5) << 25)); +} + +// Patch all branches in list pointed to by t to branch to a: +ST_FUNC void gsym_addr(int t_, int a_) +{ + uint32_t t = t_; + uint32_t a = a_; + while (t) { + unsigned char *ptr = cur_text_section->data + t; + uint32_t next = read32le(ptr); + uint32_t r = a - t, imm; + if ((r + (1 << 21)) & ~((1U << 22) - 2)) + tcc_error("out-of-range branch chain"); + imm = (((r >> 12) & 0xff) << 12) + | (((r >> 11) & 1) << 20) + | (((r >> 1) & 0x3ff) << 21) + | (((r >> 20) & 1) << 31); + write32le(ptr, r == 4 ? 
0x33 : 0x6f | imm); // nop || j imm + t = next; + } +} + +static int load_symofs(int r, SValue *sv, int forstore, int *new_fc) +{ + int rr, doload = 0, large_addend = 0; + int fc = sv->c.i, v = sv->r & VT_VALMASK; + if (sv->r & VT_SYM) { + Sym label = {0}; + assert(v == VT_CONST); + if (sv->sym->type.t & VT_STATIC) { // XXX do this per linker relax + greloca(cur_text_section, sv->sym, ind, + R_RISCV_PCREL_HI20, sv->c.i); + *new_fc = 0; + } else { + if (LOW_OVERFLOW(fc)){ + large_addend = 1; + } + greloca(cur_text_section, sv->sym, ind, + R_RISCV_GOT_HI20, 0); + doload = 1; + } + label.type.t = VT_VOID | VT_STATIC; + if (!nocode_wanted) + put_extern_sym(&label, cur_text_section, ind, 0); + rr = is_ireg(r) ? ireg(r) : 5; // t0 when called from store (r=-1) + o(0x17 | (rr << 7)); // auipc RR, 0 %pcrel_hi(sym)+addend + greloca(cur_text_section, &label, ind, + doload || !forstore + ? R_RISCV_PCREL_LO12_I : R_RISCV_PCREL_LO12_S, 0); + if (doload) { + EI(0x03, 2, rr, rr, 0); // lw RR, 0(RR) + if (large_addend) { + o(0x37 | (6 << 7) | UPPER(fc)); //lui t1, high(fc) + ER(0x33, 0, rr, rr, 6, 0); // add RR, RR, t1 + *new_fc = SIGN11(fc); + } + } + } else if (v == VT_LOCAL || v == VT_LLOCAL) { + rr = 8; // s0 + if (fc != sv->c.i) + tcc_error("unimp: store(giant local off) (0x%lx)", (long)sv->c.i); + if (LOW_OVERFLOW(fc)) { + rr = is_ireg(r) ? 
ireg(r) : 5; // t0 when called from store (r=-1) + o(0x37 | (rr << 7) | UPPER(fc)); //lui RR, upper(fc) + ER(0x33, 0, rr, rr, 8, 0); // add RR, RR, s0 + *new_fc = SIGN11(fc); + } + } else + tcc_error("uhh"); + return rr; +} + +ST_FUNC void load(int r, SValue *sv) +{ + int fr = sv->r; + int v = fr & VT_VALMASK; + int rr = ireg(r); + int fc = sv->c.i; + int bt = sv->type.t & VT_BTYPE; + int align, size; + if (fr & VT_LVAL) { + int func3, opcode = 0x03, br; + size = type_size(&sv->type, &align); + if (bt == VT_PTR || bt == VT_FUNC) /* XXX should be done in generic code */ + size = PTR_SIZE; + /* On RV32, max single-register load is 4 bytes */ + if (size > 4) + size = 4; + func3 = size == 1 ? 0 : size == 2 ? 1 : 2; /* lb, lh, lw */ + if (size < 4 && !is_float(sv->type.t) && (sv->type.t & VT_UNSIGNED)) + func3 |= 4; /* lbu, lhu */ + if (v == VT_LOCAL || (fr & VT_SYM)) { + br = load_symofs(r, sv, 0, &fc); + } else if (v < VT_CONST) { + br = ireg(v); + fc = 0; // XXX store ofs in LVAL(reg) + } else if (v == VT_LLOCAL) { + br = load_symofs(r, sv, 0, &fc); + EI(0x03, 2, rr, br, fc); // lw RR, fc(BR) + br = rr; + fc = 0; + } else if (v == VT_CONST) { + o(0x37 | (rr << 7) | UPPER(fc)); //lui RR, upper(fc) + fc = SIGN11(fc); + br = rr; + } else { + tcc_error("unimp: load(non-local lval)"); + } + EI(opcode, func3, rr, br, fc); // l[bhw][u] RR, fc(BR) + } else if (v == VT_CONST) { + int rb = 0; + assert(is_ireg(r)); + if (fr & VT_SYM) { + rb = load_symofs(r, sv, 0, &fc); + } + /* On RV64, float consts use FPU loads - not supported without FPU. + On RV32 soft-float, float/double consts are loaded as integers + (handled below via lui/addi), no special action needed. 
*/ + if (LOW_OVERFLOW(fc)) + o(0x37 | (rr << 7) | UPPER(fc)), rb = rr; //lui RR, upper(fc) + if (fc || (rr != rb) || (fr & VT_SYM)) + EI(0x13, 0, rr, rb, SIGN11(fc)); // addi R, x0|R, FC + } else if (v == VT_LOCAL) { + int br = load_symofs(r, sv, 0, &fc); + assert(is_ireg(r)); + EI(0x13, 0, rr, br, fc); // addi R, s0, FC + } else if (v < VT_CONST) { /* reg-reg */ + //assert(!fc); XXX support offseted regs + if (is_ireg(r) && is_ireg(v)) + EI(0x13, 0, rr, ireg(v), 0); // addi RR, V, 0 == mv RR, V + else { + tcc_error("unimp: load(non-int reg-reg)"); + } + } else if (v == VT_CMP) { + int op = vtop->cmp_op; + int a = vtop->cmp_r & 0xff; + int b = (vtop->cmp_r >> 8) & 0xff; + int inv = 0; + switch (op) { + case TOK_ULT: + case TOK_UGE: + case TOK_ULE: + case TOK_UGT: + case TOK_LT: + case TOK_GE: + case TOK_LE: + case TOK_GT: + if (op & 1) { // remove [U]GE,GT + inv = 1; + op--; + } + if ((op & 7) == 6) { // [U]LE + int t = a; a = b; b = t; + inv ^= 1; + } + ER(0x33, (op > TOK_UGT) ? 2 : 3, rr, a, b, 0); // slt[u] d, a, b + if (inv) + EI(0x13, 4, rr, rr, 1); // xori d, d, 1 + break; + case TOK_NE: + case TOK_EQ: + if (rr != a || b) + ER(0x33, 0, rr, a, b, 0x20); // sub d, a, b + if (op == TOK_NE) + ER(0x33, 3, rr, 0, rr, 0); // sltu d, x0, d == snez d,d + else + EI(0x13, 3, rr, rr, 1); // sltiu d, d, 1 == seqz d,d + break; + } + } else if ((v & ~1) == VT_JMP) { + int t = v & 1; + assert(is_ireg(r)); + EI(0x13, 0, rr, 0, t); // addi RR, x0, t + gjmp_addr(ind + 8); + gsym(fc); + EI(0x13, 0, rr, 0, t ^ 1); // addi RR, x0, !t + } else + tcc_error("unimp: load(non-const)"); +} + +ST_FUNC void store(int r, SValue *sv) +{ + int fr = sv->r & VT_VALMASK; + int rr = ireg(r), ptrreg; + int fc = sv->c.i; + int bt = sv->type.t & VT_BTYPE; + int align, size = type_size(&sv->type, &align); + /* long doubles are in two integer registers, but the load/store + primitives only deal with one, so do as if it's one reg. 
*/ + if (bt == VT_LDOUBLE) + size = align = 4; + if (bt == VT_STRUCT) + tcc_error("unimp: store(struct)"); + /* On RV32, max single-register store is 4 bytes */ + if (size > 4) + size = 4; + assert(sv->r & VT_LVAL); + if (fr == VT_LOCAL || (sv->r & VT_SYM)) { + ptrreg = load_symofs(-1, sv, 1, &fc); + } else if (fr < VT_CONST) { + ptrreg = ireg(fr); + fc = 0; // XXX support offsets regs + } else if (fr == VT_CONST) { + ptrreg = 8; // s0 + o(0x37 | (ptrreg << 7) | UPPER(fc)); //lui RR, upper(fc) + fc = SIGN11(fc); + } else + tcc_error("implement me: %s(!local)", __FUNCTION__); + ES(0x23, // s... + size == 1 ? 0 : size == 2 ? 1 : 2, // [bhw] + ptrreg, rr, fc); // RR, fc(base) +} + +static void gcall_or_jmp(int docall) +{ + int tr = docall ? 1 : 5; // ra or t0 + if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && + ((vtop->r & VT_SYM) && vtop->c.i == (int)vtop->c.i)) { + /* constant symbolic case -> simple relocation */ + greloca(cur_text_section, vtop->sym, ind, + R_RISCV_CALL_PLT, (int)vtop->c.i); + o(0x17 | (tr << 7)); // auipc TR, 0 %call(func) + EI(0x67, 0, tr, tr, 0);// jalr TR, r(TR) + } else if (vtop->r < VT_CONST) { + int r = ireg(vtop->r); + EI(0x67, 0, tr, r, 0); // jalr TR, 0(R) + } else { + int r = TREG_RA; + load(r, vtop); + r = ireg(r); + EI(0x67, 0, tr, r, 0); // jalr TR, 0(R) + } +} + +#if defined(CONFIG_TCC_BCHECK) + +static void gen_bounds_call(int v) +{ + Sym *sym = external_helper_sym(v); + + greloca(cur_text_section, sym, ind, R_RISCV_CALL_PLT, 0); + o(0x17 | (1 << 7)); // auipc TR, 0 %call(func) + EI(0x67, 0, 1, 1, 0); // jalr TR, r(TR) +} + +static void gen_bounds_prolog(void) +{ + /* leave some room for bound checking code */ + func_bound_offset = lbounds_section->data_offset; + func_bound_ind = ind; + func_bound_add_epilog = 0; + o(0x00000013); /* nop -> load lbound section pointer */ + o(0x00000013); + o(0x00000013); /* nop -> call __bound_local_new */ + o(0x00000013); +} + +static void gen_bounds_epilog(void) +{ + addr_t saved_ind; + 
addr_t *bounds_ptr; + Sym *sym_data; + Sym label = {0}; + + int offset_modified = func_bound_offset != lbounds_section->data_offset; + + if (!offset_modified && !func_bound_add_epilog) + return; + + /* add end of table info */ + bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t)); + *bounds_ptr = 0; + + sym_data = get_sym_ref(&char_pointer_type, lbounds_section, + func_bound_offset, PTR_SIZE); + + label.type.t = VT_VOID | VT_STATIC; + /* generate bound local allocation */ + if (offset_modified) { + saved_ind = ind; + ind = func_bound_ind; + put_extern_sym(&label, cur_text_section, ind, 0); + greloca(cur_text_section, sym_data, ind, R_RISCV_GOT_HI20, 0); + o(0x17 | (10 << 7)); // auipc a0, 0 %pcrel_hi(sym)+addend + greloca(cur_text_section, &label, ind, R_RISCV_PCREL_LO12_I, 0); + EI(0x03, 2, 10, 10, 0); // lw a0, 0(a0) + gen_bounds_call(TOK___bound_local_new); + ind = saved_ind; + label.c = 0; /* force new local ELF symbol */ + } + + /* generate bound check local freeing */ + /* addi sp,sp,-16; sw a0,0(sp); sw a1,4(sp) */ + EI(0x13, 0, 2, 2, -16); // addi sp, sp, -16 + ES(0x23, 2, 2, 10, 0); // sw a0, 0(sp) + ES(0x23, 2, 2, 11, 4); // sw a1, 4(sp) + put_extern_sym(&label, cur_text_section, ind, 0); + greloca(cur_text_section, sym_data, ind, R_RISCV_GOT_HI20, 0); + o(0x17 | (10 << 7)); // auipc a0, 0 %pcrel_hi(sym)+addend + greloca(cur_text_section, &label, ind, R_RISCV_PCREL_LO12_I, 0); + EI(0x03, 2, 10, 10, 0); // lw a0, 0(a0) + gen_bounds_call(TOK___bound_local_delete); + EI(0x03, 2, 10, 2, 0); // lw a0, 0(sp) + EI(0x03, 2, 11, 2, 4); // lw a1, 4(sp) + EI(0x13, 0, 2, 2, 16); // addi sp, sp, 16 +} +#endif + +static void reg_pass_rec(CType *type, int *rc, int *fieldofs, int ofs) +{ + if ((type->t & VT_BTYPE) == VT_STRUCT) { + Sym *f; + if (type->ref->type.t == VT_UNION) + rc[0] = -1; + else for (f = type->ref->next; f; f = f->next) + reg_pass_rec(&f->type, rc, fieldofs, ofs + f->c); + } else if (type->t & VT_ARRAY) { + if (type->ref->c < 0 || type->ref->c 
> 2) + rc[0] = -1; + else { + int a, sz = type_size(&type->ref->type, &a); + reg_pass_rec(&type->ref->type, rc, fieldofs, ofs); + if (rc[0] > 2 || (rc[0] == 2 && type->ref->c > 1)) + rc[0] = -1; + else if (type->ref->c == 2) + rc[0] = -1; + } + } else if (rc[0] == 2 || rc[0] < 0 + || (type->t & VT_BTYPE) == VT_LDOUBLE + || (type->t & VT_BTYPE) == VT_DOUBLE + || (type->t & VT_BTYPE) == VT_LLONG) + /* On RV32 soft-float, double/llong/ldouble are wider than XLEN + and need register pairs; handled by reg_pass fallback */ + rc[0] = -1; + else if (!rc[0]) { + /* soft-float: first scalar field goes in integer register. + Additional fields force fallback (size-based packing) since + on RV32 soft-float there are no mixed int+float pairs. */ + rc[++rc[0]] = RC_INT; + fieldofs[rc[0]] = (ofs << 4) | ((type->t & VT_BTYPE) == VT_PTR ? VT_INT : type->t & VT_BTYPE); + } else + rc[0] = -1; +} + +static void reg_pass(CType *type, int *prc, int *fieldofs, int named) +{ + prc[0] = 0; + reg_pass_rec(type, prc, fieldofs, 0); + if (prc[0] <= 0 || !named) { + int align, size = type_size(type, &align); + prc[0] = (size + 3) >> 2; /* number of 4-byte slots */ + prc[1] = prc[2] = RC_INT; + fieldofs[1] = (0 << 4) | (size <= 1 ? VT_BYTE : size <= 2 ? VT_SHORT : VT_INT); + fieldofs[2] = (4 << 4) | (size <= 5 ? VT_BYTE : size <= 6 ? 
VT_SHORT : VT_INT); + } +} + +static void gen_dbl_to_quad_store(int d0, int d1, int addr); + +ST_FUNC void gfunc_call(int nb_args) +{ + int i, align, size, areg[2]; + int *info = tcc_malloc((nb_args + 1) * sizeof (int)); + int stack_adj = 0, tempspace = 0, stack_add, ofs, splitofs = 0; + int old = (vtop[-nb_args].type.ref->f.func_type == FUNC_OLD); + SValue *sv; + Sym *sa; + +#ifdef CONFIG_TCC_BCHECK + int bc_save = tcc_state->do_bounds_check; + if (tcc_state->do_bounds_check) + gbound_args(nb_args); +#endif + + areg[0] = 0; /* int arg regs */ + areg[1] = 0; /* no float arg regs (soft-float) */ + sa = vtop[-nb_args].type.ref->next; + for (i = 0; i < nb_args; i++) { + int nregs, byref = 0, tempofs; + int prc[3], fieldofs[3]; + sv = &vtop[1 + i - nb_args]; + sv->type.t &= ~VT_ARRAY; // XXX this should be done in tccgen.c + size = type_size(&sv->type, &align); + /* Varargs long double: the RV32 ILP32 ABI uses 128-bit (binary128) + long double passed by reference. TCC internally uses 64-bit + double, so force the size to 16 to trigger the byref path. + The byref store phase converts the value to quad format. 
*/ + if (!sa && (sv->type.t & VT_BTYPE) == VT_DOUBLE + && (sv->type.t & VT_LONG)) { + size = 16; + align = 16; + } + if (size > 2 * XLEN) { + if (align < XLEN) + align = XLEN; + tempspace = (tempspace + align - 1) & -align; + tempofs = tempspace; + tempspace += size; + size = align = XLEN; + byref = 64 | (tempofs << 7); + } + reg_pass(&sv->type, prc, fieldofs, old || sa != 0); + if (!old && !sa && align == 2*XLEN && size <= 2*XLEN) + areg[0] = (areg[0] + 1) & ~1; + nregs = prc[0]; + if (byref) + nregs = 1; /* byref passes a pointer, needs only 1 register */ + if (size == 0) + info[i] = 0; + else if (prc[1] == RC_INT && areg[0] >= 8) { + info[i] = 32; + if (align < XLEN) + align = XLEN; + stack_adj += (size + align - 1) & -align; + if (!old && !sa) /* one vararg on stack forces the rest on stack */ + areg[0] = 8; + } else { + info[i] = areg[0]++; + if (!byref) + info[i] |= (fieldofs[1] & VT_BTYPE) << 12; + assert(!(fieldofs[1] >> 4)); + if (nregs == 2) { + if (areg[0] < 8) + info[i] |= (1 + areg[0]++) << 7; + else { + info[i] |= 16; + stack_adj += XLEN; + } + if (!byref) { + assert((fieldofs[2] >> 4) < 2048); + info[i] |= fieldofs[2] << (12 + 4); // includes offset + } + } + } + info[i] |= byref; + if (sa) + sa = sa->next; + } + stack_adj = (stack_adj + 15) & -16; + tempspace = (tempspace + 15) & -16; + stack_add = stack_adj + tempspace; + + if (stack_add) { + if (stack_add >= 0x800) { + o(0x37 | (5 << 7) | UPPER(-stack_add)); //lui t0, upper(v) + EI(0x13, 0, 5, 5, SIGN11(-stack_add)); // addi t0, t0, lo(v) + ER(0x33, 0, 2, 2, 5, 0); // add sp, sp, t0 + } + else + EI(0x13, 0, 2, 2, -stack_add); // addi sp, sp, -adj + for (i = ofs = 0; i < nb_args; i++) { + if (info[i] & (64 | 32)) { + vrotb(nb_args - i); + size = type_size(&vtop->type, &align); + if (info[i] & 64) { + if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE + && (vtop->type.t & VT_LONG)) { + /* Varargs long double: convert 64-bit double to + 128-bit quad in temp space, replace with pointer */ + int dest_ofs = 
stack_adj + (info[i] >> 7); + /* Compute dest addr: sp + dest_ofs → t0 (x5) */ + if (dest_ofs >= 0 && dest_ofs < 2048) + EI(0x13, 0, 5, 2, dest_ofs); + else { + o(0x37 | (5 << 7) | UPPER(dest_ofs)); + EI(0x13, 0, 5, 5, SIGN11(dest_ofs)); + ER(0x33, 0, 5, 5, 2, 0); + } + /* Force double into register pair */ + gv(RC_INT); + gen_dbl_to_quad_store(ireg(vtop->r), + ireg(vtop->r2), 5); + vtop--; /* pop the double */ + /* Push pointer to the quad as the new argument */ + vset(&char_pointer_type, TREG_SP, 0); + vpushi(dest_ofs); + gen_op('+'); + } else { + vset(&char_pointer_type, TREG_SP, 0); + vpushi(stack_adj + (info[i] >> 7)); + gen_op('+'); + vpushv(vtop); // this replaces the old argument + vrott(3); + indir(); + vtop->type = vtop[-1].type; + vswap(); + vstore(); + vpop(); + } + size = align = XLEN; + } + if (info[i] & 32) { + if (align < XLEN) + align = XLEN; + vset(&char_pointer_type, TREG_SP, 0); + ofs = (ofs + align - 1) & -align; + vpushi(ofs); + gen_op('+'); + indir(); + vtop->type = vtop[-1].type; + vswap(); + vstore(); + vtop->r = vtop->r2 = VT_CONST; // this arg is done + ofs += size; + } + vrott(nb_args - i); + } else if (info[i] & 16) { + assert(!splitofs); + splitofs = ofs; + ofs += XLEN; + } + } + } + for (i = 0; i < nb_args; i++) { + int ii = info[nb_args - 1 - i], r = ii, r2 = r; + if (!(r & 32)) { + CType origtype; + int loadt; + r &= 15; + r2 = r2 & 64 ? 
0 : (r2 >> 7) & 31; + assert(r2 <= 16); + vrotb(i+1); + origtype = vtop->type; + size = type_size(&vtop->type, &align); + if (size == 0) + goto done; + loadt = vtop->type.t & VT_BTYPE; + if (loadt == VT_STRUCT) { + loadt = (ii >> 12) & VT_BTYPE; + } + if (info[nb_args - 1 - i] & 16) { + assert(!r2); + r2 = 1 + TREG_RA; + } + if (loadt == VT_LDOUBLE + || (r2 && (loadt == VT_DOUBLE)) + || (r2 && (loadt == VT_LLONG))) { + /* Two-word value: gv() handles loading both halves */ + assert(r2); + r2--; + } else if (r2) { + test_lvalue(); + vpushv(vtop); + } + vtop->type.t = loadt | (vtop->type.t & VT_UNSIGNED); + gv(RC_R(r)); + vtop->type = origtype; + + if (r2 && loadt != VT_LDOUBLE && loadt != VT_DOUBLE && loadt != VT_LLONG) { + r2--; + assert(r2 < 16 || r2 == TREG_RA); + vswap(); + gaddrof(); + vtop->type = char_pointer_type; + vpushi(ii >> 20); +#ifdef CONFIG_TCC_BCHECK + if ((origtype.t & VT_BTYPE) == VT_STRUCT) + tcc_state->do_bounds_check = 0; +#endif + gen_op('+'); +#ifdef CONFIG_TCC_BCHECK + tcc_state->do_bounds_check = bc_save; +#endif + indir(); + vtop->type = origtype; + loadt = vtop->type.t & VT_BTYPE; + if (loadt == VT_STRUCT) { + loadt = (ii >> 16) & VT_BTYPE; + } + save_reg_upstack(r2, 1); + vtop->type.t = loadt | (vtop->type.t & VT_UNSIGNED); + load(r2, vtop); + assert(r2 < VT_CONST); + vtop--; + vtop->r2 = r2; + } + if (info[nb_args - 1 - i] & 16) { + ES(0x23, 2, 2, ireg(vtop->r2), splitofs); // sw t0, ofs(sp) + vtop->r2 = VT_CONST; + } else if ((loadt == VT_LDOUBLE || loadt == VT_DOUBLE || loadt == VT_LLONG) && vtop->r2 != r2) { + assert(vtop->r2 <= 7 && r2 <= 7); + EI(0x13, 0, ireg(r2), ireg(vtop->r2), 0); // mv Ra+1, RR2 + vtop->r2 = r2; + } +done: + vrott(i+1); + } + } + vrotb(nb_args + 1); + save_regs(nb_args + 1); + gcall_or_jmp(1); + vtop -= nb_args + 1; + if (stack_add) { + if (stack_add >= 0x800) { + o(0x37 | (5 << 7) | UPPER(stack_add)); //lui t0, upper(v) + EI(0x13, 0, 5, 5, SIGN11(stack_add)); // addi t0, t0, lo(v) + ER(0x33, 0, 2, 2, 5, 0); 
// add sp, sp, t0 + } + else + EI(0x13, 0, 2, 2, stack_add); // addi sp, sp, adj + } + tcc_free(info); +} + +static int func_sub_sp_offset, num_va_regs, func_va_list_ofs; + +ST_FUNC void gfunc_prolog(Sym *func_sym) +{ + CType *func_type = &func_sym->type; + int i, addr, align, size; + int param_addr = 0; + int areg[2]; + Sym *sym; + CType *type; + + sym = func_type->ref; + loc = -8; // for ra and s0 (each 4 bytes) + func_sub_sp_offset = ind; + ind += 5 * 4; + + areg[0] = 0, areg[1] = 0; + addr = 0; + /* if the function returns by reference, then add an + implicit pointer parameter */ + size = type_size(&func_vt, &align); + if (size > 2 * XLEN) { + loc -= XLEN; + func_vc = loc; + ES(0x23, 2, 8, 10 + areg[0]++, loc); // sw a0, loc(s0) + } + /* define parameters */ + while ((sym = sym->next) != NULL) { + int byref = 0; + int regcount; + int prc[3], fieldofs[3]; + type = &sym->type; + size = type_size(type, &align); + if (size > 2 * XLEN) { + type = &char_pointer_type; + size = align = byref = XLEN; + } + reg_pass(type, prc, fieldofs, 1); + regcount = prc[0]; + if (areg[prc[1] - 1] >= 8 + || (regcount == 2 && areg[0] >= 7)) { + if (align < XLEN) + align = XLEN; + addr = (addr + align - 1) & -align; + param_addr = addr; + addr += size; + } else { + loc -= regcount * XLEN; + param_addr = loc; + for (i = 0; i < regcount; i++) { + if (areg[0] >= 8) { + assert(i == 1 && regcount == 2 && !(addr & (XLEN-1))); + EI(0x03, 2, 5, 8, addr); // lw t0, addr(s0) + addr += XLEN; + ES(0x23, 2, 8, 5, loc + i*XLEN); // sw t0, loc(s0) + } else { + ES(0x23, 2, 8, 10 + areg[0]++, loc + i*XLEN); // sw aX, loc(s0) + } + } + } + gfunc_set_param(sym, param_addr, byref); + } + func_va_list_ofs = addr; + num_va_regs = 0; + if (func_var) { + for (; areg[0] < 8; areg[0]++) { + num_va_regs++; + ES(0x23, 2, 8, 10 + areg[0], -XLEN + num_va_regs * XLEN); // sw aX, loc(s0) + } + } +#ifdef CONFIG_TCC_BCHECK + if (tcc_state->do_bounds_check) + gen_bounds_prolog(); +#endif +} + +ST_FUNC int 
gfunc_sret(CType *vt, int variadic, CType *ret, + int *ret_align, int *regsize) +{ + int align, size = type_size(vt, &align), nregs; + int prc[3], fieldofs[3]; + *ret_align = 1; + *regsize = XLEN; + if (size > 2 * XLEN) + return 0; + reg_pass(vt, prc, fieldofs, 1); + nregs = prc[0]; + if (nregs == 2 && prc[1] != prc[2]) + return -1; /* generic code can't deal with this case */ + ret->t = fieldofs[1] & VT_BTYPE; + ret->ref = NULL; + return nregs; +} + +ST_FUNC void arch_transfer_ret_regs(int aftercall) +{ + int prc[3], fieldofs[3]; + reg_pass(&vtop->type, prc, fieldofs, 1); + assert(prc[0] == 2 && prc[1] != prc[2] && !(fieldofs[1] >> 4)); + assert(vtop->r == (VT_LOCAL | VT_LVAL)); + vpushv(vtop); + vtop->type.t = fieldofs[1] & VT_BTYPE; + (aftercall ? store : load)(REG_IRET, vtop); + vtop->c.i += fieldofs[2] >> 4; + vtop->type.t = fieldofs[2] & VT_BTYPE; + (aftercall ? store : load)(REG_IRET, vtop); + vtop--; +} + +ST_FUNC void gfunc_epilog(void) +{ + int v, saved_ind, d, large_ofs_ind; + +#ifdef CONFIG_TCC_BCHECK + if (tcc_state->do_bounds_check) + gen_bounds_epilog(); +#endif + + loc = (loc - num_va_regs * XLEN); + d = v = (-loc + 15) & -16; + + EI(0x13, 0, 2, 8, num_va_regs * XLEN); // addi sp, s0, num_va_regs*XLEN + EI(0x03, 2, 1, 8, -4); // lw ra, -4(s0) + EI(0x03, 2, 8, 8, -8); // lw s0, -8(s0) + EI(0x67, 0, 0, 1, 0); // jalr x0, 0(x1), aka ret + + large_ofs_ind = ind; + if (v >= (1 << 11)) { + d = 8; // space for ra+s0 + EI(0x13, 0, 8, 2, d - num_va_regs * XLEN); // addi s0, sp, d + o(0x37 | (5 << 7) | UPPER(v-8)); //lui t0, upper(v) + EI(0x13, 0, 5, 5, SIGN11(v-8)); // addi t0, t0, lo(v) + ER(0x33, 0, 2, 2, 5, 0x20); // sub sp, sp, t0 + gjmp_addr(func_sub_sp_offset + 5*4); + } + saved_ind = ind; + + ind = func_sub_sp_offset; + EI(0x13, 0, 2, 2, -d); // addi sp, sp, -d + ES(0x23, 2, 2, 1, d - 4 - num_va_regs * XLEN); // sw ra, d-4(sp) + ES(0x23, 2, 2, 8, d - 8 - num_va_regs * XLEN); // sw s0, d-8(sp) + if (v < (1 << 11)) + EI(0x13, 0, 8, 2, d - num_va_regs * 
XLEN); // addi s0, sp, d
    else
        gjmp_addr(large_ofs_ind);
    /* the prologue stub must be exactly 5 words (see gfunc_prolog which
       reserves 5*4 bytes); pad with a nop if the short form was used */
    if ((ind - func_sub_sp_offset) != 5*4)
        EI(0x13, 0, 0, 0, 0); // addi x0, x0, 0 == nop
    ind = saved_ind;
}

/* Implement __builtin_va_start: replace the va_list argument on the
   value stack with the s0-relative address of the first anonymous
   parameter (func_va_list_ofs was recorded by gfunc_prolog). */
ST_FUNC void gen_va_start(void)
{
    vtop--;
    vset(&char_pointer_type, VT_LOCAL, func_va_list_ofs);
}

/* Pad the text section with 'bytes' of nops; code is always emitted in
   4-byte units (no compressed instructions), so 'bytes' must be a
   multiple of 4. */
ST_FUNC void gen_fill_nops(int bytes)
{
    if ((bytes & 3))
        tcc_error("alignment of code section not multiple of 4");
    while (bytes > 0) {
        EI(0x13, 0, 0, 0, 0); // addi x0, x0, 0 == nop
        bytes -= 4;
    }
}

// Generate forward branch to label:
/* Emit a 4-byte placeholder word holding 't', the link to the next
   entry of the pending-jump list (walked and patched by gjmp_append /
   the final resolution).  Returns the offset of the placeholder. */
ST_FUNC int gjmp(int t)
{
    if (nocode_wanted)
        return t;
    o(t);
    return ind - 4;
}

// Generate branch to known address:
ST_FUNC void gjmp_addr(int a)
{
    uint32_t r = a - ind, imm;
    if ((r + (1 << 21)) & ~((1U << 22) - 2)) {
        /* target out of jal's +-1MB range: auipc t0 + jalr through t0 */
        o(0x17 | (5 << 7) | UPPER(r)); // auipc t0, up(r)  (0x17 = auipc)
        r = SIGN11(r);
        EI(0x67, 0, 0, 5, r); // jalr x0, r(t0)
    } else {
        /* J-type immediate scrambling: imm[20|10:1|11|19:12] */
        imm = (((r >> 12) & 0xff) << 12)
            | (((r >> 11) & 1) << 20)
            | (((r >> 1) & 0x3ff) << 21)
            | (((r >> 20) & 1) << 31);
        o(0x6f | imm); // jal x0, imm == j imm
    }
}

/* Emit a conditional forward jump for comparison 'op', whose operand
   registers were stashed in vtop->cmp_r (a = low byte, b = next byte).
   The strategy is an INVERTED branch over an unconditional jump emitted
   by gjmp(), so the jump target can be patched later. */
ST_FUNC int gjmp_cond(int op, int t)
{
    int tmp;
    int a = vtop->cmp_r & 0xff;
    int b = (vtop->cmp_r >> 8) & 0xff;
    /* map TCC comparison tokens to BRANCH funct3; >/<= variants are
       obtained by swapping the operands of </>= */
    switch (op) {
        case TOK_ULT: op = 6; break;
        case TOK_UGE: op = 7; break;
        case TOK_ULE: op = 7; tmp = a; a = b; b = tmp; break;
        case TOK_UGT: op = 6; tmp = a; a = b; b = tmp; break;
        case TOK_LT: op = 4; break;
        case TOK_GE: op = 5; break;
        case TOK_LE: op = 5; tmp = a; a = b; b = tmp; break;
        case TOK_GT: op = 4; tmp = a; a = b; b = tmp; break;
        case TOK_NE: op = 1; break;
        case TOK_EQ: op = 0; break;
    }
    /* (op ^ 1) inverts the condition; 8 << 7 sets imm[3] of the B-type
       immediate, i.e. offset +8: skip the 4-byte jump emitted below */
    o(0x63 | (op ^ 1) << 12 | a << 15 | b << 20 | 8 << 7); // bOP a,b,+8 (over next insn)
    return gjmp(t);
}

/* Append target 't' to the linked list of pending jumps starting at
   'n'; each list node is the 32-bit placeholder word written by gjmp(),
   holding the offset of the next node (0 terminates). */
ST_FUNC int gjmp_append(int n, int t)
{
    void *p;
    /* insert jump list n into t */
    if (n) {
        uint32_t n1 = n, n2;
        while ((n2 = read32le(p = cur_text_section->data + n1)))
            n1 = n2;
        write32le(p, t);
        t = n;
    }
    return t;
}

/* RV32: carry/borrow register for long long add/sub.
+ We use x5 (t0) which is not managed by the register allocator. + Between TOK_ADDC1/SUBC1 and TOK_ADDC2/SUBC2, no other code + generation occurs (only vstack manipulation), so t0 is safe. */ +#define CARRY_REG 5 /* x5 = t0 */ + +/* Emit code to convert a 64-bit double (binary64) in hardware registers + d0 (low word) and d1 (high word) to IEEE 754 binary128 (quad) format, + and store 16 bytes to the address in hardware register 'addr'. + Uses t1 (x6) and t2 (x7) as scratch. addr must be t0 (x5). + d0 and d1 must be from TCC's allocatable set (a0-a7, t3-t6). + + Double: sign(1) | exp(11) | mantissa(52) + Quad: sign(1) | exp(15) | mantissa(112) + Mantissa shifted left by 60 bits; exponent bias adjusted by 15360. + + In little-endian 32-bit words: + Q0 = 0 + Q1 = mantissa[3:0] << 28 + Q2 = (D0 >> 4) | ((D1 & 0xF) << 28) + Q3 = sign | (quad_exp << 16) | mantissa[51:36] */ +static void gen_dbl_to_quad_store(int d0, int d1, int addr) +{ + int s1 = 6, s2 = 7; /* t1 (x6), t2 (x7) — unmanaged scratch */ + + /* Q0 = 0 */ + ES(0x23, 2, addr, 0, 0); /* sw x0, 0(addr) */ + + /* Q1 = (D0 & 0xF) << 28 */ + EI(0x13, 7, s1, d0, 0xF); /* andi t1, d0, 0xF */ + EI(0x13, 1, s1, s1, 28); /* slli t1, t1, 28 */ + ES(0x23, 2, addr, s1, 4); /* sw t1, 4(addr) */ + + /* Q2 = (D0 >> 4) | ((D1 & 0xF) << 28) */ + EI(0x13, 5, s1, d0, 4); /* srli t1, d0, 4 */ + EI(0x13, 7, s2, d1, 0xF); /* andi t2, d1, 0xF */ + EI(0x13, 1, s2, s2, 28); /* slli t2, t2, 28 */ + ER(0x33, 6, s1, s1, s2, 0); /* or t1, t1, t2 */ + ES(0x23, 2, addr, s1, 8); /* sw t1, 8(addr) */ + + /* Q3: build quad exponent, then combine with mantissa and sign */ + + /* Extract double exponent into s1 */ + EI(0x13, 5, s1, d1, 20); /* srli t1, d1, 20 */ + EI(0x13, 7, s1, s1, 0x7FF); /* andi t1, t1, 0x7FF */ + + /* if double_exp == 0 → quad_exp = 0 (zero/denorm), skip bias. 
+ 8 instructions ahead = 32 bytes to .Lafter_bias */ + o(0x63 | (0 << 12) | (s1 << 15) | (0 << 20) + | (0 << 7) | (0 << 8) | (1 << 25) | (0 << 31)); + /* beq t1, x0, +32 */ + + /* if double_exp == 0x7FF → inf/NaN, set quad_exp = 0x7FFF. + 5 instructions ahead = 20 bytes to .Linf_nan */ + EI(0x13, 0, s2, 0, 0x7FF); /* li t2, 0x7FF */ + o(0x63 | (0 << 12) | (s1 << 15) | (s2 << 20) + | (0 << 7) | (0xA << 8) | (0 << 25) | (0 << 31)); + /* beq t1, t2, +20 */ + + /* Normal: quad_exp = double_exp + 15360 (0x3C00) */ + o(0x37 | (s2 << 7) | (4 << 12)); /* lui t2, 4 (= 0x4000) */ + EI(0x13, 0, s2, s2, -1024); /* addi t2, t2, -1024 (= 0x3C00) */ + ER(0x33, 0, s1, s1, s2, 0); /* add t1, t1, t2 */ + o(0x6F | (0 << 7) | (0 << 12) | (0 << 20) + | (6 << 21) | (0 << 31)); /* jal x0, +12 (skip inf/nan) */ + + /* .Linf_nan: quad_exp = 0x7FFF */ + o(0x37 | (s1 << 7) | (8 << 12)); /* lui t1, 8 (= 0x8000) */ + EI(0x13, 0, s1, s1, -1); /* addi t1, t1, -1 (= 0x7FFF) */ + + /* .Lafter_bias: s1 = quad_exp */ + + /* Shift exponent into position */ + EI(0x13, 1, s1, s1, 16); /* slli t1, t1, 16 */ + + /* mantissa[51:36] = (D1 >> 4) & 0xFFFF — use slli+srli to mask */ + EI(0x13, 5, s2, d1, 4); /* srli t2, d1, 4 */ + EI(0x13, 1, s2, s2, 16); /* slli t2, t2, 16 */ + EI(0x13, 5, s2, s2, 16); /* srli t2, t2, 16 */ + ER(0x33, 6, s1, s1, s2, 0); /* or t1, t1, t2 */ + + /* sign = D1[31] */ + EI(0x13, 5, s2, d1, 31); /* srli t2, d1, 31 */ + EI(0x13, 1, s2, s2, 31); /* slli t2, t2, 31 */ + ER(0x33, 6, s1, s1, s2, 0); /* or t1, t1, t2 */ + + ES(0x23, 2, addr, s1, 12); /* sw t1, 12(addr) */ +} + +static void gen_opil(int op) +{ + int a, b, d; + int func3 = 0; + if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) { + int fc = vtop->c.i; + if (fc == vtop->c.i && !LOW_OVERFLOW(fc)) { + int m = 31; /* RV32: shift mask is 5 bits */ + vswap(); + gv(RC_INT); + a = ireg(vtop[0].r); + --vtop; + d = get_reg(RC_INT); + ++vtop; + vswap(); + switch (op) { + case '-': + if (fc <= -(1 << 11)) + break; + fc = 
-fc; + case '+': + func3 = 0; // addi d, a, fc + do_cop: + EI(0x13, func3, ireg(d), a, fc); + --vtop; + if (op >= TOK_ULT && op <= TOK_GT) { + vset_VT_CMP(TOK_NE); + vtop->cmp_r = ireg(d) | 0 << 8; + } else + vtop[0].r = d; + return; + case TOK_LE: + if (fc >= (1 << 11) - 1) + break; + ++fc; + case TOK_LT: func3 = 2; goto do_cop; // slti d, a, fc + case TOK_ULE: + if (fc >= (1 << 11) - 1 || fc == -1) + break; + ++fc; + case TOK_ULT: func3 = 3; goto do_cop; // sltiu d, a, fc + case '^': func3 = 4; goto do_cop; // xori d, a, fc + case '|': func3 = 6; goto do_cop; // ori d, a, fc + case '&': func3 = 7; goto do_cop; // andi d, a, fc + case TOK_SHL: func3 = 1; fc &= m; goto do_cop; // slli d, a, fc + case TOK_SHR: func3 = 5; fc &= m; goto do_cop; // srli d, a, fc + case TOK_SAR: func3 = 5; fc = 1024 | (fc & m); goto do_cop; + + case TOK_UGE: /* -> TOK_ULT */ + case TOK_UGT: /* -> TOK_ULE */ + case TOK_GE: /* -> TOK_LT */ + case TOK_GT: /* -> TOK_LE */ + gen_opil(op - 1); + vtop->cmp_op ^= 1; + return; + + case TOK_NE: + case TOK_EQ: + if (fc) + gen_opil('-'), a = ireg(vtop++->r); + --vtop; + vset_VT_CMP(op); + vtop->cmp_r = a | 0 << 8; + return; + } + } + } + gv2(RC_INT, RC_INT); + a = ireg(vtop[-1].r); + b = ireg(vtop[0].r); + vtop -= 2; + d = get_reg(RC_INT); + vtop++; + vtop[0].r = d; + d = ireg(d); + switch (op) { + default: + if (op >= TOK_ULT && op <= TOK_GT) { + vset_VT_CMP(op); + vtop->cmp_r = a | b << 8; + break; + } + tcc_error("implement me: %s(%s)", __FUNCTION__, get_tok_str(op, NULL)); + break; + + case '+': + ER(0x33, 0, d, a, b, 0); // add d, a, b + break; + case '-': + ER(0x33, 0, d, a, b, 0x20); // sub d, a, b + break; + case TOK_SAR: + ER(0x33, 5, d, a, b, 0x20); // sra d, a, b + break; + case TOK_SHR: + ER(0x33, 5, d, a, b, 0); // srl d, a, b + break; + case TOK_SHL: + ER(0x33, 1, d, a, b, 0); // sll d, a, b + break; + case '*': + ER(0x33, 0, d, a, b, 1); // mul d, a, b + break; + case '/': + case TOK_PDIV: + ER(0x33, 4, d, a, b, 1); // div d, a, b + 
break; + case '&': + ER(0x33, 7, d, a, b, 0); // and d, a, b + break; + case '^': + ER(0x33, 4, d, a, b, 0); // xor d, a, b + break; + case '|': + ER(0x33, 6, d, a, b, 0); // or d, a, b + break; + case '%': + ER(0x33, 6, d, a, b, 1); // rem d, a, b + break; + case TOK_UMOD: + ER(0x33, 7, d, a, b, 1); // remu d, a, b + break; + case TOK_UDIV: + ER(0x33, 5, d, a, b, 1); // divu d, a, b + break; + + /* Long long carry operations (called by tccgen.c gen_opl) */ + case TOK_ADDC1: // add low words, save carry in t0 + ER(0x33, 0, d, a, b, 0); // add d, a, b + ER(0x33, 3, CARRY_REG, d, b, 0); // sltu t0, d, b + break; + case TOK_ADDC2: // add high words with carry from t0 + ER(0x33, 0, d, a, b, 0); // add d, a, b + ER(0x33, 0, d, d, CARRY_REG, 0); // add d, d, t0 + break; + case TOK_SUBC1: // sub low words, save borrow in t0 + ER(0x33, 3, CARRY_REG, a, b, 0); // sltu t0, a, b + ER(0x33, 0, d, a, b, 0x20); // sub d, a, b + break; + case TOK_SUBC2: // sub high words with borrow from t0 + ER(0x33, 0, d, a, b, 0x20); // sub d, a, b + ER(0x33, 0, d, d, CARRY_REG, 0x20); // sub d, d, t0 + break; + } +} + +ST_FUNC void gen_opi(int op) +{ + /* Handle TOK_UMULL specially: needs two result registers */ + if (op == TOK_UMULL) { + int a, b, dl, dh; + gv2(RC_INT, RC_INT); + a = ireg(vtop[-1].r); + b = ireg(vtop[0].r); + /* Save both source regs to temporaries first, so register + allocation for dl/dh can't clobber them. */ + vtop--; + dl = get_reg(RC_INT); + vtop->r = dl; + dh = get_reg(RC_INT); + /* mul reads both sources before writing dest, so + dl overlapping a source is fine. But mulhu writes dh + before mul reads, so ensure dh != a and dh != b. 
*/ + if (ireg(dh) == a || ireg(dh) == b) { + /* Use t0 (x5) as scratch for mulhu, then move to dh */ + ER(0x33, 3, 5, a, b, 1); // mulhu t0, a, b + ER(0x33, 0, ireg(dl), a, b, 1); // mul dl, a, b + EI(0x13, 0, ireg(dh), 5, 0); // mv dh, t0 + } else { + ER(0x33, 3, ireg(dh), a, b, 1); // mulhu dh, a, b + ER(0x33, 0, ireg(dl), a, b, 1); // mul dl, a, b + } + vtop->r = dl; + vtop->r2 = dh; + return; + } + gen_opil(op); +} + +/* On RV32, gen_opl is provided by tccgen.c (PTR_SIZE==4) which + decomposes long long ops into TOK_ADDC1/ADDC2/SUBC1/SUBC2/UMULL + handled by gen_opi above. */ + +/* FPU register numbers (hardware encoding) */ +#define FA0 10 +#define FA1 11 + +/* Emit: fmv.w.x fd, rs — move int reg to float reg */ +static void fmv_w_x(int fd, int rs) +{ + ER(0x53, 0, fd, rs, 0, 0x78); // fmv.w.x fd, rs +} + +/* Emit: fmv.x.w rd, fs — move float reg to int reg */ +static void fmv_x_w(int rd, int fs) +{ + ER(0x53, 0, rd, fs, 0, 0x70); // fmv.x.w rd, fs +} + +/* gen_opf_fpu: inline FPU for float/double arithmetic and comparisons. + Values stay in integer registers (soft-float ABI); we transfer to + fa0/fa1, operate, and transfer back. Uses save_regs + fixed + register positions (a0-a3) like the soft-float path for robustness. */ +static void gen_opf_fpu(int op) +{ + int ft = vtop[0].type.t & VT_BTYPE; + CType type = vtop[0].type; + int dbl = (ft == VT_DOUBLE || ft == VT_LDOUBLE); + int is_cmp = (op >= TOK_EQ && op <= TOK_GT) || op == TOK_NE; + + /* Spill all live values and place args in fixed registers, + exactly like the soft-float path. */ + save_regs(1); + if (dbl) { + gv(RC_R(2)); /* arg2 → a2 */ + if (vtop->r2 != TREG_R(3)) { + EI(0x13, 0, 13, ireg(vtop->r2), 0); // mv a3, r2 + vtop->r2 = TREG_R(3); + } + vswap(); + gv(RC_R(0)); /* arg1 → a0 */ + if (vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + /* a0:a1 = arg1, a2:a3 = arg2. Store to stack, load FP regs. 
*/ + EI(0x13, 0, 2, 2, -16); // addi sp, sp, -16 + ES(0x23, 2, 2, 10, 0); // sw a0, 0(sp) + ES(0x23, 2, 2, 11, 4); // sw a1, 4(sp) + ES(0x23, 2, 2, 12, 8); // sw a2, 8(sp) + ES(0x23, 2, 2, 13, 12); // sw a3, 12(sp) + EI(0x07, 3, FA0, 2, 0); // fld fa0, 0(sp) + EI(0x07, 3, FA1, 2, 8); // fld fa1, 8(sp) + } else { + gv(RC_R(1)); /* arg2 → a1 */ + vswap(); + gv(RC_R(0)); /* arg1 → a0 */ + fmv_w_x(FA0, 10); // fmv.w.x fa0, a0 + fmv_w_x(FA1, 11); // fmv.w.x fa1, a1 + } + + if (is_cmp) { + /* Produce a 0/1 boolean in a0 where 1 = condition true. + Then set VT_CMP with TOK_NE against x0 so the generic + branch/load machinery treats nonzero as "true". */ + int f7 = dbl ? 0x51 : 0x50; + + switch (op) { + case TOK_EQ: + ER(0x53, 2, 10, FA0, FA1, f7); // feq a0, fa0, fa1 + break; + case TOK_NE: + ER(0x53, 2, 10, FA0, FA1, f7); // feq a0, fa0, fa1 + EI(0x13, 4, 10, 10, 1); // xori a0, a0, 1 + break; + case TOK_LT: + ER(0x53, 1, 10, FA0, FA1, f7); // flt a0, fa0, fa1 + break; + case TOK_LE: + ER(0x53, 0, 10, FA0, FA1, f7); // fle a0, fa0, fa1 + break; + case TOK_GT: + ER(0x53, 1, 10, FA1, FA0, f7); // flt a0, fa1, fa0 + break; + case TOK_GE: + ER(0x53, 0, 10, FA1, FA0, f7); // fle a0, fa1, fa0 + break; + } + + if (dbl) + EI(0x13, 0, 2, 2, 16); // addi sp, sp, 16 + + vtop -= 2; /* pop both args */ + vpushi(0); + vtop->r = REG_IRET; /* result in a0 */ + vtop->r2 = VT_CONST; + vset_VT_CMP(op); + vtop->cmp_r = 10 | (0 << 8); /* compare a0 against x0 */ + vtop->cmp_op = TOK_NE; /* nonzero = condition true */ + return; + } + + /* Arithmetic: fadd/fsub/fmul/fdiv */ + { + int f7; + switch (op) { + case '+': f7 = dbl ? 0x01 : 0x00; break; + case '-': f7 = dbl ? 0x05 : 0x04; break; + case '*': f7 = dbl ? 0x09 : 0x08; break; + case '/': f7 = dbl ? 
0x0D : 0x0C; break; + default: assert(0); f7 = 0; break; + } + ER(0x53, 7, FA0, FA0, FA1, f7); // fop fa0, fa0, fa1 (rm=dynamic) + } + + /* Move result back to integer registers */ + vtop -= 2; /* pop both args */ + vpushi(0); + vtop->r = REG_IRET; + vtop->r2 = VT_CONST; + vtop->type = type; + if (dbl) { + ES(0x27, 3, 2, FA0, 0); // fsd fa0, 0(sp) + EI(0x03, 2, 10, 2, 0); // lw a0, 0(sp) + EI(0x03, 2, 11, 2, 4); // lw a1, 4(sp) + EI(0x13, 0, 2, 2, 16); // addi sp, sp, 16 + vtop->r2 = TREG_R(1); + } else { + fmv_x_w(10, FA0); // fmv.x.w a0, fa0 + } +} + +ST_FUNC void gen_opf(int op) +{ + if (tcc_state->fpu) { + gen_opf_fpu(op); + return; + } + /* RV32IMA: no FPU, all float ops through library calls. + Use save_regs+gcall_or_jmp instead of gfunc_call to avoid + nested function call issues when used inside argument evaluation. */ + int func = 0; + int cond = -1; + int ft = vtop[0].type.t & VT_BTYPE; + CType type = vtop[0].type; + int dbl = (ft == VT_DOUBLE || ft == VT_LDOUBLE); + + if (ft == VT_FLOAT) { + switch (op) { + case '*': func = TOK___mulsf3; break; + case '+': func = TOK___addsf3; break; + case '-': func = TOK___subsf3; break; + case '/': func = TOK___divsf3; break; + case TOK_EQ: func = TOK___eqsf2; cond = 1; break; + case TOK_NE: func = TOK___nesf2; cond = 0; break; + case TOK_LT: func = TOK___ltsf2; cond = 10; break; + case TOK_GE: func = TOK___gesf2; cond = 11; break; + case TOK_LE: func = TOK___lesf2; cond = 12; break; + case TOK_GT: func = TOK___gtsf2; cond = 13; break; + default: assert(0); break; + } + } else if (dbl) { + switch (op) { + case '*': func = TOK___muldf3; break; + case '+': func = TOK___adddf3; break; + case '-': func = TOK___subdf3; break; + case '/': func = TOK___divdf3; break; + case TOK_EQ: func = TOK___eqdf2; cond = 1; break; + case TOK_NE: func = TOK___nedf2; cond = 0; break; + case TOK_LT: func = TOK___ltdf2; cond = 10; break; + case TOK_GE: func = TOK___gedf2; cond = 11; break; + case TOK_LE: func = TOK___ledf2; cond = 12; break; 
+ case TOK_GT: func = TOK___gtdf2; cond = 13; break; + default: assert(0); break; + } + } else { + assert(0); + } + + save_regs(1); + if (dbl) { + /* double: arg2 in a2:a3, arg1 in a0:a1 */ + gv(RC_R(2)); + if (vtop->r2 != TREG_R(3)) { + EI(0x13, 0, 13, ireg(vtop->r2), 0); // mv a3, r2 + vtop->r2 = TREG_R(3); + } + vswap(); + gv(RC_R(0)); + if (vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + } else { + /* float: arg2 in a1, arg1 in a0 */ + gv(RC_R(1)); + vswap(); + gv(RC_R(0)); + } + vpush_helper_func(func); + gcall_or_jmp(1); + vtop -= 3; /* pop helper, arg1, arg2 */ + vpushi(0); + vtop->r = REG_IRET; + vtop->r2 = VT_CONST; + if (cond < 0) { + vtop->type = type; + if (dbl) + vtop->r2 = TREG_R(1); + } else { + vpushi(0); + gen_opil(op); + } +} + +ST_FUNC void gen_cvt_itof(int t) +{ + int u, l, func; + u = vtop->type.t & VT_UNSIGNED; + l = (vtop->type.t & VT_BTYPE) == VT_LLONG; + + if (tcc_state->fpu && !l) { + /* Inline FPU: int32 → float/double */ + save_regs(1); + gv(RC_R(0)); /* source int in a0 */ + + if (t == VT_FLOAT) { + /* fcvt.s.w / fcvt.s.wu a0 → fa0 → a0 */ + ER(0x53, 7, FA0, 10, u ? 1 : 0, 0x68); + fmv_x_w(10, FA0); + } else { + /* fcvt.d.w / fcvt.d.wu a0 → fa0 → a0:a1 */ + ER(0x53, 7, FA0, 10, u ? 1 : 0, 0x69); + EI(0x13, 0, 2, 2, -8); // addi sp, sp, -8 + ES(0x27, 3, 2, FA0, 0); // fsd fa0, 0(sp) + EI(0x03, 2, 10, 2, 0); // lw a0, 0(sp) + EI(0x03, 2, 11, 2, 4); // lw a1, 4(sp) + EI(0x13, 0, 2, 2, 8); // addi sp, sp, 8 + } + vtop--; + vpushi(0); + vtop->type.t = t; + vtop->r = REG_IRET; + if (t == VT_DOUBLE || t == VT_LDOUBLE) + vtop->r2 = TREG_R(1); + return; + } + + /* soft-float: use library calls. + Use save_regs+gcall_or_jmp to avoid nested gfunc_call issues. */ + if (t == VT_FLOAT) { + if (l) + func = u ? TOK___floatundisf : TOK___floatdisf; + else + func = u ? TOK___floatunsisf : TOK___floatsisf; + } else { + /* VT_DOUBLE or VT_LDOUBLE */ + if (l) + func = u ? 
TOK___floatundidf : TOK___floatdidf; + else + func = u ? TOK___floatunsidf : TOK___floatsidf; + } + save_regs(1); + gv(RC_R(0)); + if (l && vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + vpush_helper_func(func); + gcall_or_jmp(1); + vtop -= 2; + vpushi(0); + vtop->type.t = t; + vtop->r = REG_IRET; + if (t == VT_DOUBLE || t == VT_LDOUBLE) + vtop->r2 = TREG_R(1); +} + +ST_FUNC void gen_cvt_ftoi(int t) +{ + int ft = vtop->type.t & VT_BTYPE; + int l = (t & VT_BTYPE) == VT_LLONG; + int u = t & VT_UNSIGNED; + int func; + + if (tcc_state->fpu && !l) { + /* Inline FPU: float/double → int32 */ + int dbl = (ft == VT_DOUBLE || ft == VT_LDOUBLE); + save_regs(1); + gv(RC_R(0)); /* source in a0 (or a0:a1 for double) */ + + if (dbl) { + if (vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + EI(0x13, 0, 2, 2, -8); // addi sp, sp, -8 + ES(0x23, 2, 2, 10, 0); // sw a0, 0(sp) + ES(0x23, 2, 2, 11, 4); // sw a1, 4(sp) + EI(0x07, 3, FA0, 2, 0); // fld fa0, 0(sp) + EI(0x13, 0, 2, 2, 8); // addi sp, sp, 8 + } else { + fmv_w_x(FA0, 10); // fmv.w.x fa0, a0 + } + + /* fcvt.w[u].s/d a0, fa0, rtz */ + ER(0x53, 1, 10, FA0, u ? 1 : 0, dbl ? 0x61 : 0x60); + + vtop--; + vpushi(0); + vtop->type.t = t; + vtop->r = REG_IRET; + return; + } + + /* soft-float: use library calls. + Use save_regs+gcall_or_jmp to avoid nested gfunc_call issues. */ + if (ft == VT_FLOAT) { + if (l) + func = u ? TOK___fixunssfdi : TOK___fixsfdi; + else + func = u ? TOK___fixunssfsi : TOK___fixsfsi; + } else { + /* VT_DOUBLE or VT_LDOUBLE */ + if (l) + func = u ? TOK___fixunsdfdi : TOK___fixdfdi; + else + func = u ? 
TOK___fixunsdfsi : TOK___fixdfsi; + } + save_regs(1); + gv(RC_R(0)); + if ((ft == VT_DOUBLE || ft == VT_LDOUBLE) && vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + vpush_helper_func(func); + gcall_or_jmp(1); + vtop -= 2; + vpushi(0); + vtop->type.t = t; + vtop->r = REG_IRET; + if (l) + vtop->r2 = TREG_R(1); +} + +ST_FUNC void gen_cvt_ftof(int dt) +{ + int st = vtop->type.t & VT_BTYPE; + int func; + dt &= VT_BTYPE; + if (st == dt) + return; + + if (tcc_state->fpu) { + /* Inline FPU: float↔double conversion */ + save_regs(1); + gv(RC_R(0)); /* source in a0 (or a0:a1 for double) */ + + if (dt == VT_DOUBLE || dt == VT_LDOUBLE) { + /* float → double: a0 → fa0 → fcvt.d.s → a0:a1 */ + fmv_w_x(FA0, 10); + ER(0x53, 0, FA0, FA0, 0, 0x21); // fcvt.d.s fa0, fa0 + EI(0x13, 0, 2, 2, -8); // addi sp, sp, -8 + ES(0x27, 3, 2, FA0, 0); // fsd fa0, 0(sp) + EI(0x03, 2, 10, 2, 0); // lw a0, 0(sp) + EI(0x03, 2, 11, 2, 4); // lw a1, 4(sp) + EI(0x13, 0, 2, 2, 8); // addi sp, sp, 8 + } else { + /* double → float: a0:a1 → fa0 → fcvt.s.d → a0 */ + if (vtop->r2 != TREG_R(1)) { + EI(0x13, 0, 11, ireg(vtop->r2), 0); // mv a1, r2 + vtop->r2 = TREG_R(1); + } + EI(0x13, 0, 2, 2, -8); // addi sp, sp, -8 + ES(0x23, 2, 2, 10, 0); // sw a0, 0(sp) + ES(0x23, 2, 2, 11, 4); // sw a1, 4(sp) + EI(0x07, 3, FA0, 2, 0); // fld fa0, 0(sp) + EI(0x13, 0, 2, 2, 8); // addi sp, sp, 8 + ER(0x53, 7, FA0, FA0, 1, 0x20); // fcvt.s.d fa0, fa0 + fmv_x_w(10, FA0); + } + vtop--; + vpushi(0); + vtop->type.t = dt; + vtop->r = REG_IRET; + if (dt == VT_DOUBLE || dt == VT_LDOUBLE) + vtop->r2 = TREG_R(1); + return; + } + + /* soft-float: use library calls for float<->double conversion */ + if (dt == VT_DOUBLE || dt == VT_LDOUBLE) { + func = TOK___extendsfdf2; + } else { + func = TOK___truncdfsf2; + } + save_regs(1); + gv(RC_R(0)); + if (st == VT_DOUBLE || st == VT_LDOUBLE) { + /* double is in register pair, ensure r2 = r+1 */ + if (vtop->r2 != 1 + vtop->r) { + EI(0x13, 
0, ireg(vtop->r) + 1, ireg(vtop->r2), 0); // mv Ra+1, RR2
            vtop->r2 = 1 + vtop->r;
        }
    }
    vpush_helper_func(func);
    gcall_or_jmp(1);
    vtop -= 2;
    vpushi(0);
    vtop->type.t = dt;
    /* a double result occupies the a0:a1 register pair */
    if (dt == VT_DOUBLE || dt == VT_LDOUBLE)
        vtop->r = REG_IRET, vtop->r2 = REG_IRET+1;
    else
        vtop->r = REG_IRET;
}

/* increment tcov counter */
/* Emit a PC-relative load/increment/store of the 32-bit test-coverage
   counter addressed by sv->sym.  Each auipc gets its own local label so
   the paired PCREL_LO12 relocations can find their PCREL_HI20. */
ST_FUNC void gen_increment_tcov (SValue *sv)
{
    int r1, r2;
    Sym label = {0};
    label.type.t = VT_VOID | VT_STATIC;

    vpushv(sv);
    vtop->r = r1 = get_reg(RC_INT);
    r2 = get_reg(RC_INT);
    r1 = ireg(r1);
    r2 = ireg(r2);
    greloca(cur_text_section, sv->sym, ind, R_RISCV_PCREL_HI20, 0);
    put_extern_sym(&label, cur_text_section, ind, 0);
    o(0x17 | (r1 << 7)); // auipc RR, 0 %pcrel_hi(sym)
    greloca(cur_text_section, &label, ind, R_RISCV_PCREL_LO12_I, 0);
    EI(0x03, 2, r2, r1, 0); // lw r2, x[r1]
    EI(0x13, 0, r2, r2, 1); // addi r2, r2, #1
    greloca(cur_text_section, sv->sym, ind, R_RISCV_PCREL_HI20, 0);
    label.c = 0; /* force new local ELF symbol */
    put_extern_sym(&label, cur_text_section, ind, 0);
    o(0x17 | (r1 << 7)); // auipc RR, 0 %pcrel_hi(sym)
    greloca(cur_text_section, &label, ind, R_RISCV_PCREL_LO12_S, 0);
    ES(0x23, 2, r1, r2, 0); // sw r2, [r1]
    vpop();
}

/* Computed goto: jump to the address on top of the value stack. */
ST_FUNC void ggoto(void)
{
    gcall_or_jmp(0);
    vtop--;
}

/* Save sp into the s0-relative slot at 'addr' (VLA bookkeeping).
   If the offset does not fit the 12-bit store immediate, form the
   address in t0 (x5) first. */
ST_FUNC void gen_vla_sp_save(int addr)
{
    if (LOW_OVERFLOW(addr)) {
        o(0x37 | (5 << 7) | UPPER(addr)); //lui t0,upper(addr)
        ER(0x33, 0, 5, 5, 8, 0); // add t0, t0, s0
        ES(0x23, 2, 5, 2, SIGN11(addr)); // sw sp, fc(t0)
    }
    else
        ES(0x23, 2, 8, 2, addr); // sw sp, fc(s0)
}

/* Restore sp from the s0-relative slot at 'addr' (mirror of
   gen_vla_sp_save, same large-offset handling via t0). */
ST_FUNC void gen_vla_sp_restore(int addr)
{
    if (LOW_OVERFLOW(addr)) {
        o(0x37 | (5 << 7) | UPPER(addr)); //lui t0,upper(addr)
        ER(0x33, 0, 5, 5, 8, 0); // add t0, t0, s0
        EI(0x03, 2, 2, 5, SIGN11(addr)); // lw sp, fc(t0)
    }
    else
        EI(0x03, 2, 2, 8, addr); // lw sp, fc(s0)
}

/* Allocate a variable-length array: round the size on the value stack
   up to 16 bytes and subtract it from sp.  With bounds checking an
   extra 16 bytes are reserved and the region is registered. */
ST_FUNC void gen_vla_alloc(CType *type, int align)
{
    int rr;
#if defined(CONFIG_TCC_BCHECK)
    if (tcc_state->do_bounds_check)
+ vpushv(vtop); +#endif + rr = ireg(gv(RC_INT)); +#if defined(CONFIG_TCC_BCHECK) + if (tcc_state->do_bounds_check) + EI(0x13, 0, rr, rr, 15+1); // addi RR, RR, 15+1 + else +#endif + EI(0x13, 0, rr, rr, 15); // addi RR, RR, 15 + EI(0x13, 7, rr, rr, -16); // andi, RR, RR, -16 + ER(0x33, 0, 2, 2, rr, 0x20); // sub sp, sp, rr + vpop(); +#if defined(CONFIG_TCC_BCHECK) + if (tcc_state->do_bounds_check) { + vpushi(0); + vtop->r = TREG_R(0); + o(0x00010513); /* mv a0,sp */ + vswap(); + vpush_helper_func(TOK___bound_new_region); + vrott(3); + gfunc_call(2); + func_bound_add_epilog = 1; + } +#endif +} +#endif diff --git a/riscv32-link.c b/riscv32-link.c new file mode 100644 index 000000000..e7d6aa89e --- /dev/null +++ b/riscv32-link.c @@ -0,0 +1,377 @@ +#ifdef TARGET_DEFS_ONLY + +#define EM_TCC_TARGET EM_RISCV + +#define R_DATA_32 R_RISCV_32 +#define R_DATA_PTR R_RISCV_32 +#define R_JMP_SLOT R_RISCV_JUMP_SLOT +#define R_GLOB_DAT R_RISCV_32 +#define R_COPY R_RISCV_COPY +#define R_RELATIVE R_RISCV_RELATIVE + +#define R_NUM R_RISCV_NUM + +#define ELF_START_ADDR 0x00010000 +#define ELF_PAGE_SIZE 0x1000 + +#define PCRELATIVE_DLLPLT 1 +#define RELOCATE_DLLPLT 1 + +#else /* !TARGET_DEFS_ONLY */ + +//#define DEBUG_RELOC +#include "tcc.h" + +/* Returns 1 for a code relocation, 0 for a data relocation. For unknown + relocations, returns -1. 
 */
ST_FUNC int code_reloc (int reloc_type)
{
    switch (reloc_type) {

        /* PC-relative control transfers patch code */
        case R_RISCV_BRANCH:
        case R_RISCV_CALL:
        case R_RISCV_JAL:
            return 1;

        /* address-materialization and data relocations */
        case R_RISCV_GOT_HI20:
        case R_RISCV_PCREL_HI20:
        case R_RISCV_PCREL_LO12_I:
        case R_RISCV_PCREL_LO12_S:
        case R_RISCV_32_PCREL:
        case R_RISCV_SET6:
        case R_RISCV_SET8:
        case R_RISCV_SET16:
        case R_RISCV_SUB6:
        case R_RISCV_ADD16:
        case R_RISCV_ADD32:
        case R_RISCV_SUB8:
        case R_RISCV_SUB16:
        case R_RISCV_SUB32:
        case R_RISCV_32:
        case R_RISCV_SET_ULEB128:
        case R_RISCV_SUB_ULEB128:
            return 0;

        case R_RISCV_CALL_PLT:
            return 1;
    }
    /* unknown relocation type */
    return -1;
}

/* Returns an enumerator to describe whether and when the relocation needs a
   GOT and/or PLT entry to be created. See tcc.h for a description of the
   different values. */
ST_FUNC int gotplt_entry_type (int reloc_type)
{
    switch (reloc_type) {
    /* linker-internal / in-place arithmetic relocs never need GOT/PLT */
    case R_RISCV_ALIGN:
    case R_RISCV_RELAX:
    case R_RISCV_RVC_BRANCH:
    case R_RISCV_RVC_JUMP:
    case R_RISCV_JUMP_SLOT:
    case R_RISCV_SET6:
    case R_RISCV_SET8:
    case R_RISCV_SET16:
    case R_RISCV_SUB6:
    case R_RISCV_ADD16:
    case R_RISCV_SUB8:
    case R_RISCV_SUB16:
    case R_RISCV_SET_ULEB128:
    case R_RISCV_SUB_ULEB128:
        return NO_GOTPLT_ENTRY;

    /* may need an entry depending on whether the symbol resolves
       locally or to a shared object */
    case R_RISCV_BRANCH:
    case R_RISCV_CALL:
    case R_RISCV_PCREL_HI20:
    case R_RISCV_PCREL_LO12_I:
    case R_RISCV_PCREL_LO12_S:
    case R_RISCV_32_PCREL:
    case R_RISCV_ADD32:
    case R_RISCV_SUB32:
    case R_RISCV_32:
    case R_RISCV_JAL:
    case R_RISCV_CALL_PLT:
        return AUTO_GOTPLT_ENTRY;

    /* explicit GOT access always needs a GOT slot */
    case R_RISCV_GOT_HI20:
        return ALWAYS_GOTPLT_ENTRY;
    }
    return -1;
}

/* Reserve a PLT entry for the symbol whose GOT slot is at 'got_offset'.
   The first call also reserves the 32-byte PLT header (PLT[0]).  Each
   entry is 16 bytes; its first word temporarily holds got_offset, which
   relocate_plt() later overwrites with the real instructions.  Returns
   the entry's offset within the PLT section.  'attr' is unused here. */
ST_FUNC unsigned create_plt_entry(TCCState *s1, unsigned got_offset, struct sym_attr *attr)
{
    Section *plt = s1->plt;
    uint8_t *p;
    unsigned plt_offset;

    if (plt->data_offset == 0)
        section_ptr_add(plt, 32);
    plt_offset = plt->data_offset;

    p = section_ptr_add(plt, 16);
    write32le(p, got_offset);
    return plt_offset;
}

/* relocate the PLT: compute addresses and offsets in the
PLT now that final + address for PLT and GOT are known (see fill_program_header) */ +ST_FUNC void relocate_plt(TCCState *s1) +{ + uint8_t *p, *p_end; + + if (!s1->plt) + return; + + p = s1->plt->data; + p_end = p + s1->plt->data_offset; + + if (p < p_end) { + uint32_t plt = s1->plt->sh_addr; + uint32_t got = s1->got->sh_addr; + uint32_t off = (got - plt + 0x800) >> 12; + if ((off + ((uint32_t)1 << 20)) >> 21) + tcc_error_noabort("Failed relocating PLT (off=0x%lx, got=0x%lx, plt=0x%lx)", (long)off, (long)got, (long)plt); + write32le(p, 0x397 | (off << 12)); // auipc t2, %pcrel_hi(got) + write32le(p + 4, 0x41c30333); // sub t1, t1, t3 + write32le(p + 8, 0x0003ae03 // lw t3, %pcrel_lo(got)(t2) + | (((got - plt) & 0xfff) << 20)); + write32le(p + 12, 0xfd430313); // addi t1, t1, -(32+12) + write32le(p + 16, 0x00038293 // addi t0, t2, %pcrel_lo(got) + | (((got - plt) & 0xfff) << 20)); + write32le(p + 20, 0x00235313); // srli t1, t1, log2(16/PTRSIZE) = 2 + write32le(p + 24, 0x0042a283); // lw t0, PTRSIZE(t0) + write32le(p + 28, 0x000e0067); // jr t3 + p += 32; + while (p < p_end) { + uint32_t pc = plt + (p - s1->plt->data); + uint32_t addr = got + read32le(p); + uint32_t off = (addr - pc + 0x800) >> 12; + if ((off + ((uint32_t)1 << 20)) >> 21) + tcc_error_noabort("Failed relocating PLT (off=0x%lx, addr=0x%lx, pc=0x%lx)", (long)off, (long)addr, (long)pc); + write32le(p, 0xe17 | (off << 12)); // auipc t3, %pcrel_hi(func@got) + write32le(p + 4, 0x000e2e03 // lw t3, %pcrel_lo(func@got)(t3) + | (((addr - pc) & 0xfff) << 20)); + write32le(p + 8, 0x000e0367); // jalr t1, t3 + write32le(p + 12, 0x00000013); // nop + p += 16; + } + } + + if (s1->plt->reloc) { + ElfW_Rel *rel; + p = s1->got->data; + for_each_elem(s1->plt->reloc, 0, rel, ElfW_Rel) { + write32le(p + rel->r_offset, s1->plt->sh_addr); + } + } +} + +static void riscv32_record_pcrel_hi(TCCState *s1, addr_t addr, addr_t val) +{ + int n = s1->nb_pcrel_hi_entries; + if (n >= s1->alloc_pcrel_hi_entries) { + int new_alloc = 
s1->alloc_pcrel_hi_entries ? s1->alloc_pcrel_hi_entries * 2 : 64; + s1->pcrel_hi_entries = tcc_realloc(s1->pcrel_hi_entries, + new_alloc * sizeof(*s1->pcrel_hi_entries)); + s1->alloc_pcrel_hi_entries = new_alloc; + } + s1->pcrel_hi_entries[n].addr = addr; + s1->pcrel_hi_entries[n].val = val; + s1->nb_pcrel_hi_entries = n + 1; + last_hi.addr = addr; + last_hi.val = val; +} + +static int riscv32_lookup_pcrel_hi(TCCState *s1, addr_t hi_addr, addr_t *hi_val) +{ + int i; + struct pcrel_hi *entry; + if (s1->nb_pcrel_hi_entries && hi_addr == last_hi.addr) { + *hi_val = last_hi.val; + return 1; + } + for (i = s1->nb_pcrel_hi_entries - 1; i >= 0; --i) { + entry = &s1->pcrel_hi_entries[i]; + if (entry->addr == hi_addr) { + last_hi = *entry; + *hi_val = entry->val; + return 1; + } + } + return 0; +} + +ST_FUNC void relocate(TCCState *s1, ElfW_Rel *rel, int type, unsigned char *ptr, + addr_t addr, addr_t val) +{ + uint32_t off32; + int sym_index = ELFW(R_SYM)(rel->r_info), esym_index; + + switch(type) { + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + return; + + case R_RISCV_BRANCH: + off32 = val - addr; + if ((off32 + (1 << 12)) & ~(uint32_t)0x1ffe) + tcc_error_noabort("R_RISCV_BRANCH relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + off32 >>= 1; + write32le(ptr, (read32le(ptr) & ~0xfe000f80) + | ((off32 & 0x800) << 20) + | ((off32 & 0x3f0) << 21) + | ((off32 & 0x00f) << 8) + | ((off32 & 0x400) >> 3)); + return; + case R_RISCV_JAL: + off32 = val - addr; + if ((off32 + (1 << 21)) & ~(((uint32_t)1 << 22) - 2)) + tcc_error_noabort("R_RISCV_JAL relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + write32le(ptr, (read32le(ptr) & 0xfff) + | (((off32 >> 12) & 0xff) << 12) + | (((off32 >> 11) & 1) << 20) + | (((off32 >> 1) & 0x3ff) << 21) + | (((off32 >> 20) & 1) << 31)); + return; + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + write32le(ptr, (read32le(ptr) & 0xfff) + | ((val - addr + 0x800) & ~0xfff)); + write32le(ptr + 4, (read32le(ptr + 4) & 
0xfffff) + | (((val - addr) & 0xfff) << 20)); + return; + case R_RISCV_PCREL_HI20: +#ifdef DEBUG_RELOC + printf("PCREL_HI20: val=%lx addr=%lx\n", (long)val, (long)addr); +#endif + off32 = (int32_t)(val - addr + 0x800) >> 12; + write32le(ptr, (read32le(ptr) & 0xfff) + | ((off32 & 0xfffff) << 12)); + riscv32_record_pcrel_hi(s1, addr, val); + return; + case R_RISCV_GOT_HI20: + val = s1->got->sh_addr + get_sym_attr(s1, sym_index, 0)->got_offset; + off32 = (int32_t)(val - addr + 0x800) >> 12; + write32le(ptr, (read32le(ptr) & 0xfff) + | ((off32 & 0xfffff) << 12)); + riscv32_record_pcrel_hi(s1, addr, val); + return; + case R_RISCV_PCREL_LO12_I: +#ifdef DEBUG_RELOC + printf("PCREL_LO12_I: val=%lx addr=%lx\n", (long)val, (long)addr); +#endif + addr = val; + if (!riscv32_lookup_pcrel_hi(s1, addr, &val)) + tcc_error_noabort("unsupported hi/lo pcrel reloc scheme"); + write32le(ptr, (read32le(ptr) & 0xfffff) + | (((val - addr) & 0xfff) << 20)); + return; + case R_RISCV_PCREL_LO12_S: + addr = val; + if (!riscv32_lookup_pcrel_hi(s1, addr, &val)) + tcc_error_noabort("unsupported hi/lo pcrel reloc scheme"); + off32 = val - addr; + write32le(ptr, (read32le(ptr) & ~0xfe000f80) + | ((off32 & 0xfe0) << 20) + | ((off32 & 0x01f) << 7)); + return; + + case R_RISCV_RVC_BRANCH: + off32 = (val - addr); + if ((off32 + (1 << 8)) & ~(uint32_t)0x1fe) + tcc_error_noabort("R_RISCV_RVC_BRANCH relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + write16le(ptr, (read16le(ptr) & 0xe383) + | (((off32 >> 5) & 1) << 2) + | (((off32 >> 1) & 3) << 3) + | (((off32 >> 6) & 3) << 5) + | (((off32 >> 3) & 3) << 10) + | (((off32 >> 8) & 1) << 12)); + return; + case R_RISCV_RVC_JUMP: + off32 = (val - addr); + if ((off32 + (1 << 11)) & ~(uint32_t)0xffe) + tcc_error_noabort("R_RISCV_RVC_JUMP relocation failed" + " (val=%lx, addr=%lx)", (long)val, (long)addr); + write16le(ptr, (read16le(ptr) & 0xe003) + | (((off32 >> 5) & 1) << 2) + | (((off32 >> 1) & 7) << 3) + | (((off32 >> 7) & 1) << 6) + | 
(((off32 >> 6) & 1) << 7) + | (((off32 >> 10) & 1) << 8) + | (((off32 >> 8) & 3) << 9) + | (((off32 >> 4) & 1) << 11) + | (((off32 >> 11) & 1) << 12)); + return; + + case R_RISCV_32: + if (s1->output_type & TCC_OUTPUT_DYN) { + qrel->r_offset = rel->r_offset; + qrel->r_info = ELFW(R_INFO)(0, R_RISCV_RELATIVE); + qrel->r_addend = (int)read32le(ptr) + val; + qrel++; + } + add32le(ptr, val); + return; + case R_RISCV_JUMP_SLOT: + add32le(ptr, val); + return; + case R_RISCV_ADD32: + write32le(ptr, read32le(ptr) + val); + return; + case R_RISCV_SUB32: + write32le(ptr, read32le(ptr) - val); + return; + case R_RISCV_ADD16: + write16le(ptr, read16le(ptr) + val); + return; + case R_RISCV_SUB8: + *ptr -= val; + return; + case R_RISCV_SUB16: + write16le(ptr, read16le(ptr) - val); + return; + case R_RISCV_SET6: + *ptr = (*ptr & ~0x3f) | (val & 0x3f); + return; + case R_RISCV_SET8: + *ptr = (*ptr & ~0xff) | (val & 0xff); + return; + case R_RISCV_SET16: + write16le(ptr, val); + return; + case R_RISCV_SUB6: + *ptr = (*ptr & ~0x3f) | ((*ptr - val) & 0x3f); + return; + case R_RISCV_32_PCREL: + if (s1->output_type & TCC_OUTPUT_DYN) { + /* DLL relocation */ + esym_index = get_sym_attr(s1, sym_index, 0)->dyn_index; + if (esym_index) { + qrel->r_offset = rel->r_offset; + qrel->r_info = ELFW(R_INFO)(esym_index, R_RISCV_32_PCREL); + qrel->r_addend = (int)read32le(ptr) + rel->r_addend; + qrel++; + break; + } + } + add32le(ptr, val - addr); + return; + case R_RISCV_SET_ULEB128: + case R_RISCV_SUB_ULEB128: + /* ignore. 
used in section .debug_loclists */ + return; + case R_RISCV_COPY: + /* XXX */ + return; + + default: + fprintf(stderr, "FIXME: handle reloc type %x at %x [%p] to %x\n", + type, (unsigned)addr, ptr, (unsigned)val); + return; + } +} +#endif diff --git a/riscv32-tok.h b/riscv32-tok.h new file mode 100644 index 000000000..0d48bb8f8 --- /dev/null +++ b/riscv32-tok.h @@ -0,0 +1,490 @@ +/* ------------------------------------------------------------------ */ +/* WARNING: relative order of tokens is important. */ + +/* + * The specifications are available under https://riscv.org/technical/specifications/ + */ + +#define DEF_ASM_WITH_SUFFIX(x, y) \ + DEF(TOK_ASM_ ## x ## _ ## y, #x "." #y) + +#define DEF_ASM_WITH_SUFFIXES(x, y, z) \ + DEF(TOK_ASM_ ## x ## _ ## y ## _ ## z, #x "." #y "." #z) + +#define DEF_ASM_FENCE(x) \ + DEF(TOK_ASM_ ## x ## _fence, #x) + +/* register */ + /* integer */ + DEF_ASM(x0) + DEF_ASM(x1) + DEF_ASM(x2) + DEF_ASM(x3) + DEF_ASM(x4) + DEF_ASM(x5) + DEF_ASM(x6) + DEF_ASM(x7) + DEF_ASM(x8) + DEF_ASM(x9) + DEF_ASM(x10) + DEF_ASM(x11) + DEF_ASM(x12) + DEF_ASM(x13) + DEF_ASM(x14) + DEF_ASM(x15) + DEF_ASM(x16) + DEF_ASM(x17) + DEF_ASM(x18) + DEF_ASM(x19) + DEF_ASM(x20) + DEF_ASM(x21) + DEF_ASM(x22) + DEF_ASM(x23) + DEF_ASM(x24) + DEF_ASM(x25) + DEF_ASM(x26) + DEF_ASM(x27) + DEF_ASM(x28) + DEF_ASM(x29) + DEF_ASM(x30) + DEF_ASM(x31) + /* float */ + DEF_ASM(f0) + DEF_ASM(f1) + DEF_ASM(f2) + DEF_ASM(f3) + DEF_ASM(f4) + DEF_ASM(f5) + DEF_ASM(f6) + DEF_ASM(f7) + DEF_ASM(f8) + DEF_ASM(f9) + DEF_ASM(f10) + DEF_ASM(f11) + DEF_ASM(f12) + DEF_ASM(f13) + DEF_ASM(f14) + DEF_ASM(f15) + DEF_ASM(f16) + DEF_ASM(f17) + DEF_ASM(f18) + DEF_ASM(f19) + DEF_ASM(f20) + DEF_ASM(f21) + DEF_ASM(f22) + DEF_ASM(f23) + DEF_ASM(f24) + DEF_ASM(f25) + DEF_ASM(f26) + DEF_ASM(f27) + DEF_ASM(f28) + DEF_ASM(f29) + DEF_ASM(f30) + DEF_ASM(f31) + +/* register ABI mnemonics, refer to RISC-V ABI 1.0 */ + /* integer */ + DEF_ASM(zero) + DEF_ASM(ra) + DEF_ASM(sp) + DEF_ASM(gp) + DEF_ASM(tp) + 
DEF_ASM(t0) + DEF_ASM(t1) + DEF_ASM(t2) + DEF_ASM(s0) + DEF_ASM(s1) + DEF_ASM(a0) + DEF_ASM(a1) + DEF_ASM(a2) + DEF_ASM(a3) + DEF_ASM(a4) + DEF_ASM(a5) + DEF_ASM(a6) + DEF_ASM(a7) + DEF_ASM(s2) + DEF_ASM(s3) + DEF_ASM(s4) + DEF_ASM(s5) + DEF_ASM(s6) + DEF_ASM(s7) + DEF_ASM(s8) + DEF_ASM(s9) + DEF_ASM(s10) + DEF_ASM(s11) + DEF_ASM(t3) + DEF_ASM(t4) + DEF_ASM(t5) + DEF_ASM(t6) + /* float */ + DEF_ASM(ft0) + DEF_ASM(ft1) + DEF_ASM(ft2) + DEF_ASM(ft3) + DEF_ASM(ft4) + DEF_ASM(ft5) + DEF_ASM(ft6) + DEF_ASM(ft7) + DEF_ASM(fs0) + DEF_ASM(fs1) + DEF_ASM(fa0) + DEF_ASM(fa1) + DEF_ASM(fa2) + DEF_ASM(fa3) + DEF_ASM(fa4) + DEF_ASM(fa5) + DEF_ASM(fa6) + DEF_ASM(fa7) + DEF_ASM(fs2) + DEF_ASM(fs3) + DEF_ASM(fs4) + DEF_ASM(fs5) + DEF_ASM(fs6) + DEF_ASM(fs7) + DEF_ASM(fs8) + DEF_ASM(fs9) + DEF_ASM(fs10) + DEF_ASM(fs11) + DEF_ASM(ft8) + DEF_ASM(ft9) + DEF_ASM(ft10) + DEF_ASM(ft11) + /* not in the ABI */ + DEF_ASM(pc) + +/* Loads */ + + DEF_ASM(lb) + DEF_ASM(lh) + DEF_ASM(lw) + DEF_ASM(lbu) + DEF_ASM(lhu) + /* RV64 */ + DEF_ASM(ld) + DEF_ASM(lwu) + +/* Stores */ + + DEF_ASM(sb) + DEF_ASM(sh) + DEF_ASM(sw) + /* RV64 */ + DEF_ASM(sd) + +/* Shifts */ + + DEF_ASM(sll) + DEF_ASM(srl) + DEF_ASM(sra) + /* RV64 */ + DEF_ASM(slli) + DEF_ASM(srli) + DEF_ASM(sllw) + DEF_ASM(slliw) + DEF_ASM(srlw) + DEF_ASM(srliw) + DEF_ASM(srai) + DEF_ASM(sraw) + DEF_ASM(sraiw) + +/* Arithmetic */ + + DEF_ASM(add) + DEF_ASM(addi) + DEF_ASM(sub) + DEF_ASM(lui) + DEF_ASM(auipc) + /* RV64 */ + DEF_ASM(addw) + DEF_ASM(addiw) + DEF_ASM(subw) + +/* Logical */ + + DEF_ASM(xor) + DEF_ASM(xori) + DEF_ASM(or) + DEF_ASM(ori) + DEF_ASM(and) + DEF_ASM(andi) + +/* Compare */ + + DEF_ASM(slt) + DEF_ASM(slti) + DEF_ASM(sltu) + DEF_ASM(sltiu) + +/* Branch */ + + DEF_ASM(beq) + DEF_ASM(bne) + DEF_ASM(blt) + DEF_ASM(bge) + DEF_ASM(bltu) + DEF_ASM(bgeu) + +/* Jump */ + + DEF_ASM(jal) + DEF_ASM(jalr) + +/* Sync */ + + DEF_ASM(fence) + /* Zifencei extension */ + DEF_ASM_WITH_SUFFIX(fence, i) + +/* System call */ + + /* used to be 
called scall and sbreak */ + DEF_ASM(ecall) + DEF_ASM(ebreak) + +/* Counters */ + + DEF_ASM(rdcycle) + DEF_ASM(rdcycleh) + DEF_ASM(rdtime) + DEF_ASM(rdtimeh) + DEF_ASM(rdinstret) + DEF_ASM(rdinstreth) + +/* “M” Standard Extension for Integer Multiplication and Division, V2.0 */ + DEF_ASM(mul) + DEF_ASM(mulh) + DEF_ASM(mulhsu) + DEF_ASM(mulhu) + DEF_ASM(div) + DEF_ASM(divu) + DEF_ASM(rem) + DEF_ASM(remu) + /* RV64 */ + DEF_ASM(mulw) + DEF_ASM(divw) + DEF_ASM(divuw) + DEF_ASM(remw) + DEF_ASM(remuw) + +/* "F"/"D" Extension for Single/Double-Precision Floating Point Arithmetic, V2.2 */ + /* enough implemented for musl */ + DEF_ASM_WITH_SUFFIX(fsgnj, s) + DEF_ASM_WITH_SUFFIX(fsgnj, d) + DEF_ASM_WITH_SUFFIX(fmadd, s) + DEF_ASM_WITH_SUFFIX(fmadd, d) + DEF_ASM_WITH_SUFFIX(fmax, s) + DEF_ASM_WITH_SUFFIX(fmax, d) + DEF_ASM_WITH_SUFFIX(fmin, s) + DEF_ASM_WITH_SUFFIX(fmin, d) + DEF_ASM_WITH_SUFFIX(fsqrt, s) + DEF_ASM_WITH_SUFFIX(fsqrt, d) + +/* "C" Extension for Compressed Instructions, V2.0 */ + DEF_ASM_WITH_SUFFIX(c, nop) +/* Loads */ + DEF_ASM_WITH_SUFFIX(c, li) + DEF_ASM_WITH_SUFFIX(c, lw) + DEF_ASM_WITH_SUFFIX(c, lwsp) + /* single float */ + DEF_ASM_WITH_SUFFIX(c, flw) + DEF_ASM_WITH_SUFFIX(c, flwsp) + /* double float */ + DEF_ASM_WITH_SUFFIX(c, fld) + DEF_ASM_WITH_SUFFIX(c, fldsp) + /* RV64 */ + DEF_ASM_WITH_SUFFIX(c, ld) + DEF_ASM_WITH_SUFFIX(c, ldsp) + +/* Stores */ + + DEF_ASM_WITH_SUFFIX(c, sw) + DEF_ASM_WITH_SUFFIX(c, sd) + DEF_ASM_WITH_SUFFIX(c, swsp) + DEF_ASM_WITH_SUFFIX(c, sdsp) + /* single float */ + DEF_ASM_WITH_SUFFIX(c, fsw) + DEF_ASM_WITH_SUFFIX(c, fswsp) + /* double float */ + DEF_ASM_WITH_SUFFIX(c, fsd) + DEF_ASM_WITH_SUFFIX(c, fsdsp) + +/* Shifts */ + DEF_ASM_WITH_SUFFIX(c, slli) + DEF_ASM_WITH_SUFFIX(c, srli) + DEF_ASM_WITH_SUFFIX(c, srai) + +/* Arithmetic */ + DEF_ASM_WITH_SUFFIX(c, add) + DEF_ASM_WITH_SUFFIX(c, addi) + DEF_ASM_WITH_SUFFIX(c, addi16sp) + DEF_ASM_WITH_SUFFIX(c, addi4spn) + DEF_ASM_WITH_SUFFIX(c, lui) + DEF_ASM_WITH_SUFFIX(c, sub) + 
DEF_ASM_WITH_SUFFIX(c, mv) + /* RV64 */ + DEF_ASM_WITH_SUFFIX(c, addw) + DEF_ASM_WITH_SUFFIX(c, addiw) + DEF_ASM_WITH_SUFFIX(c, subw) + +/* Logical */ + DEF_ASM_WITH_SUFFIX(c, xor) + DEF_ASM_WITH_SUFFIX(c, or) + DEF_ASM_WITH_SUFFIX(c, and) + DEF_ASM_WITH_SUFFIX(c, andi) + +/* Branch */ + DEF_ASM_WITH_SUFFIX(c, beqz) + DEF_ASM_WITH_SUFFIX(c, bnez) + +/* Jump */ + DEF_ASM_WITH_SUFFIX(c, j) + DEF_ASM_WITH_SUFFIX(c, jr) + DEF_ASM_WITH_SUFFIX(c, jal) + DEF_ASM_WITH_SUFFIX(c, jalr) + +/* System call */ + DEF_ASM_WITH_SUFFIX(c, ebreak) + +/* XXX F Extension: Single-Precision Floating Point */ +/* XXX D Extension: Double-Precision Floating Point */ +/* from the spec: Tables 16.5–16.7 list the RVC instructions. */ + +/* “Zicsr”, Control and Status Register (CSR) Instructions, V2.0 */ + DEF_ASM(csrrw) + DEF_ASM(csrrs) + DEF_ASM(csrrc) + DEF_ASM(csrrwi) + DEF_ASM(csrrsi) + DEF_ASM(csrrci) + /* registers */ + DEF_ASM(cycle) + DEF_ASM(fcsr) + DEF_ASM(fflags) + DEF_ASM(frm) + DEF_ASM(instret) + DEF_ASM(time) + /* RV32I-only */ + DEF_ASM(cycleh) + DEF_ASM(instreth) + DEF_ASM(timeh) + /* pseudo */ + DEF_ASM(csrc) + DEF_ASM(csrci) + DEF_ASM(csrr) + DEF_ASM(csrs) + DEF_ASM(csrsi) + DEF_ASM(csrw) + DEF_ASM(csrwi) + DEF_ASM(frcsr) + DEF_ASM(frflags) + DEF_ASM(frrm) + DEF_ASM(fscsr) + DEF_ASM(fsflags) + DEF_ASM(fsrm) + +/* Privileged Instructions */ + + DEF_ASM(mrts) + DEF_ASM(mrth) + DEF_ASM(hrts) + DEF_ASM(wfi) + +/* pseudoinstructions */ + DEF_ASM(beqz) + DEF_ASM(bgez) + DEF_ASM(bgt) + DEF_ASM(bgtu) + DEF_ASM(bgtz) + DEF_ASM(ble) + DEF_ASM(bleu) + DEF_ASM(blez) + DEF_ASM(bltz) + DEF_ASM(bnez) + DEF_ASM(call) + DEF_ASM_WITH_SUFFIX(fabs, d) + DEF_ASM_WITH_SUFFIX(fabs, s) + DEF_ASM(fld) + DEF_ASM(flw) + DEF_ASM_WITH_SUFFIX(fmv, d) + DEF_ASM_WITH_SUFFIX(fmv, s) + DEF_ASM_WITH_SUFFIX(fneg, d) + DEF_ASM_WITH_SUFFIX(fneg, s) + DEF_ASM(fsd) + DEF_ASM(fsw) + DEF_ASM(j) + DEF_ASM(jump) + DEF_ASM(jr) + DEF_ASM(la) + DEF_ASM(li) + DEF_ASM(lla) + DEF_ASM(mv) + DEF_ASM(neg) + DEF_ASM(negw) + 
DEF_ASM(nop) + DEF_ASM(not) + DEF_ASM(ret) + DEF_ASM(seqz) + DEF_ASM_WITH_SUFFIX(sext, w) + DEF_ASM(sgtz) + DEF_ASM(sltz) + DEF_ASM(snez) + DEF_ASM(tail) + +/* Possible values for .option directive */ + DEF_ASM(arch) + DEF_ASM(rvc) + DEF_ASM(norvc) + DEF_ASM(pic) + DEF_ASM(nopic) + DEF_ASM(relax) + DEF_ASM(norelax) + DEF_ASM(push) + DEF_ASM(pop) + +/* “A” Standard Extension for Atomic Instructions, Version 2.1 */ + /* XXX: Atomic memory operations */ + DEF_ASM_WITH_SUFFIX(lr, w) + DEF_ASM_WITH_SUFFIXES(lr, w, aq) + DEF_ASM_WITH_SUFFIXES(lr, w, rl) + DEF_ASM_WITH_SUFFIXES(lr, w, aqrl) + + DEF_ASM_WITH_SUFFIX(lr, d) + DEF_ASM_WITH_SUFFIXES(lr, d, aq) + DEF_ASM_WITH_SUFFIXES(lr, d, rl) + DEF_ASM_WITH_SUFFIXES(lr, d, aqrl) + + + DEF_ASM_WITH_SUFFIX(sc, w) + DEF_ASM_WITH_SUFFIXES(sc, w, aq) + DEF_ASM_WITH_SUFFIXES(sc, w, rl) + DEF_ASM_WITH_SUFFIXES(sc, w, aqrl) + + DEF_ASM_WITH_SUFFIX(sc, d) + DEF_ASM_WITH_SUFFIXES(sc, d, aq) + DEF_ASM_WITH_SUFFIXES(sc, d, rl) + DEF_ASM_WITH_SUFFIXES(sc, d, aqrl) + +/* `fence` arguments */ +/* NOTE: Order is important */ + DEF_ASM_FENCE(w) + DEF_ASM_FENCE(r) + DEF_ASM_FENCE(rw) + + DEF_ASM_FENCE(o) + DEF_ASM_FENCE(ow) + DEF_ASM_FENCE(or) + DEF_ASM_FENCE(orw) + + DEF_ASM_FENCE(i) + DEF_ASM_FENCE(iw) + DEF_ASM_FENCE(ir) + DEF_ASM_FENCE(irw) + + DEF_ASM_FENCE(io) + DEF_ASM_FENCE(iow) + DEF_ASM_FENCE(ior) + DEF_ASM_FENCE(iorw) + +#undef DEF_ASM_FENCE +#undef DEF_ASM_WITH_SUFFIX +#undef DEF_ASM_WITH_SUFFIXES diff --git a/tcc.c b/tcc.c index e1819239d..0d555dbe8 100644 --- a/tcc.c +++ b/tcc.c @@ -191,6 +191,8 @@ static const char version[] = "AArch64" #elif defined TCC_TARGET_RISCV64 "riscv64" +#elif defined TCC_TARGET_RISCV32 + "riscv32" #endif #ifdef TCC_TARGET_PE " Windows" diff --git a/tcc.h b/tcc.h index e7a2f1e26..49b5eabbc 100644 --- a/tcc.h +++ b/tcc.h @@ -148,12 +148,14 @@ extern long double strtold (const char *__nptr, char **__endptr); /* #define TCC_TARGET_ARM *//* ARMv4 code generator */ /* #define TCC_TARGET_ARM64 *//* ARMv8 
code generator */ /* #define TCC_TARGET_C67 *//* TMS320C67xx code generator */ -/* #define TCC_TARGET_RISCV64 *//* risc-v code generator */ +/* #define TCC_TARGET_RISCV64 *//* risc-v 64 code generator */ +/* #define TCC_TARGET_RISCV32 *//* risc-v 32 code generator */ /* default target is I386 */ #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_ARM) && \ !defined(TCC_TARGET_ARM64) && !defined(TCC_TARGET_C67) && \ - !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_RISCV64) + !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_RISCV64) && \ + !defined(TCC_TARGET_RISCV32) # if defined __x86_64__ # define TCC_TARGET_X86_64 # elif defined __arm__ @@ -163,8 +165,10 @@ extern long double strtold (const char *__nptr, char **__endptr); # define TCC_ARM_HARDFLOAT # elif defined __aarch64__ # define TCC_TARGET_ARM64 -# elif defined __riscv +# elif defined __riscv && defined __LP64__ # define TCC_TARGET_RISCV64 +# elif defined __riscv && !defined __LP64__ +# define TCC_TARGET_RISCV32 # else # define TCC_TARGET_I386 # endif @@ -189,6 +193,8 @@ extern long double strtold (const char *__nptr, char **__endptr); # define TCC_IS_NATIVE # elif defined __riscv && defined __LP64__ && defined TCC_TARGET_RISCV64 # define TCC_IS_NATIVE +# elif defined __riscv && !defined __LP64__ && defined TCC_TARGET_RISCV32 +# define TCC_IS_NATIVE # endif #endif @@ -229,7 +235,8 @@ extern long double strtold (const char *__nptr, char **__endptr); cross-compilers made by a mingw-GCC */ #if defined TCC_TARGET_PE \ || (defined TCC_TARGET_MACHO && defined TCC_TARGET_ARM64) \ - || (defined _WIN32 && !defined __GNUC__) + || (defined _WIN32 && !defined __GNUC__) \ + || defined TCC_TARGET_RISCV32 # define TCC_USING_DOUBLE_FOR_LDOUBLE 1 #endif @@ -309,6 +316,8 @@ extern long double strtold (const char *__nptr, char **__endptr); # define CONFIG_TCC_ELFINTERP "/lib64/ld-linux-x86-64.so.2" # elif defined(TCC_TARGET_RISCV64) # define CONFIG_TCC_ELFINTERP "/lib/ld-linux-riscv64-lp64d.so.1" +# elif 
defined(TCC_TARGET_RISCV32) +# define CONFIG_TCC_ELFINTERP "/lib/ld-linux-riscv32-ilp32.so.1" # elif defined(TCC_ARM_EABI) # define DEFAULT_ELFINTERP(s) default_elfinterp(s) # else @@ -395,6 +404,10 @@ extern long double strtold (const char *__nptr, char **__endptr); # include "riscv64-gen.c" # include "riscv64-link.c" # include "riscv64-asm.c" +#elif defined(TCC_TARGET_RISCV32) +# include "riscv32-gen.c" +# include "riscv32-link.c" +# include "riscv32-asm.c" #else #error unknown target #endif @@ -409,6 +422,14 @@ extern long double strtold (const char *__nptr, char **__endptr); # define ElfW_Rel ElfW(Rela) # define SHT_RELX SHT_RELA # define REL_SECTION_FMT ".rela%s" +#elif defined TCC_TARGET_RISCV32 +/* RISC-V always uses RELA relocations, even for RV32 */ +# define ELFCLASSW ELFCLASS32 +# define ElfW(type) Elf##32##_##type +# define ELFW(type) ELF##32##_##type +# define ElfW_Rel ElfW(Rela) +# define SHT_RELX SHT_RELA +# define REL_SECTION_FMT ".rela%s" #else # define ELFCLASSW ELFCLASS32 # define ElfW(type) Elf##32##_##type @@ -803,6 +824,9 @@ struct TCCState { #ifdef TCC_TARGET_ARM unsigned char float_abi; /* float ABI of the generated code*/ #endif +#ifdef TCC_TARGET_RISCV32 + unsigned char fpu; /* if true, emit inline F/D instructions (-mfpu) */ +#endif unsigned char has_text_addr; addr_t text_addr; /* address of text section */ @@ -937,7 +961,7 @@ struct TCCState { ElfW_Rel *qrel; #define qrel s1->qrel -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 struct pcrel_hi { addr_t addr, val; } last_hi; struct pcrel_hi *pcrel_hi_entries; int nb_pcrel_hi_entries; @@ -1725,6 +1749,13 @@ ST_FUNC void gen_cvt_sxtw(void); ST_FUNC void gen_increment_tcov (SValue *sv); #endif +/* ------------ riscv32-gen.c ------------ */ +#ifdef TCC_TARGET_RISCV32 +ST_FUNC void gen_va_start(void); +ST_FUNC void arch_transfer_ret_regs(int); +ST_FUNC void gen_increment_tcov (SValue *sv); +#endif + /* ------------ c67-gen.c ------------ */ #ifdef 
TCC_TARGET_C67 #endif diff --git a/tccasm.c b/tccasm.c index 523cbab0c..df806e104 100644 --- a/tccasm.c +++ b/tccasm.c @@ -958,7 +958,7 @@ static void asm_parse_directive(TCCState *s1, int global) next(); break; #endif -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 case TOK_ASMDIR_option: next(); switch(tok){ @@ -1100,7 +1100,7 @@ static void tcc_assemble_inline(TCCState *s1, const char *str, int len, int glob { const int *saved_macro_ptr = macro_ptr; int dotid = set_idnum('.', IS_ID); -#ifndef TCC_TARGET_RISCV64 +#if !defined TCC_TARGET_RISCV64 && !defined TCC_TARGET_RISCV32 int dolid = set_idnum('$', 0); #endif @@ -1110,7 +1110,7 @@ static void tcc_assemble_inline(TCCState *s1, const char *str, int len, int glob tcc_assemble_internal(s1, 0, global); tcc_close(); -#ifndef TCC_TARGET_RISCV64 +#if !defined TCC_TARGET_RISCV64 && !defined TCC_TARGET_RISCV32 set_idnum('$', dolid); #endif set_idnum('.', dotid); @@ -1176,7 +1176,7 @@ static void subst_asm_operands(ASMOperand *operands, int nb_operands, if (*str == 'c' || *str == 'n' || *str == 'b' || *str == 'w' || *str == 'h' || *str == 'k' || *str == 'q' || *str == 'l' || -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 *str == 'z' || #endif /* P in GCC would add "@PLT" to symbol refs in PIC mode, diff --git a/tccdbg.c b/tccdbg.c index 67e85643f..25c7d8518 100644 --- a/tccdbg.c +++ b/tccdbg.c @@ -860,7 +860,7 @@ ST_FUNC void tcc_eh_frame_start(TCCState *s1) dwarf_data1(eh_frame_section, DW_CFA_def_cfa); dwarf_uleb128(eh_frame_section, 31); // x31 (sp) dwarf_uleb128(eh_frame_section, 0); // ofs 0 -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 eh_frame_section->data[s1->eh_start + 8] = 3; // version = 3 dwarf_uleb128(eh_frame_section, 1); // code_alignment_factor dwarf_sleb128(eh_frame_section, -4); // data_alignment_factor @@ -897,7 +897,7 @@ static void tcc_debug_frame_end(TCCState *s1, 
int size) dwarf_reloc(eh_frame_section, eh_section_sym, R_ARM_REL32); #elif defined TCC_TARGET_ARM64 dwarf_reloc(eh_frame_section, eh_section_sym, R_AARCH64_PREL32); -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 dwarf_reloc(eh_frame_section, eh_section_sym, R_RISCV_32_PCREL); #endif dwarf_data4(eh_frame_section, func_ind); // PC Begin @@ -962,7 +962,7 @@ static void tcc_debug_frame_end(TCCState *s1, int size) dwarf_data1(eh_frame_section, DW_CFA_restore + 29); // x29 (fp) dwarf_data1(eh_frame_section, DW_CFA_def_cfa_offset); dwarf_uleb128(eh_frame_section, 0); -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 dwarf_data1(eh_frame_section, DW_CFA_advance_loc + 4); dwarf_data1(eh_frame_section, DW_CFA_def_cfa_offset); dwarf_uleb128(eh_frame_section, 16); // ofs 16 @@ -2405,7 +2405,7 @@ ST_FUNC void tcc_debug_funcend(TCCState *s1, int size) dwarf_data1(dwarf_info_section, DW_OP_reg13); // sp #elif defined TCC_TARGET_ARM64 dwarf_data1(dwarf_info_section, DW_OP_reg29); // reg 29 -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 dwarf_data1(dwarf_info_section, DW_OP_reg8); // r8(s0) #else dwarf_data1(dwarf_info_section, DW_OP_call_frame_cfa); @@ -2582,7 +2582,7 @@ ST_FUNC void tcc_tcov_block_begin(TCCState *s1) sv.sym = &label; #if defined TCC_TARGET_I386 || defined TCC_TARGET_X86_64 || \ defined TCC_TARGET_ARM || defined TCC_TARGET_ARM64 || \ - defined TCC_TARGET_RISCV64 + defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 gen_increment_tcov (&sv); #else vpushv(&sv); diff --git a/tccelf.c b/tccelf.c index b71c6f2b7..f5154fe6f 100644 --- a/tccelf.c +++ b/tccelf.c @@ -145,7 +145,7 @@ ST_FUNC void tccelf_delete(TCCState *s1) dynarray_reset(&s1->priv_sections, &s1->nb_priv_sections); tcc_free(s1->sym_attrs); -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 
tcc_free(s1->pcrel_hi_entries); #endif symtab_section = NULL; /* for tccrun.c:rt_printline() */ @@ -1130,7 +1130,7 @@ static void relocate_section(TCCState *s1, Section *s, Section *sr) addr_t tgt, addr; int is_dwarf = s->sh_num >= s1->dwlo && s->sh_num < s1->dwhi; -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 s1->nb_pcrel_hi_entries = 0; #endif @@ -1212,7 +1212,8 @@ static int prepare_dynamic_rel(TCCState *s1, Section *sr) int count = 0; #if defined(TCC_TARGET_I386) || defined(TCC_TARGET_X86_64) || \ defined(TCC_TARGET_ARM) || defined(TCC_TARGET_ARM64) || \ - defined(TCC_TARGET_RISCV64) + defined(TCC_TARGET_RISCV64) || \ + defined(TCC_TARGET_RISCV32) ElfW_Rel *rel; for_each_elem(sr, 0, rel, ElfW_Rel) { int sym_index = ELFW(R_SYM)(rel->r_info); @@ -1239,6 +1240,8 @@ static int prepare_dynamic_rel(TCCState *s1, Section *sr) #elif defined(TCC_TARGET_RISCV64) case R_RISCV_32: case R_RISCV_64: +#elif defined(TCC_TARGET_RISCV32) + case R_RISCV_32: #endif count++; break; @@ -1875,7 +1878,7 @@ static void tcc_add_linker_symbols(TCCState *s1) #if TARGETOS_OpenBSD set_global_sym(s1, "__executable_start", NULL, ELF_START_ADDR); #endif -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 /* XXX should be .sdata+0x800, not .data+0x800 */ set_global_sym(s1, "__global_pointer$", data_section, 0x800); #endif @@ -2500,7 +2503,7 @@ static void fill_dynamic(TCCState *s1, struct dyn_inf *dyninf) put_dt(dynamic, DT_SYMTAB, s1->dynsym->sh_addr); put_dt(dynamic, DT_STRSZ, dyninf->dynstr->data_offset); put_dt(dynamic, DT_SYMENT, sizeof(ElfW(Sym))); -#if PTR_SIZE == 8 +#if SHT_RELX == SHT_RELA put_dt(dynamic, DT_RELA, dyninf->rel_addr); put_dt(dynamic, DT_RELASZ, dyninf->rel_size); put_dt(dynamic, DT_RELAENT, sizeof(ElfW_Rel)); @@ -2632,6 +2635,8 @@ static int tcc_output_elf(TCCState *s1, FILE *f, int phnum, ElfW(Phdr) *phdr) #elif defined TCC_TARGET_RISCV64 /* XXX should be configurable */ ehdr.e_flags = 
EF_RISCV_FLOAT_ABI_DOUBLE; +#elif defined TCC_TARGET_RISCV32 + ehdr.e_flags = EF_RISCV_FLOAT_ABI_SOFT; #endif if (file_type == TCC_OUTPUT_OBJ) { @@ -3345,7 +3350,7 @@ ST_FUNC int tcc_load_object_file(TCCState *s1, ptr = s->data + offset; full_read(fd, ptr, size); } -#if defined TCC_TARGET_ARM || defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 +#if defined TCC_TARGET_ARM || defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 /* align code sections to instruction lenght */ /* This is needed if we compile a c file after this */ if (s->sh_flags & SHF_EXECINSTR) @@ -3452,7 +3457,7 @@ ST_FUNC int tcc_load_object_file(TCCState *s1, if (!sym_index && !sm_table[sh->sh_info].link_once #ifdef TCC_TARGET_ARM && type != R_ARM_V4BX -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 && type != R_RISCV_ALIGN && type != R_RISCV_RELAX #endif diff --git a/tccgen.c b/tccgen.c index 50802edf1..99455a7b0 100644 --- a/tccgen.c +++ b/tccgen.c @@ -236,7 +236,7 @@ static int R_RET(int t) #ifdef TCC_TARGET_X86_64 if ((t & VT_BTYPE) == VT_LDOUBLE) return TREG_ST0; -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 if ((t & VT_BTYPE) == VT_LDOUBLE) return REG_IRET; #endif @@ -250,12 +250,17 @@ static int R2_RET(int t) #if PTR_SIZE == 4 if (t == VT_LLONG) return REG_IRE2; +#ifdef TCC_TARGET_RISCV32 + /* soft-float: double is 8 bytes, needs register pair on RV32 */ + if (t == VT_DOUBLE) + return REG_IRE2; +#endif #elif defined TCC_TARGET_X86_64 if (t == VT_QLONG) return REG_IRE2; if (t == VT_QFLOAT) return REG_FRE2; -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 if (t == VT_LDOUBLE) return REG_IRE2; #endif @@ -287,7 +292,7 @@ static int RC_TYPE(int t) return RC_ST0; if ((t & VT_BTYPE) == VT_QFLOAT) return RC_FRET; -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 if 
((t & VT_BTYPE) == VT_LDOUBLE) return RC_INT; #endif @@ -1907,7 +1912,7 @@ ST_FUNC int gv(int rc) bt = vtop->type.t & VT_BTYPE; -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 /* XXX mega hack */ if (bt == VT_LDOUBLE && rc == RC_FLOAT) rc = RC_INT; @@ -2285,6 +2290,66 @@ static void gen_opl(int op) This is not needed when comparing switch cases */ save_regs(4); } +#if defined(TCC_TARGET_RISCV32) + /* RISC-V has no flags register, so the "re-test NE on same + comparison" trick used for flag-based architectures doesn't + work. Force both high words into registers so the comparison + is always register-register (not slti), then save the hardware + register numbers for the NE re-test. Branch instructions + only read registers, so they're still live after gvtst. */ + { + unsigned short saved_cmp_r; + + /* compare high */ + op1 = op; + if (op1 == TOK_LT) + op1 = TOK_LE; + else if (op1 == TOK_GT) + op1 = TOK_GE; + else if (op1 == TOK_ULT) + op1 = TOK_ULE; + else if (op1 == TOK_UGT) + op1 = TOK_UGE; + a = 0; + b = 0; + /* Force both operands into registers so gen_op uses + register-register comparison (not slti with immediate). + This ensures cmp_r encodes a real register pair that + can be reused for the NE test below. */ + gv2(RC_INT, RC_INT); + gen_op(op1); + /* Save the register pair from the comparison. Since we + forced both operands into registers above, cmp_r always + encodes two real registers (not a reg-vs-zero from slti). */ + saved_cmp_r = vtop->cmp_r; + if (op == TOK_NE) { + b = gvtst(0, 0); + } else { + a = gvtst(1, 0); + if (op != TOK_EQ) { + /* generate non equal test using saved register pair */ + vpushi(0); + vset_VT_CMP(TOK_NE); + vtop->cmp_r = saved_cmp_r; + b = gvtst(0, 0); + } + } + /* compare low. 
Always unsigned */ + op1 = op; + if (op1 == TOK_LT) + op1 = TOK_ULT; + else if (op1 == TOK_LE) + op1 = TOK_ULE; + else if (op1 == TOK_GT) + op1 = TOK_UGT; + else if (op1 == TOK_GE) + op1 = TOK_UGE; + gen_op(op1); + gvtst_set(1, a); + gvtst_set(0, b); + } + break; +#else /* compare high */ op1 = op; /* when values are equal, we need to compare low words. since @@ -2329,6 +2394,7 @@ static void gen_opl(int op) gvtst_set(1, a); gvtst_set(0, b); break; +#endif } } #endif @@ -3164,6 +3230,14 @@ ST_FUNC void gen_op(int op) vtop->type.t = VT_INT; } else { vtop->type.t = t; +#ifdef TCC_USING_DOUBLE_FOR_LDOUBLE + /* Preserve VT_LONG if either operand was originally + long double (VT_DOUBLE|VT_LONG), so varargs passing + can detect it later for ABI conversion */ + if ((t & VT_BTYPE) == VT_DOUBLE + && ((t1 | t2) & VT_LONG)) + vtop->type.t |= VT_LONG; +#endif } } // Make sure that we have converted to an rvalue: @@ -3171,7 +3245,7 @@ ST_FUNC void gen_op(int op) gv(is_float(vtop->type.t & VT_BTYPE) ? 
RC_FLOAT : RC_INT); } -#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_ARM +#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 || defined TCC_TARGET_ARM #define gen_cvt_itof1 gen_cvt_itof #else /* generic itof for unsigned long long case */ @@ -3198,7 +3272,7 @@ static void gen_cvt_itof1(int t) } #endif -#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 +#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 #define gen_cvt_ftoi1 gen_cvt_ftoi #else /* generic ftoi for unsigned long long case */ @@ -5863,7 +5937,7 @@ ST_FUNC void unary(void) mk_pointer(&type); vset(&type, VT_LOCAL, 0); /* local frame */ while (level--) { -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 vpushi(2*PTR_SIZE); gen_op('-'); #endif @@ -5875,7 +5949,7 @@ ST_FUNC void unary(void) #ifdef TCC_TARGET_ARM vpushi(2*PTR_SIZE); gen_op('+'); -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 vpushi(PTR_SIZE); gen_op('-'); #else @@ -5887,7 +5961,7 @@ ST_FUNC void unary(void) } } break; -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 case TOK_builtin_va_start: parse_builtin_params(0, "ee"); r = vtop->r & VT_VALMASK; @@ -6288,7 +6362,7 @@ ST_FUNC void unary(void) if (ret_nregs < 0) { vsetc(&ret.type, ret.r, &ret.c); -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 arch_transfer_ret_regs(1); #endif } else { @@ -6785,7 +6859,7 @@ static void gfunc_return(CType *func_type) ret_nregs = gfunc_sret(func_type, func_var, &ret_type, &ret_align, ®size); if (ret_nregs < 0) { -#ifdef TCC_TARGET_RISCV64 +#if defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 arch_transfer_ret_regs(0); #endif } else if (0 == ret_nregs) { diff --git a/tcctok.h b/tcctok.h index b7cc9d409..0c981aefa 100644 --- a/tcctok.h +++ b/tcctok.h @@ -179,7 +179,7 
@@ #elif defined TCC_TARGET_ARM64 DEF(TOK_builtin_va_start, "__builtin_va_start") DEF(TOK_builtin_va_arg, "__builtin_va_arg") -#elif defined TCC_TARGET_RISCV64 +#elif defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 DEF(TOK_builtin_va_start, "__builtin_va_start") #endif @@ -206,7 +206,7 @@ DEF(TOK_pack, "pack") #if !defined(TCC_TARGET_I386) && !defined(TCC_TARGET_X86_64) && \ !defined(TCC_TARGET_ARM) && !defined(TCC_TARGET_ARM64) && \ - !defined(TCC_TARGET_RISCV64) + !defined(TCC_TARGET_RISCV64) && !defined(TCC_TARGET_RISCV32) /* already defined for assembler */ DEF(TOK_ASM_push, "push") DEF(TOK_ASM_pop, "pop") @@ -306,8 +306,53 @@ #if defined TCC_TARGET_PE DEF(TOK___chkstk, "__chkstk") #endif -#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 +#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 DEF(TOK___arm64_clear_cache, "__arm64_clear_cache") +#endif +#if defined TCC_TARGET_RISCV32 + /* soft-float single-precision libcalls */ + DEF(TOK___addsf3, "__addsf3") + DEF(TOK___subsf3, "__subsf3") + DEF(TOK___mulsf3, "__mulsf3") + DEF(TOK___divsf3, "__divsf3") + DEF(TOK___eqsf2, "__eqsf2") + DEF(TOK___nesf2, "__nesf2") + DEF(TOK___ltsf2, "__ltsf2") + DEF(TOK___lesf2, "__lesf2") + DEF(TOK___gtsf2, "__gtsf2") + DEF(TOK___gesf2, "__gesf2") + /* soft-float double-precision libcalls */ + DEF(TOK___adddf3, "__adddf3") + DEF(TOK___subdf3, "__subdf3") + DEF(TOK___muldf3, "__muldf3") + DEF(TOK___divdf3, "__divdf3") + DEF(TOK___eqdf2, "__eqdf2") + DEF(TOK___nedf2, "__nedf2") + DEF(TOK___ltdf2, "__ltdf2") + DEF(TOK___ledf2, "__ledf2") + DEF(TOK___gtdf2, "__gtdf2") + DEF(TOK___gedf2, "__gedf2") + /* soft-float conversion libcalls */ + DEF(TOK___extendsfdf2, "__extendsfdf2") + DEF(TOK___truncdfsf2, "__truncdfsf2") + DEF(TOK___fixsfsi, "__fixsfsi") + DEF(TOK___fixdfsi, "__fixdfsi") + DEF(TOK___fixunssfsi, "__fixunssfsi") + DEF(TOK___fixunsdfsi, "__fixunsdfsi") + DEF(TOK___fixsfdi, "__fixsfdi") + DEF(TOK___fixdfdi, "__fixdfdi") + 
/* TOK___fixunssfdi, TOK___fixunsdfdi already in #ifndef TCC_ARM_EABI block */ + DEF(TOK___floatsisf, "__floatsisf") + DEF(TOK___floatsidf, "__floatsidf") + DEF(TOK___floatunsisf, "__floatunsisf") + DEF(TOK___floatunsidf, "__floatunsidf") + DEF(TOK___floatdisf, "__floatdisf") + DEF(TOK___floatdidf, "__floatdidf") + /* TOK___floatundisf, TOK___floatundidf already in #ifndef TCC_ARM_EABI block */ + DEF(TOK___negsf2, "__negsf2") + DEF(TOK___negdf2, "__negdf2") +#endif +#if defined TCC_TARGET_ARM64 || defined TCC_TARGET_RISCV64 || defined TCC_TARGET_RISCV32 DEF(TOK___addtf3, "__addtf3") DEF(TOK___subtf3, "__subtf3") DEF(TOK___multf3, "__multf3") @@ -407,7 +452,7 @@ DEF_ASMDIR(code32) #elif defined(TCC_TARGET_X86_64) DEF_ASMDIR(code64) -#elif defined(TCC_TARGET_RISCV64) +#elif defined(TCC_TARGET_RISCV64) || defined(TCC_TARGET_RISCV32) DEF_ASMDIR(option) #endif DEF_ASMDIR(short) @@ -428,3 +473,7 @@ #if defined TCC_TARGET_RISCV64 #include "riscv64-tok.h" #endif + +#if defined TCC_TARGET_RISCV32 +#include "riscv32-tok.h" +#endif diff --git a/tests/run-rv32-tests.sh b/tests/run-rv32-tests.sh new file mode 100755 index 000000000..4c280f942 --- /dev/null +++ b/tests/run-rv32-tests.sh @@ -0,0 +1,281 @@ +#!/bin/bash +# run-rv32-tests.sh — Run TCC tests2 and pp suites for riscv32 via qemu-user +# +# Usage: cd ~/tinycc && bash tests/run-rv32-tests.sh [test-number...] +# With no args, runs all tests. With args, runs only those numbered tests. 
# Example: bash tests/run-rv32-tests.sh 22 31 46

set -u

# ── Paths ────────────────────────────────────────────────────────────────
TCC_BUILD="$HOME/sonata-linux/buildroot/output/build/tcc-riscv32"
SYSROOT="$HOME/sonata-linux/buildroot/output/host/riscv32-buildroot-linux-gnu/sysroot"
TESTS2_DIR="$(cd "$(dirname "$0")/tests2" && pwd)"
PP_DIR="$(cd "$(dirname "$0")/pp" && pwd)"

TCC="$TCC_BUILD/tcc"
TCC_FLAGS="-B $TCC_BUILD -I $SYSROOT/usr/include -L $SYSROOT/usr/lib"

# qemu-user resolves the riscv32 dynamic loader and shared libs under here.
export QEMU_LD_PREFIX="$SYSROOT"

TMPDIR=$(mktemp -d /tmp/tcc-rv32-test.XXXXXX)
trap 'rm -rf "$TMPDIR"' EXIT

# ── Skip lists ───────────────────────────────────────────────────────────
# x86 asm tests
SKIP_X86="85 98 99 127"
# Bound-checking tests (no bcheck support on riscv32)
SKIP_BCHECK="112 113 114 115 116 117 126 132"
# Non-standard C
SKIP_NONSTD="34"
# 32-bit bitfield alignment (same skip as i386/arm in Makefile)
SKIP_32BIT="95 95_bitfields_ms"
# -dt mode tests (require -run which is not available on riscv32)
SKIP_DT="60 96 125 128"
# Struct return + cleanup attribute interaction (first field corrupted by
# hidden return pointer)
SKIP_CLEANUP="101"
# ARM64-specific
SKIP_ARM64="73"

SKIP_SET=" $SKIP_X86 $SKIP_BCHECK $SKIP_NONSTD $SKIP_32BIT $SKIP_DT $SKIP_CLEANUP $SKIP_ARM64 "

# is_skipped NUM NAME — succeed (return 0) when the test is on a skip list.
is_skipped() {
    local num="$1" name="$2"
    [[ "$SKIP_SET" == *" $num "* ]] && return 0
    # 95_bitfields_ms is also matched by full name, not just by its number.
    [[ "$name" == "95_bitfields_ms" ]] && return 0
    return 1
}

# ── Per-test flags and args ──────────────────────────────────────────────
# get_flags NAME — print extra tcc flags required by the named test.
get_flags() {
    case "$1" in
        22_floating_point|24_math_library) echo "-lm" ;;
        76_dollars_in_identifiers) echo "-fdollars-in-identifiers" ;;
        60_errors_and_warnings|96_nodata_wanted|125_atomic_misc|128_run_atexit)
            echo "-dt" ;;
        106_versym) echo "-pthread" ;;
        124_atomic_counter) echo "-pthread -latomic" ;;
        136_atomic_gcc_style) echo "-latomic" ;;
        *) echo "" ;;
    esac
}

# get_args NAME — print the command-line arguments the test binary expects.
get_args() {
    case "$1" in
        31_args) echo "arg1 arg2 arg3 arg4 arg5" ;;
        46_grep) echo "'[^* ]*[:a:d: ]+\:\*-/: \$\$' $TESTS2_DIR/46_grep.c" ;;
        *) echo "" ;;
    esac
}

# needs_norun NAME — tests that must be compiled to an exe (not tcc -run).
# NOTE: currently unused — since -run is unavailable on riscv32, run_test2
# compiles *every* test to an executable.  Kept for when -run starts working.
needs_norun() {
    case "$1" in
        42_function_pointer|106_versym|108_constructor|120_alias|126_bound_global)
            return 0 ;;
        *) return 1 ;;
    esac
}

# get_extra_sources NAME — print companion sources compiled alongside NAME.
get_extra_sources() {
    case "$1" in
        104_inline) echo "$TESTS2_DIR/104+_inline.c" ;;
        120_alias) echo "$TESTS2_DIR/120+_alias.c" ;;
        *) echo "" ;;
    esac
}

# needs_addr_scrub NAME — tests whose output embeds addresses that must be
# normalized before comparison against the .expect file.
needs_addr_scrub() {
    case "$1" in
        112_backtrace|113_btdll|126_bound_global) return 0 ;;
        *) return 1 ;;
    esac
}

# ── Color output ─────────────────────────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m'

# ── Run a single tests2 test ─────────────────────────────────────────────
# run_test2 SRC — build and run one tests2 case, diff against its .expect.
# Returns 0 = pass, 1 = fail, 2 = skipped.
run_test2() {
    local src="$1"
    local name num expect output exe flags extra args
    name=$(basename "$src" .c)
    num="${name%%_*}"
    expect="$TESTS2_DIR/$name.expect"
    output="$TMPDIR/$name.output"
    exe="$TMPDIR/$name.exe"
    flags=$(get_flags "$name")
    extra=$(get_extra_sources "$name")
    args=$(get_args "$name")

    if is_skipped "$num" "$name"; then
        echo -e "  ${YELLOW}SKIP${NC} $name"
        return 2
    fi

    if [[ ! -f "$expect" ]]; then
        echo -e "  ${YELLOW}SKIP${NC} $name (no .expect)"
        return 2
    fi

    if [[ "$flags" == *"-dt"* ]]; then
        # -dt mode: TCC compiles and runs the snippets internally.
        $TCC $TCC_FLAGS $flags "$src" $extra 2>&1 \
            | sed -e "s|$TESTS2_DIR/||g" > "$output" || true
    else
        # Compile to an executable and run it exactly once, capturing both
        # compiler diagnostics and program output.  (Previously the
        # needs_norun tests were built and executed twice — the first run
        # leaking output to the terminal and capturing a meaningless $?.)
        # eval keeps quoted words inside $args as single arguments (46_grep).
        {
            $TCC $TCC_FLAGS $flags -o "$exe" "$src" $extra 2>&1 && \
                eval "$exe" $args 2>&1
        } | sed -e "s|$TESTS2_DIR/||g" > "$output" || true
    fi

    # Address scrubbing for backtrace tests
    if needs_addr_scrub "$name"; then
        sed -i -e 's/[0-9A-Fa-fx]\{5,\}/......../g' \
               -e 's/0x[0-9A-Fa-f]\{1,\}/0x?/g' "$output"
    fi

    # Compare against the expected output
    if diff -Nbu "$expect" "$output" > "$TMPDIR/$name.diff" 2>&1; then
        echo -e "  ${GREEN}PASS${NC} $name"
        rm -f "$output" "$TMPDIR/$name.diff"
        return 0
    else
        echo -e "  ${RED}FAIL${NC} $name"
        # Show first 30 lines of diff
        head -30 "$TMPDIR/$name.diff" | sed 's/^/    /'
        return 1
    fi
}

# ── Run a single pp test ─────────────────────────────────────────────────
# run_pp_test SRC — preprocess one pp case, diff against its .expect.
# Returns 0 = pass, 1 = fail, 2 = skipped.
run_pp_test() {
    local src="$1"
    local base name expect output
    base=$(basename "$src")
    name="${base%.*}"
    expect="$PP_DIR/$name.expect"
    output="$TMPDIR/pp_$name.output"

    if [[ ! -f "$expect" ]]; then
        echo -e "  ${YELLOW}SKIP${NC} pp/$name (no .expect)"
        return 2
    fi

    $TCC $TCC_FLAGS -E -P "$src" 2>&1 \
        | sed -e "s|$PP_DIR/||g" > "$output" || true

    local diff_opts="-Nbu"
    # Test 02 needs -w (ignore all whitespace)
    [[ "$name" == "02" ]] && diff_opts="-Nbuw"

    if diff $diff_opts "$expect" "$output" > "$TMPDIR/pp_$name.diff" 2>&1; then
        echo -e "  ${GREEN}PASS${NC} pp/$name"
        rm -f "$output" "$TMPDIR/pp_$name.diff"
        return 0
    else
        echo -e "  ${RED}FAIL${NC} pp/$name"
        head -20 "$TMPDIR/pp_$name.diff" | sed 's/^/    /'
        return 1
    fi
}

# ── Main ─────────────────────────────────────────────────────────────────
echo "=== TCC riscv32 Test Suite ==="
echo "TCC: $TCC"
echo "Sysroot: $SYSROOT"
echo "Temp: $TMPDIR"
echo ""

# Verify TCC works
if ! $TCC $TCC_FLAGS -E -P - <<< "" > /dev/null 2>&1; then
    echo "ERROR: TCC cannot run. Check QEMU_LD_PREFIX and paths."
    exit 1
fi

pass=0 fail=0 skip=0

# Filter tests if args given
filter_nums=("$@")

# ── tests2 ──
echo "── tests2 ──────────────────────────────────────────────"
for src in "$TESTS2_DIR"/[0-9]*_*.c; do
    name=$(basename "$src" .c)
    # Skip the "+" companion files (104+_inline, 120+_alias)
    [[ "$name" == *+* ]] && continue
    num="${name%%_*}"

    # If filter specified, only run matching tests
    if [[ ${#filter_nums[@]} -gt 0 ]]; then
        match=0
        for f in "${filter_nums[@]}"; do
            [[ "$num" == "$f" ]] && match=1 && break
        done
        [[ $match -eq 0 ]] && continue
    fi

    run_test2 "$src"
    case $? in
        0) ((pass++)) ;;
        1) ((fail++)) ;;
        2) ((skip++)) ;;
    esac
done

# ── pp ──
if [[ ${#filter_nums[@]} -eq 0 ]]; then
    echo ""
    echo "── pp ──────────────────────────────────────────────────"
    for src in "$PP_DIR"/[0-9]*.[cS] "$PP_DIR"/pp-*.c; do
        [[ -f "$src" ]] || continue
        run_pp_test "$src"
        case $? in
            0) ((pass++)) ;;
            1) ((fail++)) ;;
            2) ((skip++)) ;;
        esac
    done
fi

# ── Summary ──
echo ""
echo "════════════════════════════════════════════════════════"
echo -e "  ${GREEN}PASS: $pass${NC}  ${RED}FAIL: $fail${NC}  ${YELLOW}SKIP: $skip${NC}  TOTAL: $((pass+fail+skip))"
echo "════════════════════════════════════════════════════════"

[[ $fail -eq 0 ]] && exit 0 || exit 1