diff --git a/.gitignore b/.gitignore index d2216ddb4..349eaddcf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .tmp/ _deps/ build/ +build_qemu/ Debug/ CMakeFiles/ CMakeScripts/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d348e684..c51e82a07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,4 +69,49 @@ set(CPACK_SOURCE_IGNORE_FILES ".*~$" ) set(CPACK_VERBATIM_VARIABLES YES) -include(CPack) \ No newline at end of file +include(CPack) + +# Enable the QEMU Virt demo application (Target: kernel.elf) +set(QEMU_DEMO_DIR ${CMAKE_CURRENT_LIST_DIR}/ports/risc-v32/gnu/example_build/qemu_virt) + +add_executable(kernel.elf + ${QEMU_DEMO_DIR}/demo_threadx.c + ${QEMU_DEMO_DIR}/entry.s + ${QEMU_DEMO_DIR}/uart.c + ${QEMU_DEMO_DIR}/plic.c + ${QEMU_DEMO_DIR}/hwtimer.c + ${QEMU_DEMO_DIR}/trap.c + ${QEMU_DEMO_DIR}/board.c + ${QEMU_DEMO_DIR}/tx_initialize_low_level.S +) + +target_link_libraries(kernel.elf PRIVATE threadx) + +target_include_directories(kernel.elf PRIVATE + ${CMAKE_CURRENT_LIST_DIR}/common/inc + ${CMAKE_CURRENT_LIST_DIR}/ports/${THREADX_ARCH}/${THREADX_TOOLCHAIN}/inc + ${QEMU_DEMO_DIR} +) + +# Linker options +target_link_options(kernel.elf PRIVATE + -T${QEMU_DEMO_DIR}/link.lds + -nostartfiles + -Wl,-Map=kernel.map +) + +# QEMU Test Runner Target +find_package(Python3 COMPONENTS Interpreter) +if(Python3_FOUND) + add_custom_target(check-functional-riscv32 + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/test/ports/azrtos_test_tx_gnu_riscv32_qemu.py + --elf $<TARGET_FILE:kernel.elf> + --qemu qemu-system-riscv32 + --gdb gdb + DEPENDS kernel.elf + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Running QEMU/GDB Test Runner..." 
+ ) +else() + message(WARNING "Python3 not found, check-functional-riscv32 target will not be available.") +endif() \ No newline at end of file diff --git a/cmake/riscv32_gnu.cmake b/cmake/riscv32_gnu.cmake index 617b12760..536af0c6f 100644 --- a/cmake/riscv32_gnu.cmake +++ b/cmake/riscv32_gnu.cmake @@ -4,7 +4,7 @@ set(CMAKE_SYSTEM_PROCESSOR risc-v32) set(THREADX_ARCH "risc-v32") set(THREADX_TOOLCHAIN "gnu") -set(ARCH_FLAGS "-g -march=rv32gc -mabi=ilp32d -mcmodel=medany") +set(ARCH_FLAGS "-g -march=rv32gc -mabi=ilp32d -mcmodel=medany -mrelax") set(CFLAGS "${ARCH_FLAGS}") set(ASFLAGS "${ARCH_FLAGS}") set(LDFLAGS "${ARCH_FLAGS}") diff --git a/ports/risc-v32/gnu/example_build/qemu_virt/demo_threadx.c b/ports/risc-v32/gnu/example_build/qemu_virt/demo_threadx.c index f21dbb26b..69fa1363d 100644 --- a/ports/risc-v32/gnu/example_build/qemu_virt/demo_threadx.c +++ b/ports/risc-v32/gnu/example_build/qemu_virt/demo_threadx.c @@ -112,7 +112,7 @@ void tx_application_define(void *first_unused_memory) tx_thread_create(&thread_2, "thread 2", thread_2_entry, 2, pointer, DEMO_STACK_SIZE, - 16, 16, 4, TX_AUTO_START); + 10, 10, 4, TX_AUTO_START); /* Allocate the stack for thread 3. */ tx_byte_allocate(&byte_pool_0, (VOID **) &pointer, DEMO_STACK_SIZE, TX_NO_WAIT); @@ -201,7 +201,7 @@ UINT status; thread_0_counter++; - /* Sleep for 10 ticks. */ - tx_thread_sleep(10); + /* Sleep for 1 tick. */ + tx_thread_sleep(1); /* Set event flag 0 to wakeup thread 5. */ status = tx_event_flags_set(&event_flags_0, 0x1, TX_OR); @@ -337,6 +337,7 @@ ULONG actual_flags; } } +float fpu_test_val = 0.0f; void thread_6_and_7_entry(ULONG thread_input) { @@ -363,6 +364,9 @@ UINT status; if (status != TX_SUCCESS) break; + /* FPU Test*/ + fpu_test_val += 1.1f; + /* Get the mutex again with suspension. This shows that an owning thread may retrieve the mutex it owns multiple times. 
*/ diff --git a/ports/risc-v32/gnu/example_build/qemu_virt/entry.s b/ports/risc-v32/gnu/example_build/qemu_virt/entry.s index 9b202ca16..202132d8d 100644 --- a/ports/risc-v32/gnu/example_build/qemu_virt/entry.s +++ b/ports/risc-v32/gnu/example_build/qemu_virt/entry.s @@ -1,5 +1,5 @@ -.section .text +.section .init .align 4 .global _start .extern main @@ -11,7 +11,10 @@ _start: bne t0, zero, 1f li x1, 0 li x2, 0 - li x3, 0 +.option push +.option norelax + la gp, __global_pointer$ +.option pop li x4, 0 li x5, 0 li x6, 0 diff --git a/ports/risc-v32/gnu/example_build/qemu_virt/link.lds b/ports/risc-v32/gnu/example_build/qemu_virt/link.lds index 522f90d96..76ad16b11 100644 --- a/ports/risc-v32/gnu/example_build/qemu_virt/link.lds +++ b/ports/risc-v32/gnu/example_build/qemu_virt/link.lds @@ -8,6 +8,10 @@ SECTIONS * where qemu's -kernel jumps. */ . = 0x80000000; + + .init : { + KEEP (*(.init)) + } .text : { *(.text .text.*) @@ -24,6 +28,7 @@ SECTIONS .data : { . = ALIGN(16); + PROVIDE( __global_pointer$ = . + 0x800 ); *(.sdata .sdata.*) /* do not need to distinguish this from .data */ . 
= ALIGN(16); *(.data .data.*) diff --git a/ports/risc-v32/gnu/example_build/qemu_virt/tx_initialize_low_level.S b/ports/risc-v32/gnu/example_build/qemu_virt/tx_initialize_low_level.S index 9a7a74ffd..33f0b01fe 100644 --- a/ports/risc-v32/gnu/example_build/qemu_virt/tx_initialize_low_level.S +++ b/ports/risc-v32/gnu/example_build/qemu_virt/tx_initialize_low_level.S @@ -9,6 +9,7 @@ **************************************************************************/ #include "csr.h" +#include "tx_port.h" .section .text .align 4 @@ -21,6 +22,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -60,13 +62,13 @@ .extern trap_handler .extern _tx_thread_context_restore trap_entry: -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, -260 // Allocate space for all registers - with floating point enabled (65*4) +#if defined(__riscv_flen) && ((__riscv_flen == 32)||(__riscv_flen == 64)) + addi sp, sp, -65*REGBYTES // Allocate space for all registers - with floating point enabled #else - addi sp, sp, -128 // Allocate space for all registers - without floating point enabled (32*4) + addi sp, sp, -32*REGBYTES // Allocate space for all registers - without floating point enabled #endif - sw x1, 112(sp) // Store RA (28*4 = 112, because call will override ra [ra is a callee register in riscv]) + STORE x1, 28*REGBYTES(sp) // Store RA at offset 28*REGBYTES (the call below overwrites ra; ra is caller-saved in the RISC-V calling convention) call _tx_thread_context_save @@ -133,6 +135,11 @@ _err: .extern board_init _tx_initialize_low_level: +/* debug print + .section .rodata +debug_str_init: + .string "DEBUG : threadx/ports/risc-v32/gnu/example_build/qemu_virt/tx_initialize_low_level.S, _tx_initialize_low_level\n" +*/ .section .text la t0, _tx_thread_system_stack_ptr @@ -155,6 +162,10 @@ _tx_initialize_low_level: addi sp, sp, -4 sw ra, 0(sp) call board_init +/* debug print + la a0, debug_str_init + call 
uart_puts +*/ lw ra, 0(sp) addi sp, sp, 4 la t0, trap_entry diff --git a/ports/risc-v32/gnu/inc/tx_port.h b/ports/risc-v32/gnu/inc/tx_port.h index ed0146751..7034ccc63 100644 --- a/ports/risc-v32/gnu/inc/tx_port.h +++ b/ports/risc-v32/gnu/inc/tx_port.h @@ -30,6 +30,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -47,7 +48,121 @@ #ifndef TX_PORT_H #define TX_PORT_H -#ifndef __ASSEMBLER__ +#ifdef __ASSEMBLER__ + + +#if __riscv_xlen == 64 +# define SLL32 sllw +# define STORE sd +# define LOAD ld +# define LWU lwu +# define LOG_REGBYTES 3 +#else +# define SLL32 sll +# define STORE sw +# define LOAD lw +# define LWU lw +# define LOG_REGBYTES 2 +#endif +#define REGBYTES (1 << LOG_REGBYTES) + +/* Define stack frame offsets for thread context save/restore. + These offsets correspond to the layout used in tx_thread_context_save.S + and tx_thread_context_restore.S. */ + +/* General Purpose Registers */ +#define TX_STACK_OFFSET_X1 (28 * REGBYTES) /* ra */ +#define TX_STACK_OFFSET_X5 (19 * REGBYTES) /* t0 */ +#define TX_STACK_OFFSET_X6 (18 * REGBYTES) /* t1 */ +#define TX_STACK_OFFSET_X7 (17 * REGBYTES) /* t2 */ +#define TX_STACK_OFFSET_X8 (12 * REGBYTES) /* s0/fp */ +#define TX_STACK_OFFSET_X9 (11 * REGBYTES) /* s1 */ +#define TX_STACK_OFFSET_X10 (27 * REGBYTES) /* a0 */ +#define TX_STACK_OFFSET_X11 (26 * REGBYTES) /* a1 */ +#define TX_STACK_OFFSET_X12 (25 * REGBYTES) /* a2 */ +#define TX_STACK_OFFSET_X13 (24 * REGBYTES) /* a3 */ +#define TX_STACK_OFFSET_X14 (23 * REGBYTES) /* a4 */ +#define TX_STACK_OFFSET_X15 (22 * REGBYTES) /* a5 */ +#define TX_STACK_OFFSET_X16 (21 * REGBYTES) /* a6 */ +#define TX_STACK_OFFSET_X17 (20 * REGBYTES) /* a7 */ +#define TX_STACK_OFFSET_X18 (10 * REGBYTES) /* s2 */ +#define TX_STACK_OFFSET_X19 (9 * REGBYTES) /* s3 */ +#define TX_STACK_OFFSET_X20 (8 * REGBYTES) /* s4 */ +#define TX_STACK_OFFSET_X21 (7 * REGBYTES) /* s5 */ +#define TX_STACK_OFFSET_X22 (6 * 
REGBYTES) /* s6 */ +#define TX_STACK_OFFSET_X23 (5 * REGBYTES) /* s7 */ +#define TX_STACK_OFFSET_X24 (4 * REGBYTES) /* s8 */ +#define TX_STACK_OFFSET_X25 (3 * REGBYTES) /* s9 */ +#define TX_STACK_OFFSET_X26 (2 * REGBYTES) /* s10 */ +#define TX_STACK_OFFSET_X27 (1 * REGBYTES) /* s11 */ +#define TX_STACK_OFFSET_X28 (16 * REGBYTES) /* t3 */ +#define TX_STACK_OFFSET_X29 (15 * REGBYTES) /* t4 */ +#define TX_STACK_OFFSET_X30 (14 * REGBYTES) /* t5 */ +#define TX_STACK_OFFSET_X31 (13 * REGBYTES) /* t6 */ + +/* Special Registers */ +#define TX_STACK_OFFSET_MSTATUS (29 * REGBYTES) +#define TX_STACK_OFFSET_MEPC (30 * REGBYTES) +#define TX_STACK_OFFSET_FCSR (63 * REGBYTES) + +/* Stack Frame Offsets */ +#define TX_STACK_OFFSET_TYPE (0 * REGBYTES) + +/* Floating Point Registers (F0-F31) */ +/* Note: Base offset for FPU regs is 31 * REGBYTES */ +/* Floating Point Registers (F0-F31) */ +/* Note: Base offset for FPU regs is 31 * REGBYTES */ +#define TX_STACK_OFFSET_F0 (31 * REGBYTES) +#define TX_STACK_OFFSET_F1 (32 * REGBYTES) +#define TX_STACK_OFFSET_F2 (33 * REGBYTES) +#define TX_STACK_OFFSET_F3 (34 * REGBYTES) +#define TX_STACK_OFFSET_F4 (35 * REGBYTES) +#define TX_STACK_OFFSET_F5 (36 * REGBYTES) +#define TX_STACK_OFFSET_F6 (37 * REGBYTES) +#define TX_STACK_OFFSET_F7 (38 * REGBYTES) +#define TX_STACK_OFFSET_F8 (39 * REGBYTES) +#define TX_STACK_OFFSET_F9 (40 * REGBYTES) +#define TX_STACK_OFFSET_F10 (41 * REGBYTES) +#define TX_STACK_OFFSET_F11 (42 * REGBYTES) +#define TX_STACK_OFFSET_F12 (43 * REGBYTES) +#define TX_STACK_OFFSET_F13 (44 * REGBYTES) +#define TX_STACK_OFFSET_F14 (45 * REGBYTES) +#define TX_STACK_OFFSET_F15 (46 * REGBYTES) +#define TX_STACK_OFFSET_F16 (47 * REGBYTES) +#define TX_STACK_OFFSET_F17 (48 * REGBYTES) +#define TX_STACK_OFFSET_F18 (49 * REGBYTES) +#define TX_STACK_OFFSET_F19 (50 * REGBYTES) +#define TX_STACK_OFFSET_F20 (51 * REGBYTES) +#define TX_STACK_OFFSET_F21 (52 * REGBYTES) +#define TX_STACK_OFFSET_F22 (53 * REGBYTES) +#define TX_STACK_OFFSET_F23 (54 * 
REGBYTES) +#define TX_STACK_OFFSET_F24 (55 * REGBYTES) +#define TX_STACK_OFFSET_F25 (56 * REGBYTES) +#define TX_STACK_OFFSET_F26 (57 * REGBYTES) +#define TX_STACK_OFFSET_F27 (58 * REGBYTES) +#define TX_STACK_OFFSET_F28 (59 * REGBYTES) +#define TX_STACK_OFFSET_F29 (60 * REGBYTES) +#define TX_STACK_OFFSET_F30 (61 * REGBYTES) +#define TX_STACK_OFFSET_F31 (62 * REGBYTES) + +/* FCSR is stored after F31 */ +/* FCSR is stored after F31 */ +#define TX_STACK_OFFSET_FCSR (63 * REGBYTES) + +/* Thread Control Block (TX_THREAD) Offsets */ +#define TX_THREAD_RUN_COUNT (1 * REGBYTES) +#define TX_THREAD_STACK_PTR (2 * REGBYTES) +#define TX_THREAD_STACK_END (4 * REGBYTES) +#define TX_THREAD_TIME_SLICE (6 * REGBYTES) + +/* Stack Frame Sizes */ +/* FPU Enabled: 65 Registers (x0-x31, f0-f31, fcsr) + Alignment */ +#define TX_THREAD_FRAME_SIZE_FPU (65 * REGBYTES) +/* FPU Disabled: 32 Registers (x0-x31) + Alignment */ +#define TX_THREAD_FRAME_SIZE_INT (32 * REGBYTES) + + +#else /*not __ASSEMBLER__ */ /* Include for memset. */ #include <string.h> @@ -62,16 +177,15 @@ alternately be defined on the command line. */ #include "tx_user.h" -#endif /* TX_INCLUDE_USER_DEFINE_FILE */ +#endif -#endif /* __ASSEMBLER__ */ + +/* Define compiler library include files. */ /* Define ThreadX basic types for this port. */ #define VOID void - -#ifndef __ASSEMBLER__ typedef char CHAR; typedef unsigned char UCHAR; typedef int INT; @@ -82,7 +196,8 @@ typedef unsigned long long ULONG64; typedef short SHORT; typedef unsigned short USHORT; #define ULONG64_DEFINED -#endif /* __ASSEMBLER__ */ +#define ALIGN_TYPE_DEFINED +#define ALIGN_TYPE ULONG64 @@ -229,36 +344,25 @@ typedef unsigned short USHORT; is used to define a local function save area for the disable and restore macros. */ -/* Expose helper used to perform an atomic read/modify/write of mstatus. - The helper composes and returns the posture per ThreadX contract. 
*/ -#ifndef __ASSEMBLER__ -UINT _tx_thread_interrupt_control(UINT new_posture); -#endif - #ifdef TX_DISABLE_INLINE -#define TX_INTERRUPT_SAVE_AREA register UINT interrupt_save; +UINT _tx_thread_interrupt_control(UINT new_posture); -#define TX_DISABLE __asm__ volatile("csrrci %0, mstatus, 8" : "=r" (interrupt_save) :: "memory"); -#define TX_RESTORE { \ - unsigned long _temp_mstatus; \ - __asm__ volatile( \ - "csrc mstatus, 8\n" \ - "andi %0, %1, 8\n" \ - "csrs mstatus, %0" \ - : "=&r" (_temp_mstatus) \ - : "r" (interrupt_save) \ - : "memory"); \ - } - -#else - #define TX_INTERRUPT_SAVE_AREA register UINT interrupt_save; #define TX_DISABLE interrupt_save = _tx_thread_interrupt_control(TX_INT_DISABLE); #define TX_RESTORE _tx_thread_interrupt_control(interrupt_save); -#endif /* TX_DISABLE_INLINE */ +#else + +#define TX_INTERRUPT_SAVE_AREA UINT interrupt_save; +/* Atomically read mstatus into interrupt_save and clear MIE (bit 3); volatile and the "memory" clobber keep the compiler from moving memory accesses out of the critical section. */ +#define TX_DISABLE {__asm__ volatile ("csrrci %0, mstatus, 0x08" : "=r" (interrupt_save) : : "memory");}; +/* We only care about mstatus.mie (bit 3), so mask interrupt_save and write to mstatus. */ +#define TX_RESTORE {register UINT __tempmask = interrupt_save & 0x08; \ + __asm__ volatile ("csrrs x0, mstatus, %0 \n\t" : : "r" (__tempmask) : "memory");}; + +#endif /* Define the interrupt lockout macros for each ThreadX object. */ @@ -273,13 +377,12 @@ UINT _tx_thread_interrupt_control(UIN /* Define the version ID of ThreadX. This may be utilized by the application. */ -#ifndef __ASSEMBLER__ #ifdef TX_THREAD_INIT CHAR _tx_version_id[] = - "(c) 2024 Microsoft Corp. (c) 2026-present Eclipse ThreadX contributors. * ThreadX RISC-V32/GNU Version 6.5.0.202601 *"; + "Copyright (c) 2024 Microsoft Corporation. 
* ThreadX RISC-V32/GNU Version 6.4.2 *"; #else extern CHAR _tx_version_id[]; -#endif /* TX_THREAD_INIT */ -#endif /* __ASSEMBLER__ */ +#endif -#endif /* TX_PORT_H */ \ No newline at end of file +#endif /*not __ASSEMBLER__ */ +#endif diff --git a/ports/risc-v32/gnu/src/tx_initialize_low_level.S b/ports/risc-v32/gnu/src/tx_initialize_low_level.S index 703466bda..b14e71247 100644 --- a/ports/risc-v32/gnu/src/tx_initialize_low_level.S +++ b/ports/risc-v32/gnu/src/tx_initialize_low_level.S @@ -18,6 +18,7 @@ /** */ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" .section .data .global __tx_free_memory_start @@ -34,6 +35,7 @@ __tx_free_memory_start: /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -65,48 +67,55 @@ __tx_free_memory_start: .global _tx_initialize_low_level .weak _tx_initialize_low_level _tx_initialize_low_level: - - /* Save the system stack pointer. */ - /* _tx_thread_system_stack_ptr = sp; */ - + andi sp, sp, -16 // Align stack pointer to 16 bytes la t0, _tx_thread_system_stack_ptr // Pickup address of system stack ptr sw sp, 0(t0) // Save system stack pointer - /* Pickup first free address. */ - /* _tx_initialize_unused_memory(__tx_free_memory_start); */ - la t0, __tx_free_memory_start // Pickup first free address la t1, _tx_initialize_unused_memory // Pickup address of unused memory sw t0, 0(t1) // Save unused memory address - /* Initialize floating point control/status register if floating point is enabled. 
*/ -#ifdef __riscv_flen - li t0, 0 - csrw fcsr, t0 // Clear FP control/status register +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + li t0, 0x00006000 // Set FS field of mstatus to 3 (Dirty) + csrs mstatus, t0 + fscsr x0 // Clear the fcsr #endif + la t0, _tx_trap_handler // Pickup trap handler address + csrw mtvec, t0 // Store trap handler address ret -/* Timer Interrupt Handler Note: - Platform-specific implementations must provide their own timer ISR. - The timer interrupt handler should follow this execution flow: + /* Define the actual timer interrupt/exception handler. */ - 1. Disable interrupts (if not done by hardware exception entry) - 2. Allocate interrupt stack frame (65*4 bytes with FP, 32*4 bytes without) - 3. Save RA (x1) on the stack at offset 28*4 - 4. Call _tx_thread_context_save to save thread context - 5. Call _tx_timer_interrupt to process the timer tick - 6. Call _tx_thread_context_restore to resume execution (does not return) + .global _tx_trap_handler +_tx_trap_handler: + /* Before calling _tx_thread_context_save, we have to allocate an interrupt + stack frame and save the current value of x1 (ra). 
*/ - Example (for CLINT timer): +//#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) +// addi sp, sp, -520 // Allocate space for all registers - with floating point enabled +//#else +// addi sp, sp, -256 // Allocate space for all registers - without floating point enabled +//#endif +// sd x1, 224(sp) // Store RA - _tx_timer_interrupt_handler: - addi sp, sp, -32*4 - sw ra, 28*4(sp) - call _tx_thread_context_save - call _tx_timer_interrupt - j _tx_thread_context_restore +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, -TX_THREAD_FRAME_SIZE_FPU // Allocate space for all registers - with floating point enabled +#else + addi sp, sp, -TX_THREAD_FRAME_SIZE_INT // Allocate space for all registers - without floating point enabled +#endif - The port assumes Machine mode (M-mode) execution. - For Supervisor mode (S-mode), use sstatus and SIE/SPIE instead of mstatus. - See the RISC-V Privileged Specification for more details. */ \ No newline at end of file + STORE x1, TX_STACK_OFFSET_X1(sp) + call _tx_thread_context_save // Call ThreadX context save + csrr t0, mcause // Pickup mcause + li t1, 0x80000007 + beq t0, t1, _tx_timer_handler_entry // If mcause is Timer Interrupt, call timer interrupt handler + + j _tx_trap_exit + /* Call the ThreadX timer routine. */ + _tx_timer_handler_entry: + call _tx_timer_interrupt // Call timer interrupt handler + + /* Timer interrupt processing is done, jump to ThreadX context restore. */ +_tx_trap_exit: + j _tx_thread_context_restore // Jump to ThreadX context restore function. Note: this does not return! 
diff --git a/ports/risc-v32/gnu/src/tx_thread_context_restore.S b/ports/risc-v32/gnu/src/tx_thread_context_restore.S index 73a07f61d..3f6592d9c 100644 --- a/ports/risc-v32/gnu/src/tx_thread_context_restore.S +++ b/ports/risc-v32/gnu/src/tx_thread_context_restore.S @@ -19,6 +19,8 @@ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" + .section .text /**************************************************************************/ /* */ @@ -29,6 +31,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -61,7 +64,7 @@ _tx_thread_context_restore: /* Lockout interrupts. */ - csrci mstatus, 0x08 // Disable interrupts (MIE bit 3) + csrci mstatus, 0x08 // Disable interrupts #ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY call _tx_execution_isr_exit // Call the ISR execution exit function @@ -72,9 +75,9 @@ _tx_thread_context_restore: { */ la t0, _tx_thread_system_state // Pickup addr of nested interrupt count - lw t1, 0(t0) // Pickup nested interrupt count + LOAD t1, 0(t0) // Pickup nested interrupt count addi t1, t1, -1 // Decrement the nested interrupt counter - sw t1, 0(t0) // Store new nested count + STORE t1, 0(t0) // Store new nested count beqz t1, _tx_thread_not_nested_restore // If 0, not nested restore /* Interrupts are nested. */ @@ -83,52 +86,60 @@ _tx_thread_context_restore: interrupt. */ /* Recover floating point registers. 
*/ -#if defined(__riscv_float_abi_single) - flw f0, 31*4(sp) // Recover ft0 - flw f1, 32*4(sp) // Recover ft1 - flw f2, 33*4(sp) // Recover ft2 - flw f3, 34*4(sp) // Recover ft3 - flw f4, 35*4(sp) // Recover ft4 - flw f5, 36*4(sp) // Recover ft5 - flw f6, 37*4(sp) // Recover ft6 - flw f7, 38*4(sp) // Recover ft7 - flw f10, 41*4(sp) // Recover fa0 - flw f11, 42*4(sp) // Recover fa1 - flw f12, 43*4(sp) // Recover fa2 - flw f13, 44*4(sp) // Recover fa3 - flw f14, 45*4(sp) // Recover fa4 - flw f15, 46*4(sp) // Recover fa5 - flw f16, 47*4(sp) // Recover fa6 - flw f17, 48*4(sp) // Recover fa7 - flw f28, 59*4(sp) // Recover ft8 - flw f29, 60*4(sp) // Recover ft9 - flw f30, 61*4(sp) // Recover ft10 - flw f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr -#elif defined(__riscv_float_abi_double) - fld f0, 31*4(sp) // Recover ft0 - fld f1, 32*4(sp) // Recover ft1 - fld f2, 33*4(sp) // Recover ft2 - fld f3, 34*4(sp) // Recover ft3 - fld f4, 35*4(sp) // Recover ft4 - fld f5, 36*4(sp) // Recover ft5 - fld f6, 37*4(sp) // Recover ft6 - fld f7, 38*4(sp) // Recover ft7 - fld f10, 41*4(sp) // Recover fa0 - fld f11, 42*4(sp) // Recover fa1 - fld f12, 43*4(sp) // Recover fa2 - fld f13, 44*4(sp) // Recover fa3 - fld f14, 45*4(sp) // Recover fa4 - fld f15, 46*4(sp) // Recover fa5 - fld f16, 47*4(sp) // Recover fa6 - fld f17, 48*4(sp) // Recover fa7 - fld f28, 59*4(sp) // Recover ft8 - fld f29, 60*4(sp) // Recover ft9 - fld f30, 61*4(sp) // Recover ft10 - fld f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr +#if defined(__riscv_flen) + LOAD t1, TX_STACK_OFFSET_MSTATUS(sp) // Pickup thread's floating point state */ + /* Check if floating point is enabled */ + srli t1, t1, 13 + andi t1, t1, 0x3 + beqz t1, _tx_thread_skip_fp_restore // Skip floating point restore FS is Off +#if __riscv_flen == 32 + flw f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + flw f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + flw 
f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + flw f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + flw f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + flw f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + flw f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + flw f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + flw f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + flw f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + flw f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + flw f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + flw f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + flw f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + flw f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + flw f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + flw f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + flw f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + flw f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + flw f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + lw t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#elif __riscv_flen == 64 + fld f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + fld f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + fld f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + fld f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + fld f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + fld f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + fld f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + fld f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + fld f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + fld f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + fld f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + fld f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + fld f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + fld f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + fld f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + fld f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + fld f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + fld f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + fld f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + fld f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw 
fcsr, t0 // +#endif +_tx_thread_skip_fp_restore: #endif /* Recover standard registers. */ @@ -138,54 +149,33 @@ _tx_thread_context_restore: Also skip the saved registers since they have been restored by any function we called, except s0 since we use it ourselves. */ - lw t0, 30*4(sp) // Recover mepc + LOAD t0, TX_STACK_OFFSET_MEPC(sp) // Recover mepc csrw mepc, t0 // Setup mepc - - /* Compose mstatus via read/modify/write to avoid clobbering unrelated bits. - Set MPIE and restore MPP to Machine, preserve other fields. */ - - csrr t1, mstatus - - /* Clear MPP/MPIE/MIE bits in t1 then set desired values. */ - - li t2, 0x1888 // MPP(0x1800) | MPIE(0x80) | MIE(0x08) - li t3, 0x1800 // Set MPP to Machine mode (bits 12:11) - - /* Construct new mstatus in t1: clear mask bits, set MPP/MPIE and optionally FP bit, - preserve everything except the bits we will modify. */ - - li t4, ~0x1888 // Clear mask for MPP/MPIE/MIE - and t1, t1, t4 - or t1, t1, t3 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - li t0, 0x2000 // Set FS bits (bits 14:13 to 01) for FP state - or t1, t1, t0 -#endif - csrw mstatus, t1 // Update mstatus safely - - lw ra, 28*4(sp) // Recover return address - lw t0, 19*4(sp) // Recover t0 - lw t1, 18*4(sp) // Recover t1 - lw t2, 17*4(sp) // Recover t2 - lw s0, 12*4(sp) // Recover s0 - lw a0, 27*4(sp) // Recover a0 - lw a1, 26*4(sp) // Recover a1 - lw a2, 25*4(sp) // Recover a2 - lw a3, 24*4(sp) // Recover a3 - lw a4, 23*4(sp) // Recover a4 - lw a5, 22*4(sp) // Recover a5 - lw a6, 21*4(sp) // Recover a6 - lw a7, 20*4(sp) // Recover a7 - lw t3, 16*4(sp) // Recover t3 - lw t4, 15*4(sp) // Recover t4 - lw t5, 14*4(sp) // Recover t5 - lw t6, 13*4(sp) // Recover t6 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, 65*4 // Recover stack frame - with floating point enabled + LOAD t0, TX_STACK_OFFSET_MSTATUS(sp) // Recover mstatus + csrw mstatus, t0 + + LOAD x1, TX_STACK_OFFSET_X1(sp) // Recover 
RA + LOAD x5, TX_STACK_OFFSET_X5(sp) // Recover t0 + LOAD x6, TX_STACK_OFFSET_X6(sp) // Recover t1 + LOAD x7, TX_STACK_OFFSET_X7(sp) // Recover t2 + LOAD x8, TX_STACK_OFFSET_X8(sp) // Recover s0 + LOAD x10, TX_STACK_OFFSET_X10(sp) // Recover a0 + LOAD x11, TX_STACK_OFFSET_X11(sp) // Recover a1 + LOAD x12, TX_STACK_OFFSET_X12(sp) // Recover a2 + LOAD x13, TX_STACK_OFFSET_X13(sp) // Recover a3 + LOAD x14, TX_STACK_OFFSET_X14(sp) // Recover a4 + LOAD x15, TX_STACK_OFFSET_X15(sp) // Recover a5 + LOAD x16, TX_STACK_OFFSET_X16(sp) // Recover a6 + LOAD x17, TX_STACK_OFFSET_X17(sp) // Recover a7 + LOAD x28, TX_STACK_OFFSET_X28(sp) // Recover t3 + LOAD x29, TX_STACK_OFFSET_X29(sp) // Recover t4 + LOAD x30, TX_STACK_OFFSET_X30(sp) // Recover t5 + LOAD x31, TX_STACK_OFFSET_X31(sp) // Recover t6 + +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, TX_THREAD_FRAME_SIZE_FPU // Recover stack frame - with floating point enabled #else - addi sp, sp, 32*4 // Recover stack frame - without floating point enabled + addi sp, sp, TX_THREAD_FRAME_SIZE_INT // Recover stack frame - without floating point enabled #endif mret // Return to point of interrupt @@ -196,21 +186,16 @@ _tx_thread_not_nested_restore: || (_tx_thread_preempt_disable)) { */ - la t0, _tx_thread_current_ptr // Pickup current thread pointer address - lw t1, 0(t0) // Pickup current thread pointer - + la t0, _tx_thread_current_ptr + LOAD t1, 0(t0) // Pickup current thread pointer beqz t1, _tx_thread_idle_system_restore // If NULL, idle system restore - - la t0, _tx_thread_preempt_disable // Pickup preempt disable flag address - lw t2, 0(t0) // Pickup preempt disable flag (UINT) - + la t0, _tx_thread_preempt_disable + LOAD t2, 0(t0) // Pickup preempt disable flag bgtz t2, _tx_thread_no_preempt_restore // If set, restore interrupted thread - - la t0, _tx_thread_execute_ptr // Pickup thread execute pointer address - lw t2, 0(t0) // Pickup thread execute pointer - + la t0, 
_tx_thread_execute_ptr + LOAD t2, 0(t0) // Pickup thread execute pointer bne t1, t2, _tx_thread_preempt_restore // If higher-priority thread is ready, preempt @@ -218,57 +203,65 @@ _tx_thread_no_preempt_restore: /* Restore interrupted thread or ISR. */ /* Pickup the saved stack pointer. */ - /* sp = _tx_thread_current_ptr -> tx_thread_stack_ptr; */ + /* SP = _tx_thread_current_ptr -> tx_thread_stack_ptr; */ - lw sp, 8(t1) // Switch back to thread's stack + LOAD sp, TX_THREAD_STACK_PTR(t1) // Switch back to thread's stack /* Recover floating point registers. */ -#if defined(__riscv_float_abi_single) - flw f0, 31*4(sp) // Recover ft0 - flw f1, 32*4(sp) // Recover ft1 - flw f2, 33*4(sp) // Recover ft2 - flw f3, 34*4(sp) // Recover ft3 - flw f4, 35*4(sp) // Recover ft4 - flw f5, 36*4(sp) // Recover ft5 - flw f6, 37*4(sp) // Recover ft6 - flw f7, 38*4(sp) // Recover ft7 - flw f10, 41*4(sp) // Recover fa0 - flw f11, 42*4(sp) // Recover fa1 - flw f12, 43*4(sp) // Recover fa2 - flw f13, 44*4(sp) // Recover fa3 - flw f14, 45*4(sp) // Recover fa4 - flw f15, 46*4(sp) // Recover fa5 - flw f16, 47*4(sp) // Recover fa6 - flw f17, 48*4(sp) // Recover fa7 - flw f28, 59*4(sp) // Recover ft8 - flw f29, 60*4(sp) // Recover ft9 - flw f30, 61*4(sp) // Recover ft10 - flw f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr -#elif defined(__riscv_float_abi_double) - fld f0, 31*4(sp) // Recover ft0 - fld f1, 32*4(sp) // Recover ft1 - fld f2, 33*4(sp) // Recover ft2 - fld f3, 34*4(sp) // Recover ft3 - fld f4, 35*4(sp) // Recover ft4 - fld f5, 36*4(sp) // Recover ft5 - fld f6, 37*4(sp) // Recover ft6 - fld f7, 38*4(sp) // Recover ft7 - fld f10, 41*4(sp) // Recover fa0 - fld f11, 42*4(sp) // Recover fa1 - fld f12, 43*4(sp) // Recover fa2 - fld f13, 44*4(sp) // Recover fa3 - fld f14, 45*4(sp) // Recover fa4 - fld f15, 46*4(sp) // Recover fa5 - fld f16, 47*4(sp) // Recover fa6 - fld f17, 48*4(sp) // Recover fa7 - fld f28, 59*4(sp) // Recover ft8 - fld 
f29, 60*4(sp) // Recover ft9 - fld f30, 61*4(sp) // Recover ft10 - fld f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr +#if defined(__riscv_flen) + LOAD t1, TX_STACK_OFFSET_MSTATUS(sp) // Pickup thread's floating point state */ + /* Check if floating point is enabled */ + srli t1, t1, 13 + andi t1, t1, 0x3 + beqz t1, _tx_thread_no_preempt_skip_fp_restore // Skip floating point restore FS is Off +#if __riscv_flen == 32 + flw f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + flw f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + flw f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + flw f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + flw f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + flw f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + flw f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + flw f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + flw f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + flw f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + flw f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + flw f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + flw f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + flw f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + flw f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + flw f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + flw f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + flw f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + flw f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + flw f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + lw t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#elif __riscv_flen == 64 + fld f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + fld f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + fld f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + fld f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + fld f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + fld f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + fld f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + fld f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + fld f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + fld 
f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + fld f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + fld f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + fld f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + fld f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + fld f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + fld f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + fld f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + fld f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + fld f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + fld f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#endif +_tx_thread_no_preempt_skip_fp_restore: #endif /* Recover the saved context and return to the point of interrupt. */ @@ -277,46 +270,33 @@ _tx_thread_no_preempt_restore: /* Restore registers, Skip global pointer because that does not change */ - lw t0, 30*4(sp) // Recover mepc + LOAD t0, TX_STACK_OFFSET_MEPC(sp) // Recover mepc csrw mepc, t0 // Setup mepc - - /* Compose mstatus via read/modify/write to avoid clobbering unrelated bits. 
*/ - - csrr t1, mstatus - li t2, 0x1888 // MPP(0x1800) | MPIE(0x80) | MIE(0x08) - li t3, 0x1800 // Set MPP to Machine mode - li t4, ~0x1888 // Clear mask for MPP/MPIE/MIE - and t1, t1, t4 - or t1, t1, t3 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - li t0, 0x2000 // Set FS bits for FP state - or t1, t1, t0 -#endif - csrw mstatus, t1 // Update mstatus safely - - lw ra, 28*4(sp) // Recover return address - lw t0, 19*4(sp) // Recover t0 - lw t1, 18*4(sp) // Recover t1 - lw t2, 17*4(sp) // Recover t2 - lw s0, 12*4(sp) // Recover s0 - lw a0, 27*4(sp) // Recover a0 - lw a1, 26*4(sp) // Recover a1 - lw a2, 25*4(sp) // Recover a2 - lw a3, 24*4(sp) // Recover a3 - lw a4, 23*4(sp) // Recover a4 - lw a5, 22*4(sp) // Recover a5 - lw a6, 21*4(sp) // Recover a6 - lw a7, 20*4(sp) // Recover a7 - lw t3, 16*4(sp) // Recover t3 - lw t4, 15*4(sp) // Recover t4 - lw t5, 14*4(sp) // Recover t5 - lw t6, 13*4(sp) // Recover t6 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, 65*4 // Recover stack frame - with floating point enabled + LOAD t0, TX_STACK_OFFSET_MSTATUS(sp) // Recover mstatus + csrw mstatus, t0 + + LOAD x1, TX_STACK_OFFSET_X1(sp) // Recover RA + LOAD x5, TX_STACK_OFFSET_X5(sp) // Recover t0 + LOAD x6, TX_STACK_OFFSET_X6(sp) // Recover t1 + LOAD x7, TX_STACK_OFFSET_X7(sp) // Recover t2 + LOAD x8, TX_STACK_OFFSET_X8(sp) // Recover s0 + LOAD x10, TX_STACK_OFFSET_X10(sp) // Recover a0 + LOAD x11, TX_STACK_OFFSET_X11(sp) // Recover a1 + LOAD x12, TX_STACK_OFFSET_X12(sp) // Recover a2 + LOAD x13, TX_STACK_OFFSET_X13(sp) // Recover a3 + LOAD x14, TX_STACK_OFFSET_X14(sp) // Recover a4 + LOAD x15, TX_STACK_OFFSET_X15(sp) // Recover a5 + LOAD x16, TX_STACK_OFFSET_X16(sp) // Recover a6 + LOAD x17, TX_STACK_OFFSET_X17(sp) // Recover a7 + LOAD x28, TX_STACK_OFFSET_X28(sp) // Recover t3 + LOAD x29, TX_STACK_OFFSET_X29(sp) // Recover t4 + LOAD x30, TX_STACK_OFFSET_X30(sp) // Recover t5 + LOAD x31, 
TX_STACK_OFFSET_X31(sp) // Recover t6 + +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, TX_THREAD_FRAME_SIZE_FPU // Recover stack frame - with floating point enabled #else - addi sp, sp, 32*4 // Recover stack frame - without floating point enabled + addi sp, sp, TX_THREAD_FRAME_SIZE_INT // Recover stack frame - without floating point enabled #endif mret // Return to point of interrupt @@ -327,67 +307,74 @@ _tx_thread_preempt_restore: /* Instead of directly activating the thread again, ensure we save the entire stack frame by saving the remaining registers. */ - lw t0, 8(t1) // Pickup thread's stack pointer - ori t3, zero, 1 // Build interrupt stack type - sw t3, 0(t0) // Store stack type - + LOAD sp, TX_THREAD_STACK_PTR(t1) + ori t3, x0, 1 // Build interrupt stack type + STORE t3, 0(sp) // Store stack type /* Store floating point preserved registers. */ -#ifdef __riscv_float_abi_single - fsw f8, 39*4(t0) // Store fs0 - fsw f9, 40*4(t0) // Store fs1 - fsw f18, 49*4(t0) // Store fs2 - fsw f19, 50*4(t0) // Store fs3 - fsw f20, 51*4(t0) // Store fs4 - fsw f21, 52*4(t0) // Store fs5 - fsw f22, 53*4(t0) // Store fs6 - fsw f23, 54*4(t0) // Store fs7 - fsw f24, 55*4(t0) // Store fs8 - fsw f25, 56*4(t0) // Store fs9 - fsw f26, 57*4(t0) // Store fs10 - fsw f27, 58*4(t0) // Store fs11 -#elif defined(__riscv_float_abi_double) - fsd f8, 39*4(t0) // Store fs0 - fsd f9, 40*4(t0) // Store fs1 - fsd f18, 49*4(t0) // Store fs2 - fsd f19, 50*4(t0) // Store fs3 - fsd f20, 51*4(t0) // Store fs4 - fsd f21, 52*4(t0) // Store fs5 - fsd f22, 53*4(t0) // Store fs6 - fsd f23, 54*4(t0) // Store fs7 - fsd f24, 55*4(t0) // Store fs8 - fsd f25, 56*4(t0) // Store fs9 - fsd f26, 57*4(t0) // Store fs10 - fsd f27, 58*4(t0) // Store fs11 +#if defined(__riscv_flen) + LOAD t2, TX_STACK_OFFSET_MSTATUS(sp) // Pickup thread's floating point state */ + /* Check if floating point is enabled */ + srli t2, t2, 13 + andi t2, t2, 0x3 + beqz t2, 
_tx_thread_preempt_skip_fp_restore // Skip floating point save if FS is Off +#if __riscv_flen == 32 + fsw f8, TX_STACK_OFFSET_F8(sp) // Store fs0 + fsw f9, TX_STACK_OFFSET_F9(sp) // Store fs1 + fsw f18, TX_STACK_OFFSET_F18(sp) // Store fs2 + fsw f19, TX_STACK_OFFSET_F19(sp) // Store fs3 + fsw f20, TX_STACK_OFFSET_F20(sp) // Store fs4 + fsw f21, TX_STACK_OFFSET_F21(sp) // Store fs5 + fsw f22, TX_STACK_OFFSET_F22(sp) // Store fs6 + fsw f23, TX_STACK_OFFSET_F23(sp) // Store fs7 + fsw f24, TX_STACK_OFFSET_F24(sp) // Store fs8 + fsw f25, TX_STACK_OFFSET_F25(sp) // Store fs9 + fsw f26, TX_STACK_OFFSET_F26(sp) // Store fs10 + fsw f27, TX_STACK_OFFSET_F27(sp) // Store fs11 +#elif __riscv_flen == 64 + fsd f8, TX_STACK_OFFSET_F8(sp) // Store fs0 + fsd f9, TX_STACK_OFFSET_F9(sp) // Store fs1 + fsd f18, TX_STACK_OFFSET_F18(sp) // Store fs2 + fsd f19, TX_STACK_OFFSET_F19(sp) // Store fs3 + fsd f20, TX_STACK_OFFSET_F20(sp) // Store fs4 + fsd f21, TX_STACK_OFFSET_F21(sp) // Store fs5 + fsd f22, TX_STACK_OFFSET_F22(sp) // Store fs6 + fsd f23, TX_STACK_OFFSET_F23(sp) // Store fs7 + fsd f24, TX_STACK_OFFSET_F24(sp) // Store fs8 + fsd f25, TX_STACK_OFFSET_F25(sp) // Store fs9 + fsd f26, TX_STACK_OFFSET_F26(sp) // Store fs10 + fsd f27, TX_STACK_OFFSET_F27(sp) // Store fs11 +#endif +_tx_thread_preempt_skip_fp_restore: #endif /* Store standard preserved registers. 
*/ - sw x9, 11*4(t0) // Store s1 - sw x18, 10*4(t0) // Store s2 - sw x19, 9*4(t0) // Store s3 - sw x20, 8*4(t0) // Store s4 - sw x21, 7*4(t0) // Store s5 - sw x22, 6*4(t0) // Store s6 - sw x23, 5*4(t0) // Store s7 - sw x24, 4*4(t0) // Store s8 - sw x25, 3*4(t0) // Store s9 - sw x26, 2*4(t0) // Store s10 - sw x27, 1*4(t0) // Store s11 + STORE x9, TX_STACK_OFFSET_X9(sp) // Store s1 + STORE x18, TX_STACK_OFFSET_X18(sp) // Store s2 + STORE x19, TX_STACK_OFFSET_X19(sp) // Store s3 + STORE x20, TX_STACK_OFFSET_X20(sp) // Store s4 + STORE x21, TX_STACK_OFFSET_X21(sp) // Store s5 + STORE x22, TX_STACK_OFFSET_X22(sp) // Store s6 + STORE x23, TX_STACK_OFFSET_X23(sp) // Store s7 + STORE x24, TX_STACK_OFFSET_X24(sp) // Store s8 + STORE x25, TX_STACK_OFFSET_X25(sp) // Store s9 + STORE x26, TX_STACK_OFFSET_X26(sp) // Store s10 + STORE x27, TX_STACK_OFFSET_X27(sp) // Store s11 // Note: s0 is already stored! - + STORE sp, TX_THREAD_STACK_PTR(t1) /* Save the remaining time-slice and disable it. */ /* if (_tx_timer_time_slice) { */ la t0, _tx_timer_time_slice // Pickup time slice variable address - lw t2, 0(t0) // Pickup time slice + LOAD t2, 0(t0) // Pickup time slice beqz t2, _tx_thread_dont_save_ts // If 0, skip time slice processing /* _tx_thread_current_ptr -> tx_thread_time_slice = _tx_timer_time_slice _tx_timer_time_slice = 0; */ - sw t2, 24(t1) // Save current time slice - sw x0, 0(t0) // Clear global time slice + STORE t2, TX_THREAD_TIME_SLICE(t1) // Save current time slice + STORE x0, 0(t0) // Clear global time slice /* } */ @@ -398,9 +385,8 @@ _tx_thread_dont_save_ts: /* Return to the scheduler. 
*/ /* _tx_thread_schedule(); */ - la t0, _tx_thread_current_ptr // Pickup current thread pointer address - sw x0, 0(t0) // Clear current thread pointer - + la t0, _tx_thread_current_ptr + STORE x0, 0(t0) // Clear current thread pointer /* } */ _tx_thread_idle_system_restore: diff --git a/ports/risc-v32/gnu/src/tx_thread_context_save.S b/ports/risc-v32/gnu/src/tx_thread_context_save.S index 664029340..86c4d3c18 100644 --- a/ports/risc-v32/gnu/src/tx_thread_context_save.S +++ b/ports/risc-v32/gnu/src/tx_thread_context_save.S @@ -19,6 +19,8 @@ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" + .section .text /**************************************************************************/ /* */ @@ -29,6 +31,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -55,202 +58,227 @@ /**************************************************************************/ /* VOID _tx_thread_context_save(VOID) { */ - .global _tx_thread_context_save + .global _tx_thread_context_save _tx_thread_context_save: - /* Upon entry to this routine, RA/x1 has been saved on the stack - and the stack has been already allocated for the entire context: - addi sp, sp, -32*4 (or -65*4) - sw ra, 28*4(sp) - */ + /* Upon entry to this routine, it is assumed that interrupts are locked + out and the interrupt stack frame has been allocated and x1 (ra) has + been saved on the stack. */ + + STORE x5, TX_STACK_OFFSET_X5(sp) // First store t0 and t1 + STORE x6, TX_STACK_OFFSET_X6(sp) - sw t0, 19*4(sp) // Store t0 - sw t1, 18*4(sp) // Store t1 + la x5, _tx_thread_system_state // Pickup address of system state + LOAD x6, 0(x5) // Pickup system state - /* Check for a nested interrupt. */ + /* Check for a nested interrupt condition. 
*/ /* if (_tx_thread_system_state++) { */ + beqz x6, _tx_thread_not_nested_save // If 0, first interrupt condition + addi x6, x6, 1 // Increment the interrupt counter + STORE x6, 0(x5) // Store the interrupt counter + + /* Nested interrupt condition. + Save the rest of the scratch registers on the stack and return to the + calling ISR. */ - la t0, _tx_thread_system_state // Pickup addr of system state var - lw t1, 0(t0) // Pickup system state - addi t1, t1, 1 // Increment system state - sw t1, 0(t0) // Store system state - li t0, 1 - bgt t1, t0, _tx_thread_nested_save // If it's more than 1, nested interrupt - - /* First level interrupt, save the rest of the scratch registers and - check for a thread to preempt. */ - - sw t2, 17*4(sp) // Store t2 - sw s0, 12*4(sp) // Store s0 - sw a0, 27*4(sp) // Store a0 - sw a1, 26*4(sp) // Store a1 - sw a2, 25*4(sp) // Store a2 - sw a3, 24*4(sp) // Store a3 - sw a4, 23*4(sp) // Store a4 - sw a5, 22*4(sp) // Store a5 - sw a6, 21*4(sp) // Store a6 - sw a7, 20*4(sp) // Store a7 - sw t3, 16*4(sp) // Store t3 - sw t4, 15*4(sp) // Store t4 - sw t5, 14*4(sp) // Store t5 - sw t6, 13*4(sp) // Store t6 - - /* Save floating point registers. 
*/ -#if defined(__riscv_float_abi_single) - fsw f0, 31*4(sp) // Store ft0 - fsw f1, 32*4(sp) // Store ft1 - fsw f2, 33*4(sp) // Store ft2 - fsw f3, 34*4(sp) // Store ft3 - fsw f4, 35*4(sp) // Store ft4 - fsw f5, 36*4(sp) // Store ft5 - fsw f6, 37*4(sp) // Store ft6 - fsw f7, 38*4(sp) // Store ft7 - fsw f10, 41*4(sp) // Store fa0 - fsw f11, 42*4(sp) // Store fa1 - fsw f12, 43*4(sp) // Store fa2 - fsw f13, 44*4(sp) // Store fa3 - fsw f14, 45*4(sp) // Store fa4 - fsw f15, 46*4(sp) // Store fa5 - fsw f16, 47*4(sp) // Store fa6 - fsw f17, 48*4(sp) // Store fa7 - fsw f28, 59*4(sp) // Store ft8 - fsw f29, 60*4(sp) // Store ft9 - fsw f30, 61*4(sp) // Store ft10 - fsw f31, 62*4(sp) // Store ft11 + STORE x7, TX_STACK_OFFSET_X7(sp) // Store t2 + STORE x8, TX_STACK_OFFSET_X8(sp) // Store s0 + STORE x10, TX_STACK_OFFSET_X10(sp) // Store a0 + STORE x11, TX_STACK_OFFSET_X11(sp) // Store a1 + STORE x12, TX_STACK_OFFSET_X12(sp) // Store a2 + STORE x13, TX_STACK_OFFSET_X13(sp) // Store a3 + STORE x14, TX_STACK_OFFSET_X14(sp) // Store a4 + STORE x15, TX_STACK_OFFSET_X15(sp) // Store a5 + STORE x16, TX_STACK_OFFSET_X16(sp) // Store a6 + STORE x17, TX_STACK_OFFSET_X17(sp) // Store a7 + STORE x28, TX_STACK_OFFSET_X28(sp) // Store t3 + STORE x29, TX_STACK_OFFSET_X29(sp) // Store t4 + STORE x30, TX_STACK_OFFSET_X30(sp) // Store t5 + STORE x31, TX_STACK_OFFSET_X31(sp) // Store t6 + csrr t0, mepc // Load exception program counter + STORE t0, TX_STACK_OFFSET_MEPC(sp) // Save it on the stack + + csrr t0, mstatus // Save mstatus unconditionally; context restore always reloads it from the frame + STORE t0, TX_STACK_OFFSET_MSTATUS(sp) + /* Save floating point scratch registers. */ +#if defined(__riscv_flen) + /* Check the floating point status for lazy FPU. + Invariant: If FS is Off (0), the FP registers are Disabled/Unused + so we can skip saving them. + Note: We strictly save Initial (1), Clean (2), and Dirty (3) states. 
*/ + srli t1, t0, 13 + andi t1, t1, 0x3 + beqz t1, _tx_skip_nested_fpu_save // Skip floating point save if FS is Off +#if (__riscv_flen == 32) + fsw f0, TX_STACK_OFFSET_F0(sp) // Store ft0 + fsw f1, TX_STACK_OFFSET_F1(sp) // Store ft1 + fsw f2, TX_STACK_OFFSET_F2(sp) // Store ft2 + fsw f3, TX_STACK_OFFSET_F3(sp) // Store ft3 + fsw f4, TX_STACK_OFFSET_F4(sp) // Store ft4 + fsw f5, TX_STACK_OFFSET_F5(sp) // Store ft5 + fsw f6, TX_STACK_OFFSET_F6(sp) // Store ft6 + fsw f7, TX_STACK_OFFSET_F7(sp) // Store ft7 + fsw f10,TX_STACK_OFFSET_F10(sp) // Store fa0 + fsw f11,TX_STACK_OFFSET_F11(sp) // Store fa1 + fsw f12,TX_STACK_OFFSET_F12(sp) // Store fa2 + fsw f13,TX_STACK_OFFSET_F13(sp) // Store fa3 + fsw f14,TX_STACK_OFFSET_F14(sp) // Store fa4 + fsw f15,TX_STACK_OFFSET_F15(sp) // Store fa5 + fsw f16,TX_STACK_OFFSET_F16(sp) // Store fa6 + fsw f17,TX_STACK_OFFSET_F17(sp) // Store fa7 + fsw f28,TX_STACK_OFFSET_F28(sp) // Store ft8 + fsw f29,TX_STACK_OFFSET_F29(sp) // Store ft9 + fsw f30,TX_STACK_OFFSET_F30(sp) // Store ft10 + fsw f31,TX_STACK_OFFSET_F31(sp) // Store ft11 csrr t0, fcsr - sw t0, 63*4(sp) // Store fcsr -#elif defined(__riscv_float_abi_double) - fsd f0, 31*4(sp) // Store ft0 - fsd f1, 32*4(sp) // Store ft1 - fsd f2, 33*4(sp) // Store ft2 - fsd f3, 34*4(sp) // Store ft3 - fsd f4, 35*4(sp) // Store ft4 - fsd f5, 36*4(sp) // Store ft5 - fsd f6, 37*4(sp) // Store ft6 - fsd f7, 38*4(sp) // Store ft7 - fsd f10, 41*4(sp) // Store fa0 - fsd f11, 42*4(sp) // Store fa1 - fsd f12, 43*4(sp) // Store fa2 - fsd f13, 44*4(sp) // Store fa3 - fsd f14, 45*4(sp) // Store fa4 - fsd f15, 46*4(sp) // Store fa5 - fsd f16, 47*4(sp) // Store fa6 - fsd f17, 48*4(sp) // Store fa7 - fsd f28, 59*4(sp) // Store ft8 - fsd f29, 60*4(sp) // Store ft9 - fsd f30, 61*4(sp) // Store ft10 - fsd f31, 62*4(sp) // Store ft11 + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr +#elif (__riscv_flen == 64) + fsd f0, TX_STACK_OFFSET_F0(sp) // Store ft0 + fsd f1, TX_STACK_OFFSET_F1(sp) // Store ft1 + fsd f2, 
TX_STACK_OFFSET_F2(sp) // Store ft2 + fsd f3, TX_STACK_OFFSET_F3(sp) // Store ft3 + fsd f4, TX_STACK_OFFSET_F4(sp) // Store ft4 + fsd f5, TX_STACK_OFFSET_F5(sp) // Store ft5 + fsd f6, TX_STACK_OFFSET_F6(sp) // Store ft6 + fsd f7, TX_STACK_OFFSET_F7(sp) // Store ft7 + fsd f10,TX_STACK_OFFSET_F10(sp) // Store fa0 + fsd f11,TX_STACK_OFFSET_F11(sp) // Store fa1 + fsd f12,TX_STACK_OFFSET_F12(sp) // Store fa2 + fsd f13,TX_STACK_OFFSET_F13(sp) // Store fa3 + fsd f14,TX_STACK_OFFSET_F14(sp) // Store fa4 + fsd f15,TX_STACK_OFFSET_F15(sp) // Store fa5 + fsd f16,TX_STACK_OFFSET_F16(sp) // Store fa6 + fsd f17,TX_STACK_OFFSET_F17(sp) // Store fa7 + fsd f28,TX_STACK_OFFSET_F28(sp) // Store ft8 + fsd f29,TX_STACK_OFFSET_F29(sp) // Store ft9 + fsd f30,TX_STACK_OFFSET_F30(sp) // Store ft10 + fsd f31,TX_STACK_OFFSET_F31(sp) // Store ft11 csrr t0, fcsr - sw t0, 63*4(sp) // Store fcsr + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr +#endif +_tx_skip_nested_fpu_save: #endif - csrr t0, mepc - sw t0, 30*4(sp) // Save it on the stack +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + call _tx_execution_isr_enter // Call the ISR execution enter function +#endif - /* Save mstatus. */ - csrr t0, mstatus - sw t0, 29*4(sp) + ret // Return to calling ISR - la t1, _tx_thread_current_ptr // Pickup address of current thread ptr - lw t2, 0(t1) // Pickup current thread pointer - beqz t2, _tx_thread_idle_system_save // If NULL, idle system was interrupted +_tx_thread_not_nested_save: + /* } */ - /* Save the current thread's stack pointer and switch to the system stack. */ - /* _tx_thread_current_ptr -> tx_thread_stack_ptr = sp; - sp = _tx_thread_system_stack_ptr; */ + /* Otherwise, not nested, check to see if a thread was running. 
*/ + /* else if (_tx_thread_current_ptr) + { */ + addi x6, x6, 1 // Increment the interrupt counter + STORE x6, 0(x5) // Store the interrupt counter - sw sp, 8(t2) // Save stack pointer - la t0, _tx_thread_system_stack_ptr - lw sp, 0(t0) // Switch to system stack + /* Not nested: Find the user thread that was running and load our SP */ - /* Call the ISR execution exit function if enabled. */ -#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY - call _tx_execution_isr_enter // Call the ISR execution enter function -#endif + LOAD x5, _tx_thread_current_ptr // Pickup current thread pointer + beqz x5, _tx_thread_idle_system_save // If NULL, idle system was interrupted - ret // Return to ISR - -_tx_thread_nested_save: - - /* Nested interrupt! Just save the scratch registers and return to the ISR. */ - - sw t2, 17*4(sp) // Store t2 - sw s0, 12*4(sp) // Store s0 - sw a0, 27*4(sp) // Store a0 - sw a1, 26*4(sp) // Store a1 - sw a2, 25*4(sp) // Store a2 - sw a3, 24*4(sp) // Store a3 - sw a4, 23*4(sp) // Store a4 - sw a5, 22*4(sp) // Store a5 - sw a6, 21*4(sp) // Store a6 - sw a7, 20*4(sp) // Store a7 - sw t3, 16*4(sp) // Store t3 - sw t4, 15*4(sp) // Store t4 - sw t5, 14*4(sp) // Store t5 - sw t6, 13*4(sp) // Store t6 - - /* Save floating point registers. */ -#if defined(__riscv_float_abi_single) - fsw f0, 31*4(sp) // Store ft0 - fsw f1, 32*4(sp) // Store ft1 - fsw f2, 33*4(sp) // Store ft2 - fsw f3, 34*4(sp) // Store ft3 - fsw f4, 35*4(sp) // Store ft4 - fsw f5, 36*4(sp) // Store ft5 - fsw f6, 37*4(sp) // Store ft6 - fsw f7, 38*4(sp) // Store ft7 - fsw f10, 41*4(sp) // Store fa0 - fsw f11, 42*4(sp) // Store fa1 - fsw f12, 43*4(sp) // Store fa2 - fsw f13, 44*4(sp) // Store fa3 - fsw f14, 45*4(sp) // Store fa4 - fsw f15, 46*4(sp) // Store fa5 - fsw f16, 47*4(sp) // Store fa6 - fsw f17, 48*4(sp) // Store fa7 - fsw f28, 59*4(sp) // Store ft8 - fsw f29, 60*4(sp) // Store ft9 - fsw f30, 61*4(sp) // Store ft10 - fsw f31, 62*4(sp) // Store ft11 + /* Save the standard scratch registers. 
*/ + + STORE x7, TX_STACK_OFFSET_X7(sp) // Store t2 + STORE x8, TX_STACK_OFFSET_X8(sp) // Store s0 + STORE x10, TX_STACK_OFFSET_X10(sp) // Store a0 + STORE x11, TX_STACK_OFFSET_X11(sp) // Store a1 + STORE x12, TX_STACK_OFFSET_X12(sp) // Store a2 + STORE x13, TX_STACK_OFFSET_X13(sp) // Store a3 + STORE x14, TX_STACK_OFFSET_X14(sp) // Store a4 + STORE x15, TX_STACK_OFFSET_X15(sp) // Store a5 + STORE x16, TX_STACK_OFFSET_X16(sp) // Store a6 + STORE x17, TX_STACK_OFFSET_X17(sp) // Store a7 + STORE x28, TX_STACK_OFFSET_X28(sp) // Store t3 + STORE x29, TX_STACK_OFFSET_X29(sp) // Store t4 + STORE x30, TX_STACK_OFFSET_X30(sp) // Store t5 + STORE x31, TX_STACK_OFFSET_X31(sp) // Store t6 + + csrr t0, mepc // Load exception program counter + STORE t0, TX_STACK_OFFSET_MEPC(sp) // Save it on the stack + + csrr t0, mstatus // Save mstatus unconditionally; context restore always reloads it from the frame + STORE t0, TX_STACK_OFFSET_MSTATUS(sp) + /* Save floating point scratch registers. */ +#if defined(__riscv_flen) + /* Check the floating point status for lazy FPU*/ + srli t1, t0, 13 + andi t1, t1, 0x3 + beqz t1, _tx_skip_not_nested_fpu_save // Skip floating point save if FS is Off +#if (__riscv_flen == 32) + fsw f0, TX_STACK_OFFSET_F0(sp) // Store ft0 + fsw f1, TX_STACK_OFFSET_F1(sp) // Store ft1 + fsw f2, TX_STACK_OFFSET_F2(sp) // Store ft2 + fsw f3, TX_STACK_OFFSET_F3(sp) // Store ft3 + fsw f4, TX_STACK_OFFSET_F4(sp) // Store ft4 + fsw f5, TX_STACK_OFFSET_F5(sp) // Store ft5 + fsw f6, TX_STACK_OFFSET_F6(sp) // Store ft6 + fsw f7, TX_STACK_OFFSET_F7(sp) // Store ft7 + fsw f10,TX_STACK_OFFSET_F10(sp) // Store fa0 + fsw f11,TX_STACK_OFFSET_F11(sp) // Store fa1 + fsw f12,TX_STACK_OFFSET_F12(sp) // Store fa2 + fsw f13,TX_STACK_OFFSET_F13(sp) // Store fa3 + fsw f14,TX_STACK_OFFSET_F14(sp) // Store fa4 + fsw f15,TX_STACK_OFFSET_F15(sp) // Store fa5 + fsw f16,TX_STACK_OFFSET_F16(sp) // Store fa6 + fsw f17,TX_STACK_OFFSET_F17(sp) // Store fa7 + fsw f28,TX_STACK_OFFSET_F28(sp) // Store ft8 + fsw f29,TX_STACK_OFFSET_F29(sp) // 
Store ft9 + fsw f30,TX_STACK_OFFSET_F30(sp) // Store ft10 + fsw f31,TX_STACK_OFFSET_F31(sp) // Store ft11 csrr t0, fcsr - sw t0, 63*4(sp) // Store fcsr -#elif defined(__riscv_float_abi_double) - fsd f0, 31*4(sp) // Store ft0 - fsd f1, 32*4(sp) // Store ft1 - fsd f2, 33*4(sp) // Store ft2 - fsd f3, 34*4(sp) // Store ft3 - fsd f4, 35*4(sp) // Store ft4 - fsd f5, 36*4(sp) // Store ft5 - fsd f6, 37*4(sp) // Store ft6 - fsd f7, 38*4(sp) // Store ft7 - fsd f10, 41*4(sp) // Store fa0 - fsd f11, 42*4(sp) // Store fa1 - fsd f12, 43*4(sp) // Store fa2 - fsd f13, 44*4(sp) // Store fa3 - fsd f14, 45*4(sp) // Store fa4 - fsd f15, 46*4(sp) // Store fa5 - fsd f16, 47*4(sp) // Store fa6 - fsd f17, 48*4(sp) // Store fa7 - fsd f28, 59*4(sp) // Store ft8 - fsd f29, 60*4(sp) // Store ft9 - fsd f30, 61*4(sp) // Store ft10 - fsd f31, 62*4(sp) // Store ft11 + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr +#elif (__riscv_flen == 64) + fsd f0, TX_STACK_OFFSET_F0(sp) // Store ft0 + fsd f1, TX_STACK_OFFSET_F1(sp) // Store ft1 + fsd f2, TX_STACK_OFFSET_F2(sp) // Store ft2 + fsd f3, TX_STACK_OFFSET_F3(sp) // Store ft3 + fsd f4, TX_STACK_OFFSET_F4(sp) // Store ft4 + fsd f5, TX_STACK_OFFSET_F5(sp) // Store ft5 + fsd f6, TX_STACK_OFFSET_F6(sp) // Store ft6 + fsd f7, TX_STACK_OFFSET_F7(sp) // Store ft7 + fsd f10,TX_STACK_OFFSET_F10(sp) // Store fa0 + fsd f11,TX_STACK_OFFSET_F11(sp) // Store fa1 + fsd f12,TX_STACK_OFFSET_F12(sp) // Store fa2 + fsd f13,TX_STACK_OFFSET_F13(sp) // Store fa3 + fsd f14,TX_STACK_OFFSET_F14(sp) // Store fa4 + fsd f15,TX_STACK_OFFSET_F15(sp) // Store fa5 + fsd f16,TX_STACK_OFFSET_F16(sp) // Store fa6 + fsd f17,TX_STACK_OFFSET_F17(sp) // Store fa7 + fsd f28,TX_STACK_OFFSET_F28(sp) // Store ft8 + fsd f29,TX_STACK_OFFSET_F29(sp) // Store ft9 + fsd f30,TX_STACK_OFFSET_F30(sp) // Store ft10 + fsd f31,TX_STACK_OFFSET_F31(sp) // Store ft11 csrr t0, fcsr - sw t0, 63*4(sp) // Store fcsr + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr #endif +_tx_skip_not_nested_fpu_save: 
+#endif + + /* Save the current stack pointer in the thread's control block. */ + /* _tx_thread_current_ptr -> tx_thread_stack_ptr = sp; */ - csrr t0, mepc - sw t0, 30*4(sp) // Save it on stack + /* Switch to the system stack. */ + /* sp = _tx_thread_system_stack_ptr; */ - csrr t0, mstatus - sw t0, 29*4(sp) + LOAD t1, _tx_thread_current_ptr // Pickup current thread pointer + STORE sp, TX_THREAD_STACK_PTR(t1) // Save stack pointer - /* Call the ISR execution exit function if enabled. */ #ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY - call _tx_execution_isr_enter // Call the ISR execution enter function + /* _tx_execution_isr_enter is called with thread stack pointer */ + call _tx_execution_isr_enter // Call the ISR execution enter function #endif - ret // Return to ISR + + LOAD sp, _tx_thread_system_stack_ptr // Switch to system stack + ret // Return to calling ISR + + /* } + else + { */ _tx_thread_idle_system_save: @@ -263,9 +291,9 @@ _tx_thread_idle_system_save: /* } } */ -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, 65*4 // Recover stack frame - with floating point enabled +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, TX_THREAD_FRAME_SIZE_FPU // Recover stack frame - with floating point enabled #else - addi sp, sp, 32*4 // Recover the reserved stack space + addi sp, sp, TX_THREAD_FRAME_SIZE_INT // Recover the reserved stack space #endif - ret // Return to calling ISR + ret // Return to calling ISR diff --git a/ports/risc-v32/gnu/src/tx_thread_interrupt_control.S b/ports/risc-v32/gnu/src/tx_thread_interrupt_control.S index aab2955b5..b38b4e0f2 100644 --- a/ports/risc-v32/gnu/src/tx_thread_interrupt_control.S +++ b/ports/risc-v32/gnu/src/tx_thread_interrupt_control.S @@ -19,6 +19,8 @@ /**************************************************************************/ /**************************************************************************/ + RETURN_MASK = 0x0000000F + SET_SR_MASK = 
0xFFFFFFF0 .section .text /**************************************************************************/ @@ -30,6 +32,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -57,32 +60,17 @@ { */ .global _tx_thread_interrupt_control _tx_thread_interrupt_control: + /* Pickup current interrupt lockout posture. */ - /* Pickup current interrupt posture. */ + csrr t0, mstatus + mv t1, t0 // Save original mstatus for return - csrr a1, mstatus // Pickup mstatus - andi a1, a1, 0x08 // Mask out all but MIE - - /* Check for the new posture. */ - - beqz a0, _tx_thread_interrupt_disable // If 0, disable interrupts - - /* Enable interrupts. */ - - csrsi mstatus, 0x08 // Enable interrupts (MIE bit 3) - j _tx_thread_interrupt_control_exit // Return to caller - -_tx_thread_interrupt_disable: - - /* Disable interrupts. */ - - csrci mstatus, 0x08 // Disable interrupts (MIE bit 3) - -_tx_thread_interrupt_control_exit: - - /* Return the old interrupt posture. */ - - mv a0, a1 // Setup return value - ret // Return to caller + /* Apply the new interrupt posture. */ + li t2, SET_SR_MASK // Build set SR mask + and t0, t0, t2 // Isolate interrupt lockout bits + or t0, t0, a0 // Put new lockout bits in + csrw mstatus, t0 + andi a0, t1, RETURN_MASK // Return original mstatus. 
+ ret /* } */ diff --git a/ports/risc-v32/gnu/src/tx_thread_schedule.S b/ports/risc-v32/gnu/src/tx_thread_schedule.S index 1c235a2d2..09c356626 100644 --- a/ports/risc-v32/gnu/src/tx_thread_schedule.S +++ b/ports/risc-v32/gnu/src/tx_thread_schedule.S @@ -19,6 +19,7 @@ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" .section .text /**************************************************************************/ @@ -30,6 +31,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -62,256 +64,258 @@ _tx_thread_schedule: /* Enable interrupts. */ - - csrsi mstatus, 0x08 // Enable interrupts (MIE bit 3) + csrsi mstatus, 0x08 // Enable interrupts /* Wait for a thread to execute. */ /* do { */ -_tx_thread_schedule_loop: la t0, _tx_thread_execute_ptr // Pickup address of execute ptr - lw t1, 0(t0) // Pickup execute pointer - bnez t1, _tx_thread_ready_to_run // If non-NULL, a thread is ready to run - -#ifndef TX_NO_WFI +_tx_thread_schedule_loop: + LOAD t1, 0(t0) // Pickup next thread to execute + bnez t1, _tx_thread_ready // If not NULL, break loop wfi // Wait for interrupt -#endif - j _tx_thread_schedule_loop // Check again + j _tx_thread_schedule_loop // Wait for interrupt - /* } - while (_tx_thread_execute_ptr == TX_NULL); */ +_tx_thread_ready: -_tx_thread_ready_to_run: - - /* At this point, t1 contains the pointer to the thread to execute. - Lockout interrupts. */ - - csrci mstatus, 0x08 // Disable interrupts (MIE bit 3) - - /* Check _tx_thread_execute_ptr again, in case an interrupt occurred - between the check and the disable. */ + /* } + while(_tx_thread_execute_ptr == TX_NULL); */ - lw t1, 0(t0) // Pickup execute pointer - beqz t1, _tx_thread_schedule_loop // If NULL, go back to wait loop + /* Yes! We have a thread to execute. 
Lockout interrupts and + transfer control to it. */ + csrci mstatus, 0x08 // Lockout interrupts - /* Yes! We have a thread to execute. */ + /* Setup the current thread pointer. */ /* _tx_thread_current_ptr = _tx_thread_execute_ptr; */ - la t0, _tx_thread_current_ptr // Pickup address of current thread - sw t1, 0(t0) // Setup current thread pointer + la t0, _tx_thread_current_ptr // Pickup current thread pointer address + STORE t1, 0(t0) // Set current thread pointer /* Increment the run count for this thread. */ /* _tx_thread_current_ptr -> tx_thread_run_count++; */ - lw t2, 4(t1) // Pickup run count + LOAD t2, TX_THREAD_RUN_COUNT(t1) // Pickup run count + LOAD t3, TX_THREAD_TIME_SLICE(t1) // Pickup time slice value addi t2, t2, 1 // Increment run count - sw t2, 4(t1) // Store run count + STORE t2, TX_THREAD_RUN_COUNT(t1) // Store new run count - /* Setup time-slice values. */ + /* Setup time-slice, if present. */ /* _tx_timer_time_slice = _tx_thread_current_ptr -> tx_thread_time_slice; */ - lw t2, 24(t1) // Pickup thread time-slice - la t3, _tx_timer_time_slice // Pickup address of time-slice - sw t2, 0(t3) // Setup time-slice + la t2, _tx_timer_time_slice // Pickup time-slice variable address + + /* Switch to the thread's stack. */ + /* SP = _tx_thread_execute_ptr -> tx_thread_stack_ptr; */ + + LOAD sp, TX_THREAD_STACK_PTR(t1) // Switch to thread's stack + STORE t3, 0(t2) // Store new time-slice*/ - /* Call the thread execution enter function if enabled. */ #ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY call _tx_execution_thread_enter // Call the thread execution enter function #endif - /* Switch to the thread's stack. */ - /* sp = _tx_thread_current_ptr -> tx_thread_stack_ptr; */ - - lw sp, 8(t1) // Switch to thread stack - - /* Determine the type of stack frame. */ - /* if (*sp) - { */ - - lw t0, 0(sp) // Pickup stack type - beqz t0, _tx_thread_solicited_return // If 0, solicited return - - /* Recover floating point registers. 
*/ -#if defined(__riscv_float_abi_single) - flw f0, 31*4(sp) // Recover ft0 - flw f1, 32*4(sp) // Recover ft1 - flw f2, 33*4(sp) // Recover ft2 - flw f3, 34*4(sp) // Recover ft3 - flw f4, 35*4(sp) // Recover ft4 - flw f5, 36*4(sp) // Recover ft5 - flw f6, 37*4(sp) // Recover ft6 - flw f7, 38*4(sp) // Recover ft7 - flw f8, 39*4(sp) // Recover fs0 - flw f9, 40*4(sp) // Recover fs1 - flw f10, 41*4(sp) // Recover fa0 - flw f11, 42*4(sp) // Recover fa1 - flw f12, 43*4(sp) // Recover fa2 - flw f13, 44*4(sp) // Recover fa3 - flw f14, 45*4(sp) // Recover fa4 - flw f15, 46*4(sp) // Recover fa5 - flw f16, 47*4(sp) // Recover fa6 - flw f17, 48*4(sp) // Recover fa7 - flw f18, 49*4(sp) // Recover fs2 - flw f19, 50*4(sp) // Recover fs3 - flw f20, 51*4(sp) // Recover fs4 - flw f21, 52*4(sp) // Recover fs5 - flw f22, 53*4(sp) // Recover fs6 - flw f23, 54*4(sp) // Recover fs7 - flw f24, 55*4(sp) // Recover fs8 - flw f25, 56*4(sp) // Recover fs9 - flw f26, 57*4(sp) // Recover fs10 - flw f27, 58*4(sp) // Recover fs11 - flw f28, 59*4(sp) // Recover ft8 - flw f29, 60*4(sp) // Recover ft9 - flw f30, 61*4(sp) // Recover ft10 - flw f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr -#elif defined(__riscv_float_abi_double) - fld f0, 31*4(sp) // Recover ft0 - fld f1, 32*4(sp) // Recover ft1 - fld f2, 33*4(sp) // Recover ft2 - fld f3, 34*4(sp) // Recover ft3 - fld f4, 35*4(sp) // Recover ft4 - fld f5, 36*4(sp) // Recover ft5 - fld f6, 37*4(sp) // Recover ft6 - fld f7, 38*4(sp) // Recover ft7 - fld f8, 39*4(sp) // Recover fs0 - fld f9, 40*4(sp) // Recover fs1 - fld f10, 41*4(sp) // Recover fa0 - fld f11, 42*4(sp) // Recover fa1 - fld f12, 43*4(sp) // Recover fa2 - fld f13, 44*4(sp) // Recover fa3 - fld f14, 45*4(sp) // Recover fa4 - fld f15, 46*4(sp) // Recover fa5 - fld f16, 47*4(sp) // Recover fa6 - fld f17, 48*4(sp) // Recover fa7 - fld f18, 49*4(sp) // Recover fs2 - fld f19, 50*4(sp) // Recover fs3 - fld f20, 51*4(sp) // Recover fs4 - fld f21, 
52*4(sp) // Recover fs5 - fld f22, 53*4(sp) // Recover fs6 - fld f23, 54*4(sp) // Recover fs7 - fld f24, 55*4(sp) // Recover fs8 - fld f25, 56*4(sp) // Recover fs9 - fld f26, 57*4(sp) // Recover fs10 - fld f27, 58*4(sp) // Recover fs11 - fld f28, 59*4(sp) // Recover ft8 - fld f29, 60*4(sp) // Recover ft9 - fld f30, 61*4(sp) // Recover ft10 - fld f31, 62*4(sp) // Recover ft11 - lw t0, 63*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr + /* Determine if an interrupt frame or a synchronous task suspension frame + is present. */ + + LOAD t2, 0(sp) // Pickup stack type + beqz t2, _tx_thread_synch_return // If 0, solicited thread return + + /* Determine if floating point registers need to be recovered. */ + +#if defined(__riscv_flen) + LOAD t1, TX_STACK_OFFSET_MSTATUS(sp) // Pickup saved mstatus from the thread's stack frame (sp, not t0: t0 holds &_tx_thread_current_ptr) + /* Check if floating point is enabled */ + srli t1, t1, 13 + andi t1, t1, 0x3 + beqz t1, _tx_thread_schedule_skip_fp_restore // Skip floating point restore if FS is Off +#if __riscv_flen == 32 + flw f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + flw f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + flw f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + flw f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + flw f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + flw f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + flw f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + flw f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + flw f8, TX_STACK_OFFSET_F8(sp) // Recover fs0 + flw f9, TX_STACK_OFFSET_F9(sp) // Recover fs1 + flw f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + flw f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + flw f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + flw f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + flw f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + flw f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + flw f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + flw f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + flw f18,TX_STACK_OFFSET_F18(sp) // Recover fs2 + flw f19,TX_STACK_OFFSET_F19(sp) // Recover fs3 + flw 
f20,TX_STACK_OFFSET_F20(sp) // Recover fs4 + flw f21,TX_STACK_OFFSET_F21(sp) // Recover fs5 + flw f22,TX_STACK_OFFSET_F22(sp) // Recover fs6 + flw f23,TX_STACK_OFFSET_F23(sp) // Recover fs7 + flw f24,TX_STACK_OFFSET_F24(sp) // Recover fs8 + flw f25,TX_STACK_OFFSET_F25(sp) // Recover fs9 + flw f26,TX_STACK_OFFSET_F26(sp) // Recover fs10 + flw f27,TX_STACK_OFFSET_F27(sp) // Recover fs11 + flw f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + flw f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + flw f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + flw f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#elif __riscv_flen == 64 + fld f0, TX_STACK_OFFSET_F0(sp) // Recover ft0 + fld f1, TX_STACK_OFFSET_F1(sp) // Recover ft1 + fld f2, TX_STACK_OFFSET_F2(sp) // Recover ft2 + fld f3, TX_STACK_OFFSET_F3(sp) // Recover ft3 + fld f4, TX_STACK_OFFSET_F4(sp) // Recover ft4 + fld f5, TX_STACK_OFFSET_F5(sp) // Recover ft5 + fld f6, TX_STACK_OFFSET_F6(sp) // Recover ft6 + fld f7, TX_STACK_OFFSET_F7(sp) // Recover ft7 + fld f8, TX_STACK_OFFSET_F8(sp) // Recover fs0 + fld f9, TX_STACK_OFFSET_F9(sp) // Recover fs1 + fld f10,TX_STACK_OFFSET_F10(sp) // Recover fa0 + fld f11,TX_STACK_OFFSET_F11(sp) // Recover fa1 + fld f12,TX_STACK_OFFSET_F12(sp) // Recover fa2 + fld f13,TX_STACK_OFFSET_F13(sp) // Recover fa3 + fld f14,TX_STACK_OFFSET_F14(sp) // Recover fa4 + fld f15,TX_STACK_OFFSET_F15(sp) // Recover fa5 + fld f16,TX_STACK_OFFSET_F16(sp) // Recover fa6 + fld f17,TX_STACK_OFFSET_F17(sp) // Recover fa7 + fld f18,TX_STACK_OFFSET_F18(sp) // Recover fs2 + fld f19,TX_STACK_OFFSET_F19(sp) // Recover fs3 + fld f20,TX_STACK_OFFSET_F20(sp) // Recover fs4 + fld f21,TX_STACK_OFFSET_F21(sp) // Recover fs5 + fld f22,TX_STACK_OFFSET_F22(sp) // Recover fs6 + fld f23,TX_STACK_OFFSET_F23(sp) // Recover fs7 + fld f24,TX_STACK_OFFSET_F24(sp) // Recover fs8 + fld f25,TX_STACK_OFFSET_F25(sp) // Recover fs9 + fld f26,TX_STACK_OFFSET_F26(sp) // Recover fs10 + 
fld f27,TX_STACK_OFFSET_F27(sp) // Recover fs11 + fld f28,TX_STACK_OFFSET_F28(sp) // Recover ft8 + fld f29,TX_STACK_OFFSET_F29(sp) // Recover ft9 + fld f30,TX_STACK_OFFSET_F30(sp) // Recover ft10 + fld f31,TX_STACK_OFFSET_F31(sp) // Recover ft11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#endif +_tx_thread_schedule_skip_fp_restore: #endif /* Recover standard registers. */ - lw t0, 30*4(sp) // Recover mepc - csrw mepc, t0 // Setup mepc - - li t0, 0x1880 // Prepare mstatus: MPP=Machine(0x1800) | MPIE(0x80) -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - li t1, 0x2000 // Set FS bits for FP state - or t0, t0, t1 + LOAD t0, TX_STACK_OFFSET_MEPC(sp) // Recover mepc + csrw mepc, t0 // Store mepc + li t0, 0x1880 // Prepare MPIP +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + li t1, 1<<13 + or t0, t1, t0 #endif - csrw mstatus, t0 // Set mstatus - - lw ra, 28*4(sp) // Recover return address - lw t0, 19*4(sp) // Recover t0 - lw t1, 18*4(sp) // Recover t1 - lw t2, 17*4(sp) // Recover t2 - lw s0, 12*4(sp) // Recover s0 - lw x9, 11*4(sp) // Recover s1 - lw a0, 27*4(sp) // Recover a0 - lw a1, 26*4(sp) // Recover a1 - lw a2, 25*4(sp) // Recover a2 - lw a3, 24*4(sp) // Recover a3 - lw a4, 23*4(sp) // Recover a4 - lw a5, 22*4(sp) // Recover a5 - lw a6, 21*4(sp) // Recover a6 - lw a7, 20*4(sp) // Recover a7 - lw t3, 16*4(sp) // Recover t3 - lw t4, 15*4(sp) // Recover t4 - lw t5, 14*4(sp) // Recover t5 - lw t6, 13*4(sp) // Recover t6 - lw x18, 10*4(sp) // Recover s2 - lw x19, 9*4(sp) // Recover s3 - lw x20, 8*4(sp) // Recover s4 - lw x21, 7*4(sp) // Recover s5 - lw x22, 6*4(sp) // Recover s6 - lw x23, 5*4(sp) // Recover s7 - lw x24, 4*4(sp) // Recover s8 - lw x25, 3*4(sp) // Recover s9 - lw x26, 2*4(sp) // Recover s10 - lw x27, 1*4(sp) // Recover s11 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, 65*4 // Recover stack frame - with floating point 
enabled + csrw mstatus, t0 // Enable MPIP + + LOAD x1, TX_STACK_OFFSET_X1(sp) // Recover RA + LOAD x5, TX_STACK_OFFSET_X5(sp) // Recover t0 + LOAD x6, TX_STACK_OFFSET_X6(sp) // Recover t1 + LOAD x7, TX_STACK_OFFSET_X7(sp) // Recover t2 + LOAD x8, TX_STACK_OFFSET_X8(sp) // Recover s0 + LOAD x9, TX_STACK_OFFSET_X9(sp) // Recover s1 + LOAD x10, TX_STACK_OFFSET_X10(sp) // Recover a0 + LOAD x11, TX_STACK_OFFSET_X11(sp) // Recover a1 + LOAD x12, TX_STACK_OFFSET_X12(sp) // Recover a2 + LOAD x13, TX_STACK_OFFSET_X13(sp) // Recover a3 + LOAD x14, TX_STACK_OFFSET_X14(sp) // Recover a4 + LOAD x15, TX_STACK_OFFSET_X15(sp) // Recover a5 + LOAD x16, TX_STACK_OFFSET_X16(sp) // Recover a6 + LOAD x17, TX_STACK_OFFSET_X17(sp) // Recover a7 + LOAD x18, TX_STACK_OFFSET_X18(sp) // Recover s2 + LOAD x19, TX_STACK_OFFSET_X19(sp) // Recover s3 + LOAD x20, TX_STACK_OFFSET_X20(sp) // Recover s4 + LOAD x21, TX_STACK_OFFSET_X21(sp) // Recover s5 + LOAD x22, TX_STACK_OFFSET_X22(sp) // Recover s6 + LOAD x23, TX_STACK_OFFSET_X23(sp) // Recover s7 + LOAD x24, TX_STACK_OFFSET_X24(sp) // Recover s8 + LOAD x25, TX_STACK_OFFSET_X25(sp) // Recover s9 + LOAD x26, TX_STACK_OFFSET_X26(sp) // Recover s10 + LOAD x27, TX_STACK_OFFSET_X27(sp) // Recover s11 + LOAD x28, TX_STACK_OFFSET_X28(sp) // Recover t3 + LOAD x29, TX_STACK_OFFSET_X29(sp) // Recover t4 + LOAD x30, TX_STACK_OFFSET_X30(sp) // Recover t5 + LOAD x31, TX_STACK_OFFSET_X31(sp) // Recover t6 + +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, TX_THREAD_FRAME_SIZE_FPU // Recover stack frame - with floating point registers #else - addi sp, sp, 32*4 // Recover stack frame - without floating point enabled + addi sp, sp, TX_THREAD_FRAME_SIZE_INT // Recover stack frame - without floating point registers +#endif + mret // Return to point of interrupt + +_tx_thread_synch_return: + +#if defined(__riscv_flen) + LOAD t1, TX_STACK_OFFSET_MSTATUS(t0) // Pickup thread's floating point state */ + /* Check if floating 
point is enabled. + Invariant: If FS is Off (0), skip restore. */ + srli t1, t1, 13 + andi t1, t1, 0x3 + beqz t1, _tx_thread_synch_return_skip_fp_restore // Skip floating point restore FS is Off +#if __riscv_flen == 32 + flw f8, TX_STACK_OFFSET_F8(sp) // Recover fs0 + flw f9, TX_STACK_OFFSET_F9(sp) // Recover fs1 + flw f18, TX_STACK_OFFSET_F18(sp) // Recover fs2 + flw f19, TX_STACK_OFFSET_F19(sp) // Recover fs3 + flw f20, TX_STACK_OFFSET_F20(sp) // Recover fs4 + flw f21, TX_STACK_OFFSET_F21(sp) // Recover fs5 + flw f22, TX_STACK_OFFSET_F22(sp) // Recover fs6 + flw f23, TX_STACK_OFFSET_F23(sp) // Recover fs7 + flw f24, TX_STACK_OFFSET_F24(sp) // Recover fs8 + flw f25, TX_STACK_OFFSET_F25(sp) // Recover fs9 + flw f26, TX_STACK_OFFSET_F26(sp) // Recover fs10 + flw f27, TX_STACK_OFFSET_F27(sp) // Recover fs11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // +#elif __riscv_flen == 64 + fld f8, TX_STACK_OFFSET_F8(sp) // Recover fs0 + fld f9, TX_STACK_OFFSET_F9(sp) // Recover fs1 + fld f18, TX_STACK_OFFSET_F18(sp) // Recover fs2 + fld f19, TX_STACK_OFFSET_F19(sp) // Recover fs3 + fld f20, TX_STACK_OFFSET_F20(sp) // Recover fs4 + fld f21, TX_STACK_OFFSET_F21(sp) // Recover fs5 + fld f22, TX_STACK_OFFSET_F22(sp) // Recover fs6 + fld f23, TX_STACK_OFFSET_F23(sp) // Recover fs7 + fld f24, TX_STACK_OFFSET_F24(sp) // Recover fs8 + fld f25, TX_STACK_OFFSET_F25(sp) // Recover fs9 + fld f26, TX_STACK_OFFSET_F26(sp) // Recover fs10 + fld f27, TX_STACK_OFFSET_F27(sp) // Recover fs11 + LOAD t0, TX_STACK_OFFSET_FCSR(sp) // Recover fcsr + csrw fcsr, t0 // #endif - mret // Return to thread - -_tx_thread_solicited_return: - - /* Recover floating point registers. 
*/ -#if defined(__riscv_float_abi_single) - flw f8, 15*4(sp) // Recover fs0 - flw f9, 16*4(sp) // Recover fs1 - flw f18, 17*4(sp) // Recover fs2 - flw f19, 18*4(sp) // Recover fs3 - flw f20, 19*4(sp) // Recover fs4 - flw f21, 20*4(sp) // Recover fs5 - flw f22, 21*4(sp) // Recover fs6 - flw f23, 22*4(sp) // Recover fs7 - flw f24, 23*4(sp) // Recover fs8 - flw f25, 24*4(sp) // Recover fs9 - flw f26, 25*4(sp) // Recover fs10 - flw f27, 26*4(sp) // Recover fs11 - lw t0, 27*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr -#elif defined(__riscv_float_abi_double) - fld f8, 15*4(sp) // Recover fs0 - fld f9, 16*4(sp) // Recover fs1 - fld f18, 17*4(sp) // Recover fs2 - fld f19, 18*4(sp) // Recover fs3 - fld f20, 19*4(sp) // Recover fs4 - fld f21, 20*4(sp) // Recover fs5 - fld f22, 21*4(sp) // Recover fs6 - fld f23, 22*4(sp) // Recover fs7 - fld f24, 23*4(sp) // Recover fs8 - fld f25, 24*4(sp) // Recover fs9 - fld f26, 25*4(sp) // Recover fs10 - fld f27, 26*4(sp) // Recover fs11 - lw t0, 27*4(sp) // Recover fcsr - csrw fcsr, t0 // Restore fcsr +_tx_thread_synch_return_skip_fp_restore: #endif + /* Recover standard preserved registers. */ /* Recover standard registers. 
*/ - lw t0, 14*4(sp) // Recover mstatus - csrw mstatus, t0 // Restore mstatus - - lw ra, 13*4(sp) // Recover return address - lw s0, 12*4(sp) // Recover s0 - lw s1, 11*4(sp) // Recover s1 - lw x18, 10*4(sp) // Recover s2 - lw x19, 9*4(sp) // Recover s3 - lw x20, 8*4(sp) // Recover s4 - lw x21, 7*4(sp) // Recover s5 - lw x22, 6*4(sp) // Recover s6 - lw x23, 5*4(sp) // Recover s7 - lw x24, 4*4(sp) // Recover s8 - lw x25, 3*4(sp) // Recover s9 - lw x26, 2*4(sp) // Recover s10 - lw x27, 1*4(sp) // Recover s11 - -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, 29*4 // Recover stack frame - with floating point enabled + LOAD x1, TX_STACK_OFFSET_X1(sp) // Recover RA + LOAD x8, TX_STACK_OFFSET_X8(sp) // Recover s0 + LOAD x9, TX_STACK_OFFSET_X9(sp) // Recover s1 + LOAD x18, TX_STACK_OFFSET_X18(sp) // Recover s2 + LOAD x19, TX_STACK_OFFSET_X19(sp) // Recover s3 + LOAD x20, TX_STACK_OFFSET_X20(sp) // Recover s4 + LOAD x21, TX_STACK_OFFSET_X21(sp) // Recover s5 + LOAD x22, TX_STACK_OFFSET_X22(sp) // Recover s6 + LOAD x23, TX_STACK_OFFSET_X23(sp) // Recover s7 + LOAD x24, TX_STACK_OFFSET_X24(sp) // Recover s8 + LOAD x25, TX_STACK_OFFSET_X25(sp) // Recover s9 + LOAD x26, TX_STACK_OFFSET_X26(sp) // Recover s10 + LOAD x27, TX_STACK_OFFSET_X27(sp) // Recover s11 + LOAD t0, TX_STACK_OFFSET_MSTATUS(sp) // Recover mstatus + csrw mstatus, t0 // Store mstatus, enables interrupt +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, TX_THREAD_FRAME_SIZE_FPU // Recover stack frame #else - addi sp, sp, 16*4 // Recover stack frame - without floating point enabled + addi sp, sp, TX_THREAD_FRAME_SIZE_INT // Recover stack frame #endif ret // Return to thread diff --git a/ports/risc-v32/gnu/src/tx_thread_stack_build.S b/ports/risc-v32/gnu/src/tx_thread_stack_build.S index 4ade60ca6..fbec685f5 100644 --- a/ports/risc-v32/gnu/src/tx_thread_stack_build.S +++ b/ports/risc-v32/gnu/src/tx_thread_stack_build.S @@ -19,6 
+19,7 @@ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" .section .text /**************************************************************************/ @@ -30,6 +31,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -131,91 +133,91 @@ If floating point support: Stack Bottom: (higher memory address) */ - lw t0, 16(a0) // Pickup end of stack area + LOAD t0, TX_THREAD_STACK_END(a0) // Pickup end of stack area li t1, ~15 // Build 16-byte alignment mask and t0, t0, t1 // Make sure 16-byte alignment /* Actually build the stack frame. */ -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi t0, t0, -65*4 +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi t0, t0, -TX_THREAD_FRAME_SIZE_FPU #else - addi t0, t0, -32*4 // Allocate space for the stack frame + addi t0, t0, -TX_THREAD_FRAME_SIZE_INT // Allocate space for the stack frame #endif li t1, 1 // Build stack type - sw t1, 0*4(t0) // Place stack type on the top - sw zero, 1*4(t0) // Initial s11 - sw zero, 2*4(t0) // Initial s10 - sw zero, 3*4(t0) // Initial s9 - sw zero, 4*4(t0) // Initial s8 - sw zero, 5*4(t0) // Initial s7 - sw zero, 6*4(t0) // Initial s6 - sw zero, 7*4(t0) // Initial s5 - sw zero, 8*4(t0) // Initial s4 - sw zero, 9*4(t0) // Initial s3 - sw zero, 10*4(t0) // Initial s2 - sw zero, 11*4(t0) // Initial s1 - sw zero, 12*4(t0) // Initial s0 - sw zero, 13*4(t0) // Initial t6 - sw zero, 14*4(t0) // Initial t5 - sw zero, 15*4(t0) // Initial t4 - sw zero, 16*4(t0) // Initial t3 - sw zero, 17*4(t0) // Initial t2 - sw zero, 18*4(t0) // Initial t1 - sw zero, 19*4(t0) // Initial t0 - sw zero, 20*4(t0) // Initial a7 - sw zero, 21*4(t0) // Initial a6 - sw zero, 22*4(t0) // Initial a5 - sw zero, 23*4(t0) // Initial a4 - sw zero, 24*4(t0) // Initial 
a3 - sw zero, 25*4(t0) // Initial a2 - sw zero, 26*4(t0) // Initial a1 - sw zero, 27*4(t0) // Initial a0 - sw zero, 28*4(t0) // Initial ra - sw a1, 30*4(t0) // Initial mepc (thread entry point) -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - sw zero, 31*4(t0) // Initial ft0 - sw zero, 32*4(t0) // Initial ft1 - sw zero, 33*4(t0) // Initial ft2 - sw zero, 34*4(t0) // Initial ft3 - sw zero, 35*4(t0) // Initial ft4 - sw zero, 36*4(t0) // Initial ft5 - sw zero, 37*4(t0) // Initial ft6 - sw zero, 38*4(t0) // Initial ft7 - sw zero, 39*4(t0) // Initial fs0 - sw zero, 40*4(t0) // Initial fs1 - sw zero, 41*4(t0) // Initial fa0 - sw zero, 42*4(t0) // Initial fa1 - sw zero, 43*4(t0) // Initial fa2 - sw zero, 44*4(t0) // Initial fa3 - sw zero, 45*4(t0) // Initial fa4 - sw zero, 46*4(t0) // Initial fa5 - sw zero, 47*4(t0) // Initial fa6 - sw zero, 48*4(t0) // Initial fa7 - sw zero, 49*4(t0) // Initial fs2 - sw zero, 50*4(t0) // Initial fs3 - sw zero, 51*4(t0) // Initial fs4 - sw zero, 52*4(t0) // Initial fs5 - sw zero, 53*4(t0) // Initial fs6 - sw zero, 54*4(t0) // Initial fs7 - sw zero, 55*4(t0) // Initial fs8 - sw zero, 56*4(t0) // Initial fs9 - sw zero, 57*4(t0) // Initial fs10 - sw zero, 58*4(t0) // Initial fs11 - sw zero, 59*4(t0) // Initial ft8 - sw zero, 60*4(t0) // Initial ft9 - sw zero, 61*4(t0) // Initial ft10 - sw zero, 62*4(t0) // Initial ft11 - csrr a1, fcsr // Read fcsr for initial value - sw a1, 63*4(t0) // Initial fcsr - sw zero, 64*4(t0) // Reserved word (0) + STORE t1, TX_STACK_OFFSET_TYPE(t0) // Place stack type on the top + STORE x0, TX_STACK_OFFSET_X27(t0) // Initial s11 + STORE x0, TX_STACK_OFFSET_X26(t0) // Initial s10 + STORE x0, TX_STACK_OFFSET_X25(t0) // Initial s9 + STORE x0, TX_STACK_OFFSET_X24(t0) // Initial s8 + STORE x0, TX_STACK_OFFSET_X23(t0) // Initial s7 + STORE x0, TX_STACK_OFFSET_X22(t0) // Initial s6 + STORE x0, TX_STACK_OFFSET_X21(t0) // Initial s5 + STORE x0, TX_STACK_OFFSET_X20(t0) // Initial s4 + STORE x0, 
TX_STACK_OFFSET_X19(t0) // Initial s3 + STORE x0, TX_STACK_OFFSET_X18(t0) // Initial s2 + STORE x0, TX_STACK_OFFSET_X9(t0) // Initial s1 + STORE x0, TX_STACK_OFFSET_X8(t0) // Initial s0 + STORE x0, TX_STACK_OFFSET_X31(t0) // Initial t6 + STORE x0, TX_STACK_OFFSET_X30(t0) // Initial t5 + STORE x0, TX_STACK_OFFSET_X29(t0) // Initial t4 + STORE x0, TX_STACK_OFFSET_X28(t0) // Initial t3 + STORE x0, TX_STACK_OFFSET_X7(t0) // Initial t2 + STORE x0, TX_STACK_OFFSET_X6(t0) // Initial t1 + STORE x0, TX_STACK_OFFSET_X5(t0) // Initial t0 + STORE x0, TX_STACK_OFFSET_X17(t0) // Initial a7 + STORE x0, TX_STACK_OFFSET_X16(t0) // Initial a6 + STORE x0, TX_STACK_OFFSET_X15(t0) // Initial a5 + STORE x0, TX_STACK_OFFSET_X14(t0) // Initial a4 + STORE x0, TX_STACK_OFFSET_X13(t0) // Initial a3 + STORE x0, TX_STACK_OFFSET_X12(t0) // Initial a2 + STORE x0, TX_STACK_OFFSET_X11(t0) // Initial a1 + STORE x0, TX_STACK_OFFSET_X10(t0) // Initial a0 + STORE x0, TX_STACK_OFFSET_X1(t0) // Initial ra + STORE a1, TX_STACK_OFFSET_MEPC(t0) // Initial mepc +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + STORE x0, TX_STACK_OFFSET_F0(t0) // Inital ft0 + STORE x0, TX_STACK_OFFSET_F1(t0) // Inital ft1 + STORE x0, TX_STACK_OFFSET_F2(t0) // Inital ft2 + STORE x0, TX_STACK_OFFSET_F3(t0) // Inital ft3 + STORE x0, TX_STACK_OFFSET_F4(t0) // Inital ft4 + STORE x0, TX_STACK_OFFSET_F5(t0) // Inital ft5 + STORE x0, TX_STACK_OFFSET_F6(t0) // Inital ft6 + STORE x0, TX_STACK_OFFSET_F7(t0) // Inital ft7 + STORE x0, TX_STACK_OFFSET_F8(t0) // Inital fs0 + STORE x0, TX_STACK_OFFSET_F9(t0) // Inital fs1 + STORE x0, TX_STACK_OFFSET_F10(t0) // Inital fa0 + STORE x0, TX_STACK_OFFSET_F11(t0) // Inital fa1 + STORE x0, TX_STACK_OFFSET_F12(t0) // Inital fa2 + STORE x0, TX_STACK_OFFSET_F13(t0) // Inital fa3 + STORE x0, TX_STACK_OFFSET_F14(t0) // Inital fa4 + STORE x0, TX_STACK_OFFSET_F15(t0) // Inital fa5 + STORE x0, TX_STACK_OFFSET_F16(t0) // Inital fa6 + STORE x0, TX_STACK_OFFSET_F17(t0) // Inital 
fa7 + STORE x0, TX_STACK_OFFSET_F18(t0) // Inital fs2 + STORE x0, TX_STACK_OFFSET_F19(t0) // Inital fs3 + STORE x0, TX_STACK_OFFSET_F20(t0) // Inital fs4 + STORE x0, TX_STACK_OFFSET_F21(t0) // Inital fs5 + STORE x0, TX_STACK_OFFSET_F22(t0) // Inital fs6 + STORE x0, TX_STACK_OFFSET_F23(t0) // Inital fs7 + STORE x0, TX_STACK_OFFSET_F24(t0) // Inital fs8 + STORE x0, TX_STACK_OFFSET_F25(t0) // Inital fs9 + STORE x0, TX_STACK_OFFSET_F26(t0) // Inital fs10 + STORE x0, TX_STACK_OFFSET_F27(t0) // Inital fs11 + STORE x0, TX_STACK_OFFSET_F28(t0) // Inital ft8 + STORE x0, TX_STACK_OFFSET_F29(t0) // Inital ft9 + STORE x0, TX_STACK_OFFSET_F30(t0) // Inital ft10 + STORE x0, TX_STACK_OFFSET_F31(t0) // Inital ft11 + csrr a1, fcsr // Read fcsr and use it for initial value for each thread + STORE a1, TX_STACK_OFFSET_FCSR(t0) // Initial fscr + STORE x0, (TX_THREAD_FRAME_SIZE_FPU-REGBYTES)(t0) // Reserved word (0) #else - sw zero, 31*4(t0) // Reserved word (0) + STORE x0, (TX_THREAD_FRAME_SIZE_INT-REGBYTES)(t0) // Reserved word (0) #endif /* Setup stack pointer. 
*/ /* thread_ptr -> tx_thread_stack_ptr = t0; */ - sw t0, 8(a0) // Save stack pointer in thread's + STORE t0, TX_THREAD_STACK_PTR(a0) // Save stack pointer in thread's ret // control block and return /* } */ diff --git a/ports/risc-v32/gnu/src/tx_thread_system_return.S b/ports/risc-v32/gnu/src/tx_thread_system_return.S index 4090e7b26..caa49bd73 100644 --- a/ports/risc-v32/gnu/src/tx_thread_system_return.S +++ b/ports/risc-v32/gnu/src/tx_thread_system_return.S @@ -19,6 +19,7 @@ /**************************************************************************/ /**************************************************************************/ +#include "tx_port.h" .section .text /**************************************************************************/ @@ -30,6 +31,7 @@ /* AUTHOR */ /* */ /* Akif Ejaz, 10xEngineers */ +/* Wei-Chen Lai, National Cheng Kung University */ /* */ /* DESCRIPTION */ /* */ @@ -61,68 +63,68 @@ _tx_thread_system_return: /* Save minimal context on the stack. */ - /* sp -= sizeof(stack_frame); */ -#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) - addi sp, sp, -29*4 // Allocate space on the stack - with floating point enabled +#if defined(__riscv_flen) && ((__riscv_flen == 32) || (__riscv_flen == 64)) + addi sp, sp, -TX_THREAD_FRAME_SIZE_FPU // Allocate space on the stack - with floating point enabled #else - addi sp, sp, -16*4 // Allocate space on the stack - without floating point enabled + addi sp, sp, -TX_THREAD_FRAME_SIZE_INT // Allocate space on the stack - without floating point enabled #endif /* Store floating point preserved registers. 
*/ -#if defined(__riscv_float_abi_single) - fsw f8, 15*4(sp) // Store fs0 - fsw f9, 16*4(sp) // Store fs1 - fsw f18, 17*4(sp) // Store fs2 - fsw f19, 18*4(sp) // Store fs3 - fsw f20, 19*4(sp) // Store fs4 - fsw f21, 20*4(sp) // Store fs5 - fsw f22, 21*4(sp) // Store fs6 - fsw f23, 22*4(sp) // Store fs7 - fsw f24, 23*4(sp) // Store fs8 - fsw f25, 24*4(sp) // Store fs9 - fsw f26, 25*4(sp) // Store fs10 - fsw f27, 26*4(sp) // Store fs11 +#if defined(__riscv_flen) && (__riscv_flen == 32) + fsw f8, TX_STACK_OFFSET_F8(sp) // Store fs0 + fsw f9, TX_STACK_OFFSET_F9(sp) // Store fs1 + fsw f18, TX_STACK_OFFSET_F18(sp) // Store fs2 + fsw f19, TX_STACK_OFFSET_F19(sp) // Store fs3 + fsw f20, TX_STACK_OFFSET_F20(sp) // Store fs4 + fsw f21, TX_STACK_OFFSET_F21(sp) // Store fs5 + fsw f22, TX_STACK_OFFSET_F22(sp) // Store fs6 + fsw f23, TX_STACK_OFFSET_F23(sp) // Store fs7 + fsw f24, TX_STACK_OFFSET_F24(sp) // Store fs8 + fsw f25, TX_STACK_OFFSET_F25(sp) // Store fs9 + fsw f26, TX_STACK_OFFSET_F26(sp) // Store fs10 + fsw f27, TX_STACK_OFFSET_F27(sp) // Store fs11 csrr t0, fcsr - sw t0, 27*4(sp) // Store fcsr -#elif defined(__riscv_float_abi_double) - fsd f8, 15*4(sp) // Store fs0 - fsd f9, 16*4(sp) // Store fs1 - fsd f18, 17*4(sp) // Store fs2 - fsd f19, 18*4(sp) // Store fs3 - fsd f20, 19*4(sp) // Store fs4 - fsd f21, 20*4(sp) // Store fs5 - fsd f22, 21*4(sp) // Store fs6 - fsd f23, 22*4(sp) // Store fs7 - fsd f24, 23*4(sp) // Store fs8 - fsd f25, 24*4(sp) // Store fs9 - fsd f26, 25*4(sp) // Store fs10 - fsd f27, 26*4(sp) // Store fs11 + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr +#elif defined(__riscv_flen) && (__riscv_flen == 64) + fsd f8, TX_STACK_OFFSET_F8(sp) // Store fs0 + fsd f9, TX_STACK_OFFSET_F9(sp) // Store fs1 + fsd f18, TX_STACK_OFFSET_F18(sp) // Store fs2 + fsd f19, TX_STACK_OFFSET_F19(sp) // Store fs3 + fsd f20, TX_STACK_OFFSET_F20(sp) // Store fs4 + fsd f21, TX_STACK_OFFSET_F21(sp) // Store fs5 + fsd f22, TX_STACK_OFFSET_F22(sp) // Store fs6 + fsd f23, 
TX_STACK_OFFSET_F23(sp) // Store fs7 + fsd f24, TX_STACK_OFFSET_F24(sp) // Store fs8 + fsd f25, TX_STACK_OFFSET_F25(sp) // Store fs9 + fsd f26, TX_STACK_OFFSET_F26(sp) // Store fs10 + fsd f27, TX_STACK_OFFSET_F27(sp) // Store fs11 csrr t0, fcsr - sw t0, 27*4(sp) // Store fcsr + STORE t0, TX_STACK_OFFSET_FCSR(sp) // Store fcsr #endif - sw zero, 0(sp) // Solicited stack type - sw ra, 13*4(sp) // Save return address - sw s0, 12*4(sp) // Save s0 - sw s1, 11*4(sp) // Save s1 - sw s2, 10*4(sp) // Save s2 - sw s3, 9*4(sp) // Save s3 - sw s4, 8*4(sp) // Save s4 - sw s5, 7*4(sp) // Save s5 - sw s6, 6*4(sp) // Save s6 - sw s7, 5*4(sp) // Save s7 - sw s8, 4*4(sp) // Save s8 - sw s9, 3*4(sp) // Save s9 - sw s10, 2*4(sp) // Save s10 - sw s11, 1*4(sp) // Save s11 + STORE x0, 0(sp) // Solicited stack type + STORE x1, TX_STACK_OFFSET_X1(sp) // Save RA + STORE x1, TX_STACK_OFFSET_MEPC(sp) // Save RA as MEPC + STORE x8, TX_STACK_OFFSET_X8(sp) // Save s0 + STORE x9, TX_STACK_OFFSET_X9(sp) // Save s1 + STORE x18, TX_STACK_OFFSET_X18(sp) // Save s2 + STORE x19, TX_STACK_OFFSET_X19(sp) // Save s3 + STORE x20, TX_STACK_OFFSET_X20(sp) // Save s4 + STORE x21, TX_STACK_OFFSET_X21(sp) // Save s5 + STORE x22, TX_STACK_OFFSET_X22(sp) // Save s6 + STORE x23, TX_STACK_OFFSET_X23(sp) // Save s7 + STORE x24, TX_STACK_OFFSET_X24(sp) // Save s8 + STORE x25, TX_STACK_OFFSET_X25(sp) // Save s9 + STORE x26, TX_STACK_OFFSET_X26(sp) // Save s10 + STORE x27, TX_STACK_OFFSET_X27(sp) // Save s11 csrr t0, mstatus // Pickup mstatus - sw t0, 14*4(sp) // Save mstatus + STORE t0, TX_STACK_OFFSET_MSTATUS(sp) // Save mstatus - /* Lockout interrupts. will be enabled in _tx_thread_schedule */ + /* Lockout interrupts. 
- will be enabled in _tx_thread_schedule */ - csrci mstatus, 0x08 // Disable interrupts (MIE bit 3) + csrci mstatus, 0xF #ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY @@ -130,22 +132,22 @@ _tx_thread_system_return: #endif la t0, _tx_thread_current_ptr // Pickup address of pointer - lw t1, 0(t0) // Pickup current thread pointer - la t2, _tx_thread_system_stack_ptr // Pickup stack pointer address + LOAD t1, 0(t0) // Pickup current thread pointer + la t2,_tx_thread_system_stack_ptr // Pickup stack pointer address /* Save current stack and switch to system stack. */ /* _tx_thread_current_ptr -> tx_thread_stack_ptr = SP; SP = _tx_thread_system_stack_ptr; */ - sw sp, 8(t1) // Save stack pointer - lw sp, 0(t2) // Switch to system stack + STORE sp, TX_THREAD_STACK_PTR(t1) // Save stack pointer + LOAD sp, 0(t2) // Switch to system stack /* Determine if the time-slice is active. */ /* if (_tx_timer_time_slice) { */ la t4, _tx_timer_time_slice // Pickup time slice variable addr - lw t3, 0(t4) // Pickup time slice value + LOAD t3, 0(t4) // Pickup time slice value la t2, _tx_thread_schedule // Pickup address of scheduling loop beqz t3, _tx_thread_dont_save_ts // If no time-slice, don't save it @@ -153,8 +155,8 @@ _tx_thread_system_return: /* _tx_thread_current_ptr -> tx_thread_time_slice = _tx_timer_time_slice; _tx_timer_time_slice = 0; */ - sw t3, 24(t1) // Save current time-slice for thread - sw zero, 0(t4) // Clear time-slice variable + STORE t3, TX_THREAD_TIME_SLICE(t1) // Save current time-slice for thread + STORE x0, 0(t4) // Clear time-slice variable /* } */ _tx_thread_dont_save_ts: @@ -162,7 +164,7 @@ _tx_thread_dont_save_ts: /* Clear the current thread pointer. 
*/
     /* _tx_thread_current_ptr = TX_NULL; */
 
-    sw      x0, 0(t0)                                   // Clear current thread pointer
+    STORE   x0, 0(t0)                                   // Clear current thread pointer
     jr      t2                                          // Return to thread scheduler
 
     /* } */
diff --git a/test/ports/azrtos_test_tx_gnu_riscv32_qemu.py b/test/ports/azrtos_test_tx_gnu_riscv32_qemu.py
new file mode 100644
index 000000000..f58117f32
--- /dev/null
+++ b/test/ports/azrtos_test_tx_gnu_riscv32_qemu.py
@@ -0,0 +1,359 @@
+import argparse
+import os
+import select
+import socket
+import subprocess
+import sys
+import tempfile
+import time
+
+# Seconds granted to the whole GDB session before the run is declared hung.
+DEFAULT_GDB_TIMEOUT = 120
+
+# Seconds to wait after launching QEMU to catch an immediate startup failure.
+QEMU_STARTUP_GRACE = 0.5
+
+# GDB batch script; "{port}" and "{elf}" are filled in by run_qemu_test().
+# The script labels each of its own checks with "SUCCESS:" or "FAILURE:".
+GDB_SCRIPT_TEMPLATE = """
+target remote :{port}
+file {elf}
+set pagination off
+set confirm off
+
+# Setup Breakpoints
+break tx_application_define
+break thread_0_entry
+break thread_6_and_7_entry
+break _tx_timer_interrupt
+
+# Execute to Application Definition
+continue
+
+# Verify Lazy FPU Context (Expect FS=Initial)
+continue
+print/x $mstatus
+
+# Verify FPU Logic and Register State
+continue
+finish
+step
+step
+step
+print/x $mstatus
+info registers float
+print fpu_test_val
+
+# Await Timer Interrupt
+continue
+print "Hit Timer Interrupt"
+
+# Verify MEPC Integrity - Save State
+print/x $mepc
+set $saved_pc = $mepc
+
+# Verify System Timer Before ISR
+set $clock_before = _tx_timer_system_clock
+print $clock_before
+
+# Configure Time-Slice Test Conditions
+set _tx_timer_time_slice = 1
+set _tx_timer_expired_time_slice = 0
+set $ts_handler_called = 0
+
+# Set Breakpoint at Time-Slice Handler with Auto-Continue
+tbreak _tx_thread_time_slice
+commands
+    set $ts_handler_called = 1
+    continue
+end
+
+# Set Breakpoint at ISR Return Address
+set $ret_addr = $ra
+tbreak *$ret_addr
+continue
+
+# Verify Time-Slice Handler Was Called
+if $ts_handler_called == 1
+    print "SUCCESS: Time-slice handler called."
+else
+    print "FAILURE: Time-slice handler NOT called."
+end
+
+# Verify System Timer Increment (Monotonicity)
+set $clock_after = _tx_timer_system_clock
+print $clock_after
+
+if $clock_after > $clock_before
+    print "SUCCESS: System timer incremented."
+else
+    print "FAILURE: System timer did not increment."
+end
+
+# Verify MEPC Restoration Post-ISR
+tbreak *$saved_pc
+continue
+
+print "Back from ISR"
+print/x $pc
+set $diff = (long)$pc - (long)$saved_pc
+if $diff == 0
+    print "SUCCESS: MEPC restored correctly."
+else
+    print "FAILURE: PC does not match saved MEPC."
+end
+
+# Verify Preemption Logic (Thread Priority)
+break _tx_thread_preempt_restore
+
+set $max_loops = 5
+set $loop_cnt = 0
+set $found_preemption = 0
+
+while $loop_cnt < $max_loops
+    continue
+    set $loop_cnt = $loop_cnt + 1
+
+    set $curr_ptr = _tx_thread_current_ptr
+    set $exec_ptr = _tx_thread_execute_ptr
+
+    if $curr_ptr != 0 && $exec_ptr != 0
+        printf "Preemption Check: Current Prio=%d, Exec Prio=%d\\n", $curr_ptr->tx_thread_priority, $exec_ptr->tx_thread_priority
+        set $curr_prio = $curr_ptr->tx_thread_priority
+        set $exec_prio = $exec_ptr->tx_thread_priority
+
+        if $exec_prio < $curr_prio
+            print "SUCCESS: Thread Preemption Verified."
+            set $found_preemption = 1
+            loop_break
+        end
+
+        if $exec_prio > $curr_prio
+            print "FAILURE: Preemption logic error - Lower priority running."
+            loop_break
+        end
+    else
+        print "FAILURE: Null thread pointers."
+        loop_break
+    end
+end
+
+if $found_preemption == 0
+    print "FAILURE: Preemption not observed."
+end
+
+quit
+"""
+
+
+def print_content(content):
+    """Write ``content`` plus a newline to stdout, tolerating non-blocking mode.
+
+    ``os.write`` is used instead of ``print`` so that a short write or a
+    ``BlockingIOError`` on a non-blocking stdout is retried rather than
+    dropped.  Output is best effort: any other error is ignored so that a
+    logging problem never aborts the test run.
+    """
+    try:
+        msg = f"{content}\n".encode('utf-8')
+        fd = sys.stdout.fileno()
+        written = 0
+        while written < len(msg):
+            try:
+                written += os.write(fd, msg[written:])
+            except BlockingIOError:
+                # stdout is full; block until it is writable, then retry.
+                select.select([], [fd], [])
+    except Exception:
+        pass
+
+
+def get_free_port():
+    """Return a TCP port number that was free when probed.
+
+    The probe socket is closed before returning, so another process could
+    take the port before QEMU binds it.
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind(('', 0))
+        return s.getsockname()[1]
+
+
+def _stop_process(process):
+    """Terminate ``process``, escalating to kill if it does not exit."""
+    process.terminate()
+    try:
+        process.wait(timeout=2)
+    except subprocess.TimeoutExpired:
+        print_content("QEMU did not terminate gracefully, killing it forcefully.")
+        process.kill()
+        process.wait()
+
+
+def _run_gdb(gdb_bin, script_text, timeout):
+    """Run GDB in batch mode on ``script_text`` and echo what it printed.
+
+    Returns GDB's stdout as text, or ``None`` if GDB could not be started or
+    did not finish within ``timeout`` seconds.
+    """
+    # A uniquely named script keeps concurrent runs apart and is removed
+    # afterwards instead of being left in the working directory.
+    fd, script_path = tempfile.mkstemp(prefix="test_cmds_", suffix=".gdb")
+    try:
+        with os.fdopen(fd, "w") as script_file:
+            script_file.write(script_text)
+
+        gdb_cmd = [gdb_bin, "--batch", "-x", script_path]
+        print_content(f"Starting GDB: {' '.join(gdb_cmd)}")
+        try:
+            gdb_process = subprocess.run(
+                gdb_cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                errors="replace",
+                timeout=timeout
+            )
+        except subprocess.TimeoutExpired:
+            print_content(f"FAILURE: GDB did not finish within {timeout} seconds.")
+            return None
+        except OSError as e:
+            print_content(f"An error occurred during test execution: {e}")
+            return None
+    finally:
+        try:
+            os.remove(script_path)
+        except OSError:
+            pass
+
+    print_content("GDB Output:")
+    print_content(gdb_process.stdout)
+    if gdb_process.stderr:
+        print_content("GDB Error Output:")
+        print_content(gdb_process.stderr)
+    return gdb_process.stdout
+
+
+def _evaluate_gdb_output(gdb_stdout):
+    """Return True when GDB's output shows that every check passed."""
+    timer_hit = "Breakpoint 4, _tx_timer_interrupt" in gdb_stdout
+    fpu_verified = False
+    lazy_fpu_verified = False
+
+    if "Breakpoint 2, thread_0_entry" in gdb_stdout:
+        if "$1 =" in gdb_stdout:
+            print_content("SUCCESS: Checked thread_0 mstatus (Expect FS=0 Off/Init for Lazy Save).")
+            lazy_fpu_verified = True
+
+    if "Breakpoint 3, thread_6_and_7_entry" in gdb_stdout:
+        if "1.10" in gdb_stdout or "fpu_test_val" in gdb_stdout:
+            print_content("SUCCESS: FPU instructions executed and registers inspected.")
+            fpu_verified = True
+        else:
+            print_content("FAILURE: Hit thread, but failed to inspect FPU. Output does not contain expected value.")
+
+    if timer_hit:
+        print_content("SUCCESS: Timer Interrupt verified! Hit _tx_timer_interrupt.")
+    else:
+        print_content("FAILURE: Did not hit timer interrupt.")
+
+    # The GDB script's own FAILURE verdicts count too; otherwise a broken
+    # MEPC, timer or preemption check would still yield exit status 0.
+    script_failures = [line.strip() for line in gdb_stdout.splitlines()
+                       if "FAILURE:" in line]
+    for failure in script_failures:
+        print_content(f"GDB script reported: {failure}")
+
+    return (timer_hit and fpu_verified and lazy_fpu_verified
+            and not script_failures)
+
+
+def run_qemu_test(elf_path, qemu_bin, gdb_bin, gdb_timeout=DEFAULT_GDB_TIMEOUT):
+    """Run one test cycle: boot the ELF under QEMU and drive it with GDB.
+
+    Args:
+        elf_path: Path to the kernel ELF to load.
+        qemu_bin: QEMU system emulator executable.
+        gdb_bin: GDB executable.
+        gdb_timeout: Seconds the GDB session may take before it is aborted.
+
+    Returns:
+        True if every check passed, False otherwise.
+    """
+    print_content(f"Testing ELF: {elf_path}")
+    print_content(f"QEMU: {qemu_bin}")
+    print_content(f"GDB: {gdb_bin}")
+
+    # Find a free port for GDB connection
+    gdb_port = get_free_port()
+    print_content(f"Using GDB port: {gdb_port}")
+
+    # 1. Start QEMU in the background
+    qemu_cmd = [
+        qemu_bin,
+        "-M", "virt",
+        "-nographic",
+        "-bios", "none",  # Disable default OpenSBI to avoid overlap at 0x80000000
+        "-kernel", elf_path,
+        "-gdb", f"tcp::{gdb_port}", "-S",
+        "-monitor", "none",  # Disable monitor to avoid clutter
+        "-serial", "stdio"  # Redirect serial output to stdio so we can see it
+    ]
+    print_content(f"Starting QEMU: {' '.join(qemu_cmd)}")
+
+    # QEMU writes to a temporary file rather than a pipe: nothing reads its
+    # output while GDB runs, and a full pipe would stall the emulator.
+    with tempfile.TemporaryFile() as qemu_log:
+        try:
+            qemu_process = subprocess.Popen(
+                qemu_cmd,
+                stdout=qemu_log,
+                stderr=subprocess.STDOUT
+            )
+        except OSError as e:
+            print_content(f"QEMU failed to start: {e}")
+            return False
+
+        gdb_stdout = None
+        try:
+            # Popen returns before QEMU has validated its arguments, so an
+            # immediate poll() cannot see a startup failure; wait briefly.
+            try:
+                qemu_process.wait(timeout=QEMU_STARTUP_GRACE)
+                qemu_running = False
+            except subprocess.TimeoutExpired:
+                qemu_running = True
+
+            if qemu_running:
+                # 2. + 3. Create the GDB command script and run it
+                gdb_script = GDB_SCRIPT_TEMPLATE.format(port=gdb_port, elf=elf_path)
+                gdb_stdout = _run_gdb(gdb_bin, gdb_script, gdb_timeout)
+            else:
+                print_content("QEMU failed to start.")
+        finally:
+            # 4. Clean up
+            print_content("Stopping QEMU...")
+            _stop_process(qemu_process)
+            qemu_log.seek(0)
+            qemu_output = qemu_log.read().decode("utf-8", errors="replace")
+            if qemu_output:
+                print_content("QEMU Output:")
+                print_content(qemu_output)
+
+    if gdb_stdout is None:
+        return False
+    return _evaluate_gdb_output(gdb_stdout)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run ThreadX QEMU/GDB Test")
+    parser.add_argument("--elf", required=True, help="Path to the kernel ELF file")
+    parser.add_argument("--qemu", default="qemu-system-riscv32", help="Path to QEMU binary")
+    parser.add_argument("--gdb", default="riscv-none-elf-gdb", help="Path to GDB binary")
+    parser.add_argument("--timeout", type=float, default=DEFAULT_GDB_TIMEOUT,
+                        help="Seconds to allow the GDB session before aborting")
+
+    args = parser.parse_args()
+
+    success = run_qemu_test(args.elf, args.qemu, args.gdb, args.timeout)
+    sys.exit(0 if success else 1)