Created
June 28, 2025 16:12
-
-
Save purofle/7256c90daa3042d319a873921f58e79a to your computer and use it in GitHub Desktop.
RISC-V in GPU!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "info/info_desc.hpp" | |
| #include "stream.hpp" | |
| #include <cstring> | |
| #include <elf.h> | |
| #include <fcntl.h> | |
| #include <gelf.h> | |
| #include <iostream> | |
| #include <libelf.h> | |
| #include <sycl/sycl.hpp> | |
| #include <unistd.h> | |
| #include <vector> | |
| #include <cstring> // For std::memcpy and std::memset | |
| #include <fcntl.h> | |
| #include <gelf.h> | |
| #include <iostream> | |
| #include <libelf.h> | |
| #include <string> | |
| #include <unistd.h> | |
| #include <vector> | |
| // RV32I_State 结构体保持不变 | |
| struct RV32I_State { | |
| uint32_t regs[32]; | |
| uint32_t pc; | |
| }; | |
| bool load_elf(const std::string &filename, std::vector<uint32_t> &memory, | |
| uint32_t &entry_pc) { | |
| if (elf_version(EV_CURRENT) == EV_NONE) { | |
| std::cerr << "ELF library initialization failed: " << elf_errmsg(-1) | |
| << "\n"; | |
| return false; | |
| } | |
| int fd = open(filename.c_str(), O_RDONLY); | |
| if (fd < 0) { | |
| perror("Failed to open file"); | |
| return false; | |
| } | |
| Elf *e = elf_begin(fd, ELF_C_READ, nullptr); | |
| if (!e) { | |
| std::cerr << "elf_begin() failed: " << elf_errmsg(-1) << "\n"; | |
| close(fd); | |
| return false; | |
| } | |
| // 确保是 RISC-V 32-bit ELF 文件 | |
| GElf_Ehdr ehdr; | |
| if (!gelf_getehdr(e, &ehdr)) { | |
| std::cerr << "gelf_getehdr() failed: " << elf_errmsg(-1) << "\n"; | |
| elf_end(e); | |
| close(fd); | |
| return false; | |
| } | |
| // e_machine == EM_RISCV and e_ident[EI_CLASS] == ELFCLASS32 | |
| if (ehdr.e_machine != EM_RISCV) { | |
| std::cerr << "Error: Not a RISC-V 32-bit ELF file.\n"; | |
| elf_end(e); | |
| close(fd); | |
| return false; | |
| } | |
| size_t phnum; | |
| if (elf_getphdrnum(e, &phnum) != 0) { | |
| std::cerr << "elf_getphdrnum() failed: " << elf_errmsg(-1) << "\n"; | |
| elf_end(e); | |
| close(fd); | |
| return false; | |
| } | |
| for (size_t i = 0; i < phnum; ++i) { | |
| GElf_Phdr phdr; | |
| if (!gelf_getphdr(e, i, &phdr)) { | |
| std::cerr << "gelf_getphdr() failed for segment " << i << ": " | |
| << elf_errmsg(-1) << "\n"; | |
| continue; | |
| } | |
| // 关键修正 1: 加载所有类型为 PT_LOAD 的段,而不仅仅是可执行段 | |
| if (phdr.p_type != PT_LOAD) { | |
| continue; | |
| } | |
| std::cout << "Loading segment: vaddr=0x" << std::hex << phdr.p_vaddr | |
| << ", filesz=" << std::dec << phdr.p_filesz | |
| << ", memsz=" << std::dec << phdr.p_memsz << "\n"; | |
| // 安全检查:确保段不会写出内存边界 | |
| // memory.size() 是以 uint32_t 为单位,所以总字节数是 memory.size() * 4 | |
| if (phdr.p_vaddr + phdr.p_memsz > memory.size() * 4) { | |
| std::cerr << "Error: Segment is too large to fit in memory.\n"; | |
| // 不继续加载,因为程序镜像将不完整 | |
| elf_end(e); | |
| close(fd); | |
| return false; | |
| } | |
| // p_filesz > 0 才需要从文件读取 | |
| if (phdr.p_filesz > 0) { | |
| // 将文件指针移动到段的偏移处 | |
| if (lseek(fd, phdr.p_offset, SEEK_SET) != (off_t)phdr.p_offset) { | |
| perror("lseek failed"); | |
| continue; // 或者直接返回 false | |
| } | |
| // 关键修正 2: 直接将整个段数据完整地读入和复制 | |
| // 目标地址是 memory vector 的起始地址加上段的虚拟地址 | |
| char *mem_ptr = reinterpret_cast<char *>(memory.data()) + phdr.p_vaddr; | |
| if (read(fd, mem_ptr, phdr.p_filesz) != (ssize_t)phdr.p_filesz) { | |
| perror("read failed"); | |
| continue; // 或者直接返回 false | |
| } | |
| } | |
| // 关键修正 3: 处理 .bss 段 (p_memsz > p_filesz) | |
| // 将文件中没有但内存中需要的部分清零 | |
| if (phdr.p_memsz > phdr.p_filesz) { | |
| size_t bss_size = phdr.p_memsz - phdr.p_filesz; | |
| char *bss_start_ptr = reinterpret_cast<char *>(memory.data()) + | |
| phdr.p_vaddr + phdr.p_filesz; | |
| std::memset(bss_start_ptr, 0, bss_size); | |
| } | |
| } | |
| elf_end(e); | |
| close(fd); | |
| std::cout << "ELF file loaded successfully. Entry PC: 0x" << std::hex | |
| << entry_pc << std::endl; | |
| return true; | |
| } | |
| inline int32_t sign_extend(uint32_t val, int bits) { | |
| int32_t mask = 1 << (bits - 1); | |
| return (val ^ mask) - mask; | |
| } | |
| int main() { | |
| const size_t mem_size_words = 1024 * 1024; | |
| sycl::queue q(sycl::gpu_selector_v); | |
| std::cout | |
| << "Running on device: " | |
| << q.get_device().get_info<sycl::info::device::name>() << " [" | |
| << q.get_device().get_platform().get_info<sycl::info::platform::name>() | |
| << "]\n"; | |
| RV32I_State host_state{}; | |
| std::vector<uint32_t> host_memory(mem_size_words, 0); | |
| uint32_t entry = 0; | |
| if (!load_elf("test.o", host_memory, entry)) { | |
| std::cerr << "ELF load failed!\n"; | |
| return 1; | |
| } | |
| std::cout << "Loaded ELF successfully. Entry PC: 0x" << std::hex << entry | |
| << "\n"; | |
| host_state.pc = entry; | |
| sycl::buffer<RV32I_State> state_buf(&host_state, 1); | |
| sycl::buffer<uint32_t> memory_buf(host_memory.data(), | |
| sycl::range<1>(mem_size_words)); | |
| try { | |
| q.submit([&](sycl::handler &h) { | |
| sycl::stream os(1024, 128, h); | |
| auto state_acc = state_buf.get_access<sycl::access::mode::read_write>(h); | |
| auto memory_acc = | |
| memory_buf.get_access<sycl::access::mode::read_write>(h); | |
| h.single_task([=]() { | |
| RV32I_State &s = state_acc[0]; | |
| for (int cycle = 0; cycle < 100; ++cycle) { | |
| if (s.pc / 4 >= mem_size_words) | |
| break; | |
| uint32_t instr = memory_acc[s.pc / 4]; | |
| if (instr == 0xDEADBEEF) | |
| break; | |
| uint32_t opcode = instr & 0x7F; | |
| uint32_t rd = (instr >> 7) & 0x1F; | |
| uint32_t funct3 = (instr >> 12) & 0x7; | |
| uint32_t rs1 = (instr >> 15) & 0x1F; | |
| uint32_t rs2 = (instr >> 20) & 0x1F; | |
| uint32_t next_pc = s.pc + 4; | |
| os << "opcode: " << sycl::hex << opcode << ", rd: " << rd | |
| << ", funct3: " << funct3 << ", rs1: " << rs1 << ", rs2: " << rs2 | |
| << ", instr: " << instr << sycl::endl; | |
| int32_t imm_i = sign_extend(instr >> 20, 12); | |
| int32_t imm_s = | |
| sign_extend(((instr >> 25) << 5) | ((instr >> 7) & 0x1F), 12); | |
| switch (opcode) { | |
| case 0b0010011: // I-Type (ADDI) | |
| if (funct3 == 0b000 && rd != 0) { | |
| s.regs[rd] = s.regs[rs1] + imm_i; | |
| } | |
| break; | |
| case 0b0110011: // R-Type (ADD) | |
| if (funct3 == 0b000 && (instr >> 25) == 0b0000000 && rd != 0) { | |
| s.regs[rd] = s.regs[rs1] + s.regs[rs2]; | |
| } | |
| break; | |
| case 0b0000011: // Load-Type (LW) | |
| if (funct3 == 0b010) { | |
| int32_t addr = s.regs[rs1] + imm_i; | |
| if (rd != 0 && addr >= 0 && addr % 4 == 0 && | |
| (addr / 4) < mem_size_words) { | |
| s.regs[rd] = memory_acc[addr / 4]; | |
| } | |
| } | |
| break; | |
| case 0b0100011: // Store-Type (SW) | |
| if (funct3 == 0b010) { | |
| int32_t addr = s.regs[rs1] + imm_s; | |
| if (addr >= 0 && addr % 4 == 0 && (addr / 4) < mem_size_words) { | |
| memory_acc[addr / 4] = s.regs[rs2]; | |
| } | |
| } | |
| break; | |
| default: | |
| os << "Unrecognized instruction: " << instr << sycl::endl; | |
| break; | |
| } | |
| s.regs[0] = 0; | |
| s.pc = next_pc; | |
| } | |
| }); | |
| }).wait(); | |
| } catch (sycl::exception const &e) { | |
| std::cerr << "SYCL exception caught: " << e.what() << "\n"; | |
| return 1; | |
| } | |
| sycl::host_accessor final_state(state_buf); | |
| sycl::host_accessor final_memory(memory_buf); | |
| std::cout << "Final PC: " << final_state[0].pc << "\n"; | |
| std::cout << "Register Values:\n"; | |
| std::cout << "x1 (r1): " << final_state[0].regs[1] << " (Expected: 5)\n"; | |
| std::cout << "x2 (r2): " << final_state[0].regs[2] << " (Expected: 10)\n"; | |
| std::cout << "x3 (r3): " << final_state[0].regs[3] << " (Expected: 15)\n"; | |
| std::cout << "x4 (r4): " << final_state[0].regs[4] | |
| << " (Expected: 15 after LW)\n"; | |
| std::cout << "\nMemory Content:\n"; | |
| std::cout << "Memory[16]: " << final_memory[4] << " (Expected: 15)\n"; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment