Skip to content

Instantly share code, notes, and snippets.

@purofle
Created June 28, 2025 16:12
Show Gist options
  • Select an option

  • Save purofle/7256c90daa3042d319a873921f58e79a to your computer and use it in GitHub Desktop.

Select an option

Save purofle/7256c90daa3042d319a873921f58e79a to your computer and use it in GitHub Desktop.
RISC-V in GPU!
#include "info/info_desc.hpp"
#include "stream.hpp"
#include <cstring>
#include <elf.h>
#include <fcntl.h>
#include <gelf.h>
#include <iostream>
#include <libelf.h>
#include <sycl/sycl.hpp>
#include <unistd.h>
#include <vector>
#include <cstring> // For std::memcpy and std::memset
#include <fcntl.h>
#include <gelf.h>
#include <iostream>
#include <libelf.h>
#include <string>
#include <unistd.h>
#include <vector>
// RV32I_State 结构体保持不变
struct RV32I_State {
uint32_t regs[32];
uint32_t pc;
};
bool load_elf(const std::string &filename, std::vector<uint32_t> &memory,
uint32_t &entry_pc) {
if (elf_version(EV_CURRENT) == EV_NONE) {
std::cerr << "ELF library initialization failed: " << elf_errmsg(-1)
<< "\n";
return false;
}
int fd = open(filename.c_str(), O_RDONLY);
if (fd < 0) {
perror("Failed to open file");
return false;
}
Elf *e = elf_begin(fd, ELF_C_READ, nullptr);
if (!e) {
std::cerr << "elf_begin() failed: " << elf_errmsg(-1) << "\n";
close(fd);
return false;
}
// 确保是 RISC-V 32-bit ELF 文件
GElf_Ehdr ehdr;
if (!gelf_getehdr(e, &ehdr)) {
std::cerr << "gelf_getehdr() failed: " << elf_errmsg(-1) << "\n";
elf_end(e);
close(fd);
return false;
}
// e_machine == EM_RISCV and e_ident[EI_CLASS] == ELFCLASS32
if (ehdr.e_machine != EM_RISCV) {
std::cerr << "Error: Not a RISC-V 32-bit ELF file.\n";
elf_end(e);
close(fd);
return false;
}
size_t phnum;
if (elf_getphdrnum(e, &phnum) != 0) {
std::cerr << "elf_getphdrnum() failed: " << elf_errmsg(-1) << "\n";
elf_end(e);
close(fd);
return false;
}
for (size_t i = 0; i < phnum; ++i) {
GElf_Phdr phdr;
if (!gelf_getphdr(e, i, &phdr)) {
std::cerr << "gelf_getphdr() failed for segment " << i << ": "
<< elf_errmsg(-1) << "\n";
continue;
}
// 关键修正 1: 加载所有类型为 PT_LOAD 的段,而不仅仅是可执行段
if (phdr.p_type != PT_LOAD) {
continue;
}
std::cout << "Loading segment: vaddr=0x" << std::hex << phdr.p_vaddr
<< ", filesz=" << std::dec << phdr.p_filesz
<< ", memsz=" << std::dec << phdr.p_memsz << "\n";
// 安全检查:确保段不会写出内存边界
// memory.size() 是以 uint32_t 为单位,所以总字节数是 memory.size() * 4
if (phdr.p_vaddr + phdr.p_memsz > memory.size() * 4) {
std::cerr << "Error: Segment is too large to fit in memory.\n";
// 不继续加载,因为程序镜像将不完整
elf_end(e);
close(fd);
return false;
}
// p_filesz > 0 才需要从文件读取
if (phdr.p_filesz > 0) {
// 将文件指针移动到段的偏移处
if (lseek(fd, phdr.p_offset, SEEK_SET) != (off_t)phdr.p_offset) {
perror("lseek failed");
continue; // 或者直接返回 false
}
// 关键修正 2: 直接将整个段数据完整地读入和复制
// 目标地址是 memory vector 的起始地址加上段的虚拟地址
char *mem_ptr = reinterpret_cast<char *>(memory.data()) + phdr.p_vaddr;
if (read(fd, mem_ptr, phdr.p_filesz) != (ssize_t)phdr.p_filesz) {
perror("read failed");
continue; // 或者直接返回 false
}
}
// 关键修正 3: 处理 .bss 段 (p_memsz > p_filesz)
// 将文件中没有但内存中需要的部分清零
if (phdr.p_memsz > phdr.p_filesz) {
size_t bss_size = phdr.p_memsz - phdr.p_filesz;
char *bss_start_ptr = reinterpret_cast<char *>(memory.data()) +
phdr.p_vaddr + phdr.p_filesz;
std::memset(bss_start_ptr, 0, bss_size);
}
}
elf_end(e);
close(fd);
std::cout << "ELF file loaded successfully. Entry PC: 0x" << std::hex
<< entry_pc << std::endl;
return true;
}
inline int32_t sign_extend(uint32_t val, int bits) {
int32_t mask = 1 << (bits - 1);
return (val ^ mask) - mask;
}
int main() {
const size_t mem_size_words = 1024 * 1024;
sycl::queue q(sycl::gpu_selector_v);
std::cout
<< "Running on device: "
<< q.get_device().get_info<sycl::info::device::name>() << " ["
<< q.get_device().get_platform().get_info<sycl::info::platform::name>()
<< "]\n";
RV32I_State host_state{};
std::vector<uint32_t> host_memory(mem_size_words, 0);
uint32_t entry = 0;
if (!load_elf("test.o", host_memory, entry)) {
std::cerr << "ELF load failed!\n";
return 1;
}
std::cout << "Loaded ELF successfully. Entry PC: 0x" << std::hex << entry
<< "\n";
host_state.pc = entry;
sycl::buffer<RV32I_State> state_buf(&host_state, 1);
sycl::buffer<uint32_t> memory_buf(host_memory.data(),
sycl::range<1>(mem_size_words));
try {
q.submit([&](sycl::handler &h) {
sycl::stream os(1024, 128, h);
auto state_acc = state_buf.get_access<sycl::access::mode::read_write>(h);
auto memory_acc =
memory_buf.get_access<sycl::access::mode::read_write>(h);
h.single_task([=]() {
RV32I_State &s = state_acc[0];
for (int cycle = 0; cycle < 100; ++cycle) {
if (s.pc / 4 >= mem_size_words)
break;
uint32_t instr = memory_acc[s.pc / 4];
if (instr == 0xDEADBEEF)
break;
uint32_t opcode = instr & 0x7F;
uint32_t rd = (instr >> 7) & 0x1F;
uint32_t funct3 = (instr >> 12) & 0x7;
uint32_t rs1 = (instr >> 15) & 0x1F;
uint32_t rs2 = (instr >> 20) & 0x1F;
uint32_t next_pc = s.pc + 4;
os << "opcode: " << sycl::hex << opcode << ", rd: " << rd
<< ", funct3: " << funct3 << ", rs1: " << rs1 << ", rs2: " << rs2
<< ", instr: " << instr << sycl::endl;
int32_t imm_i = sign_extend(instr >> 20, 12);
int32_t imm_s =
sign_extend(((instr >> 25) << 5) | ((instr >> 7) & 0x1F), 12);
switch (opcode) {
case 0b0010011: // I-Type (ADDI)
if (funct3 == 0b000 && rd != 0) {
s.regs[rd] = s.regs[rs1] + imm_i;
}
break;
case 0b0110011: // R-Type (ADD)
if (funct3 == 0b000 && (instr >> 25) == 0b0000000 && rd != 0) {
s.regs[rd] = s.regs[rs1] + s.regs[rs2];
}
break;
case 0b0000011: // Load-Type (LW)
if (funct3 == 0b010) {
int32_t addr = s.regs[rs1] + imm_i;
if (rd != 0 && addr >= 0 && addr % 4 == 0 &&
(addr / 4) < mem_size_words) {
s.regs[rd] = memory_acc[addr / 4];
}
}
break;
case 0b0100011: // Store-Type (SW)
if (funct3 == 0b010) {
int32_t addr = s.regs[rs1] + imm_s;
if (addr >= 0 && addr % 4 == 0 && (addr / 4) < mem_size_words) {
memory_acc[addr / 4] = s.regs[rs2];
}
}
break;
default:
os << "Unrecognized instruction: " << instr << sycl::endl;
break;
}
s.regs[0] = 0;
s.pc = next_pc;
}
});
}).wait();
} catch (sycl::exception const &e) {
std::cerr << "SYCL exception caught: " << e.what() << "\n";
return 1;
}
sycl::host_accessor final_state(state_buf);
sycl::host_accessor final_memory(memory_buf);
std::cout << "Final PC: " << final_state[0].pc << "\n";
std::cout << "Register Values:\n";
std::cout << "x1 (r1): " << final_state[0].regs[1] << " (Expected: 5)\n";
std::cout << "x2 (r2): " << final_state[0].regs[2] << " (Expected: 10)\n";
std::cout << "x3 (r3): " << final_state[0].regs[3] << " (Expected: 15)\n";
std::cout << "x4 (r4): " << final_state[0].regs[4]
<< " (Expected: 15 after LW)\n";
std::cout << "\nMemory Content:\n";
std::cout << "Memory[16]: " << final_memory[4] << " (Expected: 15)\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment