From dc7d43e1aa0ecad89ead08a771b21f61816f3c84 Mon Sep 17 00:00:00 2001 From: Rahul Bramandlapalli Date: Wed, 8 Oct 2025 21:45:53 +0530 Subject: [PATCH 1/2] Support multi ctrlpkt sections in ELF Signed-off-by: Rahul Bramandlapalli --- src/runtime_src/core/common/api/module_int.h | 3 +- .../core/common/api/xrt_module.cpp | 118 +++++++++++++++++- 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/src/runtime_src/core/common/api/module_int.h b/src/runtime_src/core/common/api/module_int.h index b0116094880..d80bfe6fe87 100644 --- a/src/runtime_src/core/common/api/module_int.h +++ b/src/runtime_src/core/common/api/module_int.h @@ -29,7 +29,8 @@ enum class buf_type { ctrlpkt_pm = 5, // preemption ctrl pkt pad = 6, // scratchpad/control packet section name for next gen aie devices dump = 7, // dump section containing debug info for trace etc - buf_type_count = 8 // total number of buf types + ctrlpkt = 8, // control packet section in aie2ps/aie4 new ELFs + buf_type_count = 9 // total number of buf types }; } diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 7aed7ba34db..62a82db8a79 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -184,7 +184,8 @@ struct patcher ".pdi", ".ctrlpkt.pm", ".pad", - ".dump" + ".dump", + ".ctrlpkt" }; return section_name_array[static_cast(bt)]; @@ -616,6 +617,16 @@ class module_impl throw std::runtime_error("Not supported"); } + // Returns map of ctrl pkt buffers map for given id + // key : ctrl pkt section name + // value : ctrl pkt buffer + // Applicable for newer aie2ps/aie4 ELFs + virtual const std::map& + get_ctrlpkt_buf_map(uint32_t /*id*/) const + { + throw std::runtime_error("Not supported"); + } + // Scratchpad memory is used to store contents of L2 memory // during preemption. So scratchpad memory size is specific // to device. @@ -1586,6 +1597,10 @@ class module_elf_aie2ps : public module_elf // key : control code id (grp sec idx), value : vector of column ctrlcodes std::map> m_ctrlcodes_map; + // map for holding multiple ctrl pkt sections data for each sub kernel + // key : control code id (grp sec idx), value : map of ctrl pkt section name and data + std::map> m_ctrlpkt_buf_map; + // map to hold .dump section of different sub kernels used for debug/trace std::map m_dump_buf_map; @@ -1715,6 +1730,24 @@ class module_elf_aie2ps : public module_elf } } + // Parse the ELF and extract all ctrlpkt sections data + // There can be multiple ctrlpkt sections for each sub kernel + // store them in map with key as section name and value as section data + void + initialize_ctrlpkt_bufs() + { + for (const auto& sec : m_elfio.sections) { + auto name = sec->get_name(); + if (name.find(patcher::to_string(xrt_core::patcher::buf_type::ctrlpkt)) == std::string::npos) + continue; + + buf ctrlpkt_buf; + ctrlpkt_buf.append_section_data(sec.get()); + auto grp_idx = m_sec_to_grp_map[sec->get_index()]; + m_ctrlpkt_buf_map[grp_idx][name] = std::move(ctrlpkt_buf); + } + } + void initialize_arg_patchers(const std::map>& pad_offsets) { @@ -1768,6 +1801,12 @@ class module_elf_aie2ps : public module_elf abs_offset += rela->r_offset; buf_type = xrt_core::patcher::buf_type::pad; } + else if (patch_sec_name.find(patcher::to_string(xrt_core::patcher::buf_type::ctrlpkt)) != std::string::npos) { + // section to patch is ctrlpkt + // this section is present in new ELFs + abs_offset += rela->r_offset; + buf_type = xrt_core::patcher::buf_type::ctrlpkt; + } else { // section to patch is ctrlcode // Get control code section referenced by the symbol, col, and page @@ -1844,6 +1883,7 @@ class module_elf_aie2ps : public module_elf { std::map> pad_offsets; initialize_column_ctrlcode(pad_offsets, ::is_group_elf(xrt_core::elf_int::get_elfio(elf))); + initialize_ctrlpkt_bufs(); initialize_arg_patchers(pad_offsets); initialize_dump_buf(); } @@ -1864,6 +1904,16 @@ class module_elf_aie2ps : public module_elf return empty_vec; } + const std::map& + get_ctrlpkt_buf_map(uint32_t id) const override + { + if (auto it = m_ctrlpkt_buf_map.find(id); it != m_ctrlpkt_buf_map.end()) + return it->second; + + static const std::map empty_map; + return empty_map; + } + const buf& get_dump_buf(uint32_t id) const override { @@ -1939,6 +1989,12 @@ class module_sram : public module_impl xrt::bo m_preempt_save_bo; xrt::bo m_preempt_restore_bo; + // map of ctrlpkt buffers + // This is for ELFs that have multiple ctrlpkt sections (aie2ps/aie4) + // key : ctrlpkt section name + // value : xrt::bo filled with corresponding section data + std::map m_ctrlpkt_bos; + // map of ctrlpkt preemption buffers // key : dynamic symbol patch name of ctrlpkt-pm // value : xrt::bo filled with corresponding section data @@ -2072,6 +2128,16 @@ class module_sram : public module_impl m_patched_args.insert(sym_name); offset += col_data[i].size(); } + + // Iterate over all control packets for this sub kernel and + // patch it in instruction buffer + for (const auto& [name, ctrlpktbo] : m_ctrlpkt_bos) { + // symbol name will be same as section name without the grp idx + auto sym_name = name.substr(0, name.rfind('.')); + if (patch_instr_value(m_buffer, sym_name, std::numeric_limits::max(), ctrlpktbo.address(), + xrt_core::patcher::buf_type::ctrltext, m_ctrl_code_id)) + m_patched_args.insert(sym_name); + } } void @@ -2243,6 +2309,33 @@ class module_sram : public module_impl } } + // Create control pkt buffers for all ctrlpkt sections of this sub kernel + // These buffer addresses are patched in instruction buffer + void + create_ctrlpkt_bufs(const module_impl* parent) + { + const auto& ctrlpkt_map = parent->get_ctrlpkt_buf_map(m_ctrl_code_id); + if (ctrlpkt_map.empty()) + return; // older Elfs have ctrlpkt in pad section + + // Create ctrlpkt bo's for all ctrlpkt sections + for (const auto& [name, buf] : ctrlpkt_map) { + m_ctrlpkt_bos[name] = xrt::ext::bo{ m_hwctx, buf.size() }; + fill_bo_with_data(m_ctrlpkt_bos[name], buf); + } + + if (is_dump_control_packet()) { + for (auto& [name, bo] : m_ctrlpkt_bos) { + std::string dump_file_name = name + "_pre_patch" + std::to_string(get_id()) + ".bin"; + dump_bo(bo, dump_file_name); + + std::stringstream ss; + ss << "dumped file " << dump_file_name; + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + } + } + // Create the instruction buffer with all columns data along with pad section void create_instruction_buffer(const module_impl* parent) @@ -2296,6 +2389,14 @@ class module_sram : public module_impl if (m_parent->patch_it(m_buffer, argnm, index, value, xrt_core::patcher::buf_type::pad, m_ctrl_code_id, m_first_patch)) patched = true; + + // New Elfs have multiple ctrlpkt sections + // Iterate over all ctrlpkt buffers and patch them + for (const auto& ctrlpkt : m_ctrlpkt_bos) { + if (m_parent->patch_it(ctrlpkt.second, argnm, index, value, xrt_core::patcher::buf_type::ctrlpkt, + m_ctrl_code_id, m_first_patch)) + patched = true; + } } if (patched) { @@ -2360,6 +2461,20 @@ class module_sram : public module_impl ss << "dumped file " << dump_file_name; xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); } + + for (auto& [name, bo] : m_ctrlpkt_bos) { + if (m_first_patch) + bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (is_dump_control_packet()) { + std::string dump_file_name = name + "_pre_patch" + std::to_string(get_id()) + ".bin"; + dump_bo(bo, dump_file_name); + + std::stringstream ss; + ss << "dumped file " << dump_file_name; + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + } } else if (os_abi == Elf_Amd_Aie2p) { if (m_first_patch) @@ -2627,6 +2742,7 @@ class module_sram : public module_impl } else if (os_abi == Elf_Amd_Aie2ps || os_abi == Elf_Amd_Aie2ps_group) { initialize_dtrace_buf(m_parent.get()); + create_ctrlpkt_bufs(m_parent.get()); create_instruction_buffer(m_parent.get()); fill_column_bo_address(m_parent->get_data(m_ctrl_code_id)); } From 2141463dda88a2ac54b0b411d881e96eccf52ab2 Mon Sep 17 00:00:00 2001 From: Rahul Bramandlapalli Date: Tue, 14 Oct 2025 12:09:46 +0530 Subject: [PATCH 2/2] Add missing ctrlpkt patching in aie4 Signed-off-by: Rahul Bramandlapalli --- .../core/common/api/xrt_module.cpp | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 62a82db8a79..8e90e83ca89 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -157,7 +157,8 @@ struct patcher shim_dma_aie4_base_addr_symbol_kind = 6, // patching scheme needed by AIE4 firmware control_packet_57 = 7, // patching scheme needed by firmware to patch control packet for aie2ps address_64 = 8, // patching scheme needed to patch pdi address - unknown_symbol_kind = 9 + control_packet_57_aie4 = 9, // patching scheme needed by firmware to patch control packet for aie4 + unknown_symbol_kind = 10 }; xrt_core::patcher::buf_type m_buf_type = xrt_core::patcher::buf_type::ctrltext; @@ -290,6 +291,20 @@ struct patcher bd_data_ptr[2] = (bd_data_ptr[2] & 0xFFFF0000) | (base_address >> 32); // NOLINT } + void + patch_ctrl57_aie4(uint32_t* bd_data_ptr, uint64_t patch) const + { + // This patching scheme is originated from NPU firmware + constexpr uint64_t ddr_aie_addr_offset = 0x80000000; + + // bd_data_ptr is a pointer to the header of the control code + uint64_t base_address = (((uint64_t)bd_data_ptr[1] & 0x1FFFFFF) << 32) | bd_data_ptr[2]; // NOLINT + + base_address += patch + ddr_aie_addr_offset; + bd_data_ptr[2] = (uint32_t)(base_address & 0xFFFFFFFF); // NOLINT + bd_data_ptr[1] = (bd_data_ptr[0] & 0xFE000000) | ((base_address >> 32) & 0x1FFFFFF); // NOLINT + } + template void patch_it_impl(T base_or_bo, uint64_t new_value, bool first) @@ -392,6 +407,15 @@ struct patcher sync(3 * sizeof(uint32_t)); // NOLINT } break; + case symbol_type::control_packet_57_aie4: + // new_value is a bo address + patch_ctrl57_aie4(bd_data_ptr, new_value + item.offset_to_base_bo_addr); + if (!first) { + // Data in this case is written till 2nd offset of bd_data_ptr + // so syncing 3 words + sync(3 * sizeof(uint32_t)); // NOLINT + } + break; default: throw std::runtime_error("Unsupported symbol type"); }