From 11223f09d3122ded895a5864b1ef63a27cc6715e Mon Sep 17 00:00:00 2001 From: zah Date: Tue, 20 May 2025 09:12:44 +0300 Subject: [PATCH] feat(native): match pure tracer output --- .../ext/native_tracer/src/lib.rs | 259 +++++++++++++++++- gems/native-tracer/lib/native_trace.rb | 34 ++- test/test_tracer.rb | 10 +- 3 files changed, 282 insertions(+), 21 deletions(-) diff --git a/gems/native-tracer/ext/native_tracer/src/lib.rs b/gems/native-tracer/ext/native_tracer/src/lib.rs index 832d2fa..cff3db4 100644 --- a/gems/native-tracer/ext/native_tracer/src/lib.rs +++ b/gems/native-tracer/ext/native_tracer/src/lib.rs @@ -10,15 +10,20 @@ use std::{ use rb_sys::{ rb_add_event_hook2, rb_remove_event_hook_with_data, rb_define_class, - rb_define_alloc_func, rb_define_method, + rb_define_alloc_func, rb_define_method, rb_funcall, rb_intern, rb_event_hook_flag_t::RUBY_EVENT_HOOK_FLAG_RAW_ARG, rb_event_flag_t, rb_trace_arg_t, - rb_tracearg_event_flag, rb_tracearg_lineno, rb_tracearg_path, - rb_cObject, VALUE, ID, RUBY_EVENT_LINE, - RSTRING_PTR, RSTRING_LEN, + rb_tracearg_event_flag, rb_tracearg_lineno, rb_tracearg_path, rb_tracearg_self, + rb_tracearg_binding, rb_tracearg_callee_id, rb_tracearg_return_value, + rb_tracearg_raised_exception, + rb_cObject, VALUE, ID, RUBY_EVENT_LINE, RUBY_EVENT_CALL, RUBY_EVENT_RETURN, + RUBY_EVENT_RAISE, rb_raise, rb_eIOError, + rb_sym2id, rb_id2name, rb_id2sym, rb_obj_classname, rb_num2long }; -use runtime_tracing::{Tracer, Line}; +use rb_sys::{RARRAY_LEN, RARRAY_CONST_PTR, RSTRING_LEN, RSTRING_PTR, RB_INTEGER_TYPE_P, RB_TYPE_P, RB_SYMBOL_P, NIL_P}; +use rb_sys::{Qtrue, Qfalse, Qnil}; +use runtime_tracing::{Tracer, Line, ValueRecord, TypeKind, EventLogKind, TraceLowLevelEvent, CallRecord, FullValueRecord, ReturnRecord, RecordEvent}; #[repr(C)] struct RTypedData { @@ -86,7 +91,17 @@ unsafe fn get_recorder(obj: VALUE) -> *mut Recorder { } unsafe extern "C" fn ruby_recorder_alloc(klass: VALUE) -> VALUE { - let recorder = Box::new(Recorder { tracer: Tracer::new("ruby", &vec![]), active: false }); + let mut tracer = Tracer::new("ruby", &vec![]); + // pre-register common types to match the pure Ruby tracer + tracer.ensure_type_id(TypeKind::Int, "Integer"); + tracer.ensure_type_id(TypeKind::String, "String"); + tracer.ensure_type_id(TypeKind::Bool, "Bool"); + tracer.ensure_type_id(TypeKind::String, "Symbol"); + tracer.ensure_type_id(TypeKind::Error, "No type"); + let path = Path::new(""); + let func_id = tracer.ensure_function_id("", path, Line(1)); + tracer.events.push(TraceLowLevelEvent::Call(CallRecord { function_id: func_id, args: vec![] })); + let recorder = Box::new(Recorder { tracer, active: false }); let ty = std::ptr::addr_of!(RECORDER_TYPE) as *const rb_data_type_t; rb_data_typed_object_wrap(klass, Box::into_raw(recorder) as *mut c_void, ty) } @@ -96,7 +111,12 @@ unsafe extern "C" fn enable_tracing(self_val: VALUE) -> VALUE { if !recorder.active { let raw_cb: unsafe extern "C" fn(VALUE, *mut rb_trace_arg_t) = event_hook_raw; let cb: unsafe extern "C" fn(rb_event_flag_t, VALUE, VALUE, ID, VALUE) = transmute(raw_cb); - rb_add_event_hook2(Some(cb), RUBY_EVENT_LINE, self_val, RUBY_EVENT_HOOK_FLAG_RAW_ARG); + rb_add_event_hook2( + Some(cb), + RUBY_EVENT_LINE | RUBY_EVENT_CALL | RUBY_EVENT_RETURN | RUBY_EVENT_RAISE, + self_val, + RUBY_EVENT_HOOK_FLAG_RAW_ARG, + ); recorder.active = true; } rb_sys::Qnil.into() @@ -124,6 +144,142 @@ fn flush_to_dir(tracer: &Tracer, dir: &Path) -> Result<(), Box Option { + if ptr.is_null() { + return None; + } + CStr::from_ptr(ptr).to_str().ok().map(|s| s.to_string()) +} + +unsafe fn value_to_string(val: VALUE) -> Option { + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_STRING) { + let ptr = RSTRING_PTR(val); + let len = RSTRING_LEN(val) as usize; + let slice = std::slice::from_raw_parts(ptr as *const u8, len); + return Some(String::from_utf8_lossy(slice).to_string()); + } + let to_s_id = rb_intern(b"to_s\0".as_ptr() as *const c_char); + let str_val = rb_funcall(val, to_s_id, 0); + let ptr = RSTRING_PTR(str_val); + let len = RSTRING_LEN(str_val) as usize; + let slice = std::slice::from_raw_parts(ptr as *const u8, len); + Some(String::from_utf8_lossy(slice).to_string()) +} + +unsafe fn to_value(tracer: &mut Tracer, val: VALUE, depth: usize) -> ValueRecord { + if depth == 0 { + let type_id = tracer.ensure_type_id(TypeKind::Error, "No type"); + return ValueRecord::None { type_id }; + } + if NIL_P(val) { + let type_id = tracer.ensure_type_id(TypeKind::Error, "No type"); + return ValueRecord::None { type_id }; + } + if val == (Qtrue as VALUE) || val == (Qfalse as VALUE) { + let type_id = tracer.ensure_type_id(TypeKind::Bool, "Bool"); + return ValueRecord::Bool { b: val == (Qtrue as VALUE), type_id }; + } + if RB_INTEGER_TYPE_P(val) { + let i = rb_num2long(val) as i64; + let type_id = tracer.ensure_type_id(TypeKind::Int, "Integer"); + return ValueRecord::Int { i, type_id }; + } + if RB_SYMBOL_P(val) { + let id = rb_sym2id(val); + let name = CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or(""); + let type_id = tracer.ensure_type_id(TypeKind::String, "Symbol"); + return ValueRecord::String { text: name.to_string(), type_id }; + } + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_STRING) { + let ptr = RSTRING_PTR(val); + let len = RSTRING_LEN(val) as usize; + let slice = std::slice::from_raw_parts(ptr as *const u8, len); + let type_id = tracer.ensure_type_id(TypeKind::String, "String"); + return ValueRecord::String { text: String::from_utf8_lossy(slice).to_string(), type_id }; + } + if RB_TYPE_P(val, rb_sys::ruby_value_type::RUBY_T_ARRAY) { + let len = RARRAY_LEN(val) as usize; + let mut elements = Vec::new(); + let ptr = RARRAY_CONST_PTR(val); + for i in 0..len { + let elem = *ptr.add(i); + elements.push(to_value(tracer, elem, depth - 1)); + } + let type_id = tracer.ensure_type_id(TypeKind::Seq, "Array"); + return ValueRecord::Sequence { elements, is_slice: false, type_id }; + } + let class_name = cstr_to_string(rb_obj_classname(val)).unwrap_or_else(|| "Object".to_string()); + let text = value_to_string(val).unwrap_or_default(); + let type_id = tracer.ensure_type_id(TypeKind::Raw, &class_name); + ValueRecord::Raw { r: text, type_id } +} + +unsafe fn record_variables(tracer: &mut Tracer, binding: VALUE) -> Vec { + let mut result = Vec::new(); + let locals_id = rb_intern(b"local_variables\0".as_ptr() as *const c_char); + let get_id = rb_intern(b"local_variable_get\0".as_ptr() as *const c_char); + let vars = rb_funcall(binding, locals_id, 0); + let len = RARRAY_LEN(vars) as usize; + let ptr = RARRAY_CONST_PTR(vars); + for i in 0..len { + let sym = *ptr.add(i); + let id = rb_sym2id(sym); + let name = CStr::from_ptr(rb_id2name(id)).to_str().unwrap_or(""); + let value = rb_funcall(binding, get_id, 1, sym); + let val_rec = to_value(tracer, value, 10); + tracer.register_variable_with_full_value(name, val_rec.clone()); + let var_id = tracer.ensure_variable_id(name); + result.push(FullValueRecord { variable_id: var_id, value: val_rec }); + } + result +} + +unsafe fn record_parameters(tracer: &mut Tracer, binding: VALUE, defined_class: VALUE, mid: ID, register: bool) -> Vec { + let mut result = Vec::new(); + let inst_meth_id = rb_intern(b"instance_method\0".as_ptr() as *const c_char); + let parameters_id = rb_intern(b"parameters\0".as_ptr() as *const c_char); + let local_get_id = rb_intern(b"local_variable_get\0".as_ptr() as *const c_char); + let method_sym = rb_id2sym(mid); + let method_obj = rb_funcall(defined_class, inst_meth_id, 1, method_sym); + let params_ary = rb_funcall(method_obj, parameters_id, 0); + let params_len = RARRAY_LEN(params_ary) as usize; + let params_ptr = RARRAY_CONST_PTR(params_ary); + for i in 0..params_len { + let pair = *params_ptr.add(i); + if RARRAY_LEN(pair) < 2 { + continue; + } + let pair_ptr = RARRAY_CONST_PTR(pair); + let name_sym = *pair_ptr.add(1); + if NIL_P(name_sym) { + continue; + } + let name_id = rb_sym2id(name_sym); + let name_c = rb_id2name(name_id); + if name_c.is_null() { + continue; + } + let name = CStr::from_ptr(name_c).to_str().unwrap_or(""); + let value = rb_funcall(binding, local_get_id, 1, name_sym); + let val_rec = to_value(tracer, value, 10); + if register { + tracer.register_variable_with_full_value(name, val_rec.clone()); + let var_id = tracer.ensure_variable_id(name); + result.push(FullValueRecord { variable_id: var_id, value: val_rec }); + } + } + result +} + +unsafe fn record_event(tracer: &mut Tracer, path: &str, line: i64, content: &str) { + tracer.register_step(Path::new(path), Line(line)); + tracer.events.push(TraceLowLevelEvent::Event(RecordEvent { + kind: EventLogKind::Write, + metadata: String::new(), + content: content.to_string(), + })); +} + unsafe extern "C" fn flush_trace(self_val: VALUE, out_dir: VALUE) -> VALUE { let recorder_ptr = get_recorder(self_val); let recorder = &mut *recorder_ptr; @@ -143,6 +299,21 @@ unsafe extern "C" fn flush_trace(self_val: VALUE, out_dir: VALUE) -> VALUE { rb_sys::Qnil.into() } +unsafe extern "C" fn record_event_api(self_val: VALUE, path: VALUE, line: VALUE, content: VALUE) -> VALUE { + let recorder = &mut *get_recorder(self_val); + let path_str = if NIL_P(path) { + "".to_string() + } else { + let ptr = RSTRING_PTR(path); + let len = RSTRING_LEN(path) as usize; + String::from_utf8_lossy(std::slice::from_raw_parts(ptr as *const u8, len)).to_string() + }; + let line_num = rb_num2long(line) as i64; + let content_str = value_to_string(content).unwrap_or_default(); + record_event(&mut recorder.tracer, &path_str, line_num, &content_str); + rb_sys::Qnil.into() +} + /// Raw-argument callback (Ruby will call it when we set /// `RUBY_EVENT_HOOK_FLAG_RAW_ARG`). /// @@ -161,16 +332,76 @@ unsafe extern "C" fn event_hook_raw(data: VALUE, arg: *mut rb_trace_arg_t) { } let ev: rb_event_flag_t = rb_tracearg_event_flag(arg); - if (ev & RUBY_EVENT_LINE) == 0 { + let path_val = rb_tracearg_path(arg); + let line_val = rb_tracearg_lineno(arg); + let path = if NIL_P(path_val) { + "".to_string() + } else { + let ptr = RSTRING_PTR(path_val); + let len = RSTRING_LEN(path_val) as usize; + String::from_utf8_lossy(std::slice::from_raw_parts(ptr as *const u8, len)).to_string() + }; + let line = rb_num2long(line_val) as i64; + if path.contains("native_trace.rb") { return; } - let path_ptr = rb_tracearg_path(arg) as *const c_char; - let line = rb_tracearg_lineno(arg) as i64; + if (ev & RUBY_EVENT_LINE) != 0 { + let binding = rb_tracearg_binding(arg); + recorder.tracer.register_step(Path::new(&path), Line(line)); + if !NIL_P(binding) { + record_variables(&mut recorder.tracer, binding); + } + } else if (ev & RUBY_EVENT_CALL) != 0 { + let binding = rb_tracearg_binding(arg); + + let self_val = rb_tracearg_self(arg); + let mid_sym = rb_tracearg_callee_id(arg); + let mid = rb_sym2id(mid_sym); + let class_id = rb_intern(b"class\0".as_ptr() as *const c_char); + let defined_class = rb_funcall(self_val, class_id, 0); + if !NIL_P(binding) { + // register parameter types first + let _ = record_parameters(&mut recorder.tracer, binding, defined_class, mid, false); + } + let self_rec = to_value(&mut recorder.tracer, self_val, 10); + recorder + .tracer + .register_variable_with_full_value("self", self_rec.clone()); - if !path_ptr.is_null() { - if let Ok(path) = CStr::from_ptr(path_ptr).to_str() { - recorder.tracer.register_step(Path::new(path), Line(line)); + let mut args = if NIL_P(binding) { + vec![] + } else { + record_parameters(&mut recorder.tracer, binding, defined_class, mid, true) + }; + args.insert(0, recorder.tracer.arg("self", self_rec)); + recorder.tracer.register_step(Path::new(&path), Line(line)); + let name_c = rb_id2name(mid); + let mut name = if !name_c.is_null() { + CStr::from_ptr(name_c).to_str().unwrap_or("").to_string() + } else { + String::new() + }; + let class_name = cstr_to_string(rb_obj_classname(self_val)).unwrap_or_else(|| "Object".to_string()); + if class_name != "Object" { + name = format!("{}#{}", class_name, name); + } + let fid = recorder.tracer.ensure_function_id(&name, Path::new(&path), Line(line)); + recorder.tracer.events.push(TraceLowLevelEvent::Call(CallRecord { function_id: fid, args })); + } else if (ev & RUBY_EVENT_RETURN) != 0 { + recorder.tracer.register_step(Path::new(&path), Line(line)); + let ret = rb_tracearg_return_value(arg); + let val_rec = to_value(&mut recorder.tracer, ret, 10); + recorder.tracer.register_variable_with_full_value("", val_rec.clone()); + recorder.tracer.events.push(TraceLowLevelEvent::Return(ReturnRecord { return_value: val_rec })); + } else if (ev & RUBY_EVENT_RAISE) != 0 { + let exc = rb_tracearg_raised_exception(arg); + if let Some(msg) = value_to_string(exc) { + recorder.tracer.events.push(TraceLowLevelEvent::Event(RecordEvent { + kind: EventLogKind::Error, + metadata: String::new(), + content: msg, + })); } } } @@ -183,8 +414,10 @@ pub extern "C" fn Init_codetracer_ruby_recorder() { let enable_cb: unsafe extern "C" fn(VALUE) -> VALUE = enable_tracing; let disable_cb: unsafe extern "C" fn(VALUE) -> VALUE = disable_tracing; let flush_cb: unsafe extern "C" fn(VALUE, VALUE) -> VALUE = flush_trace; + let event_cb: unsafe extern "C" fn(VALUE, VALUE, VALUE, VALUE) -> VALUE = record_event_api; rb_define_method(class, b"enable_tracing\0".as_ptr() as *const c_char, Some(transmute(enable_cb)), 0); rb_define_method(class, b"disable_tracing\0".as_ptr() as *const c_char, Some(transmute(disable_cb)), 0); rb_define_method(class, b"flush_trace\0".as_ptr() as *const c_char, Some(transmute(flush_cb)), 1); + rb_define_method(class, b"record_event\0".as_ptr() as *const c_char, Some(transmute(event_cb)), 3); } } diff --git a/gems/native-tracer/lib/native_trace.rb b/gems/native-tracer/lib/native_trace.rb index b16939a..a60f1ce 100644 --- a/gems/native-tracer/lib/native_trace.rb +++ b/gems/native-tracer/lib/native_trace.rb @@ -49,12 +49,44 @@ begin require target_path recorder = RubyRecorder.new - recorder.enable_tracing + $recorder = recorder + + module Kernel + alias :old_p :p + alias :old_puts :puts + alias :old_print :print + + def p(*args) + if $recorder + loc = caller_locations(1,1).first + $recorder.record_event(loc.path, loc.lineno, args.join("\n")) + end + old_p(*args) + end + + def puts(*args) + if $recorder + loc = caller_locations(1,1).first + $recorder.record_event(loc.path, loc.lineno, args.join("\n")) + end + old_puts(*args) + end + + def print(*args) + if $recorder + loc = caller_locations(1,1).first + $recorder.record_event(loc.path, loc.lineno, args.join("\n")) + end + old_print(*args) + end + end + rescue Exception => e warn "native tracer unavailable: #{e}" end program = ARGV.shift +recorder.enable_tracing if recorder load program if recorder recorder.disable_tracing diff --git a/test/test_tracer.rb b/test/test_tracer.rb index 58cf564..b1c8475 100644 --- a/test/test_tracer.rb +++ b/test/test_tracer.rb @@ -52,16 +52,12 @@ def program_args(base) pure_trace, pure_out = run_trace('gems/pure-ruby-tracer/lib/trace.rb', "#{base}.rb", *program_args(base)) native_trace, native_out = run_trace('gems/native-tracer/lib/native_trace.rb', "#{base}.rb", *program_args(base)) - assert_equal expected_trace("#{base}.rb"), pure_trace + expected = expected_trace("#{base}.rb") + assert_equal expected, pure_trace + assert_equal expected, native_trace expected = expected_output("#{base}.rb") assert_equal expected, pure_out assert_equal expected, native_out - - # The native tracer doesn't yet match the pure Ruby tracer, but it should - # still generate some trace output. Fail early if the trace is empty to - # ensure the extension was loaded correctly. - refute_nil native_trace, 'native tracer did not produce a trace file' - refute_empty native_trace, 'native tracer produced an empty trace' end end end