diff --git a/.agents/codebase-insights.txt b/.agents/codebase-insights.txt index a886c88..c144ea7 100644 --- a/.agents/codebase-insights.txt +++ b/.agents/codebase-insights.txt @@ -1 +1,6 @@ -No insights yet. Please add content here and remove this line. +When the pure Ruby recorder traces a script that holds a reference to the +`PureRubyRecorder` instance in a local variable, the variable inspection code +would recursively serialise the tracer's internal state. This results in an +explosive amount of output and may appear as an infinite recursion when running +`examples/selective_tracing_pure.rb`. To avoid this, `load_variables` now skips +values that refer to the recorder or its `TraceRecord`. diff --git a/.agents/tasks/2025/05/30-1219-refactor-gems b/.agents/tasks/2025/05/30-1219-refactor-gems new file mode 100644 index 0000000..5de52e9 --- /dev/null +++ b/.agents/tasks/2025/05/30-1219-refactor-gems @@ -0,0 +1,16 @@ +A previous developer got the following task: + +Currently, the gems in the repo are structured in a weird way. We have a very thin executable file for each gem that just calls into another ruby script that used to be the primary executable. The other script now serves as a library, but has special code to detect when it's being executed. Let's make the second script a pure library and move all the "executable" code into the main gem binaries. + +He made really good progress in commit d007872908d4fb5dbe862549f825eec98e7721f0, but +he hasn't tested his code. + +Please test his changes and fix any issues that you find. + +He tried to implement one new feature: + +Both gem binaries now allow the standard "--" separator that specifies where +the arguments of the executed program begin. + +Please add a test case that uses this notation to make sure it works correctly. +Of course, keep the existing tests that don't use this notation. 
diff --git a/.gitignore b/.gitignore index 4ed6b0c..4980dd5 100644 --- a/.gitignore +++ b/.gitignore @@ -13,5 +13,3 @@ pkg/ # Offline dependency sources .codex/deps_src/ .codex/internet_resources/ - -agents-workflow/ diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 75389da..48412b2 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -23,7 +23,7 @@ just build-extension This compiles the extension in release mode using Cargo. The resulting shared library is placed under -`ext/native_tracer/target/release/` and is loaded by `gems/codetracer-ruby-recorder/lib/native_trace.rb`. +`ext/native_tracer/target/release/` and is loaded by `gems/codetracer-ruby-recorder/lib/codetracer_ruby_recorder.rb`. ## Running tests diff --git a/README.md b/README.md index f5f8a15..4b685a9 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ recorder.flush_trace(Dir.pwd) you can currently use it directly with ```bash -ruby gems/codetracer-pure-ruby-recorder/lib/trace.rb [--out-dir DIR] +ruby gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder [--out-dir DIR] # produces several trace json files in DIR, # or in `$CODETRACER_RUBY_RECORDER_OUT_DIR` if DIR is not provided. # Defaults to the current directory. @@ -47,7 +47,7 @@ You can also invoke a lightweight CLI that loads the native tracer extension directly: ```bash -ruby gems/codetracer-ruby-recorder/lib/native_trace.rb [--out-dir DIR] +ruby gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder [--out-dir DIR] # Uses DIR or `$CODETRACER_RUBY_RECORDER_OUT_DIR` to choose where traces are saved. 
``` diff --git a/examples/selective_tracing.rb b/examples/selective_tracing.rb index edbc92a..47b6884 100755 --- a/examples/selective_tracing.rb +++ b/examples/selective_tracing.rb @@ -1,13 +1,9 @@ #!/usr/bin/env ruby -# Load the native extension only if RubyRecorder is not already available -# (e.g., when running directly without the codetracer wrapper) -unless defined?(RubyRecorder) - ext_base = File.expand_path('../gems/codetracer-ruby-recorder/ext/native_tracer/target/release/libcodetracer_ruby_recorder', __dir__) - require ext_base -end +ext_base = File.expand_path('../gems/codetracer-ruby-recorder/ext/native_tracer/target/release/libcodetracer_ruby_recorder', __dir__) +require ext_base -recorder = RubyRecorder.new +recorder = CodeTracer::RubyRecorder.new puts 'start trace' recorder.disable_tracing diff --git a/examples/selective_tracing_pure.rb b/examples/selective_tracing_pure.rb index fd6502b..2eda8a2 100755 --- a/examples/selective_tracing_pure.rb +++ b/examples/selective_tracing_pure.rb @@ -1,18 +1,15 @@ #!/usr/bin/env ruby -# Load the pure Ruby tracer library if RubyRecorder is not already defined -unless defined?(RubyRecorder) - lib_base = File.expand_path('../gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder', __dir__) - require lib_base -end +lib_base = File.expand_path('../gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder', __dir__) +require lib_base -recorder = RubyRecorder.new +recorder = CodeTracer::PureRubyRecorder.new puts 'start trace' -recorder.disable_tracing +recorder.stop puts 'this will not be traced' -recorder.enable_tracing +recorder.start puts 'this will be traced' -recorder.disable_tracing +recorder.stop puts 'tracing disabled' recorder.flush_trace(Dir.pwd) diff --git a/gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder b/gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder old mode 100644 new mode 100755 index 7adbdd7..35bc17c --- 
a/gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder +++ b/gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder @@ -1,4 +1,9 @@ #!/usr/bin/env ruby -require 'rbconfig' -script = File.expand_path('../lib/trace.rb', __dir__) -exec RbConfig.ruby, script, *ARGV +# SPDX-License-Identifier: MIT +# CLI for the pure Ruby tracer + +lib_dir = File.expand_path('../lib', __dir__) +$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir) +require 'codetracer_pure_ruby_recorder' + +exit CodeTracer::PureRubyRecorder.parse_argv_and_trace_ruby_file(ARGV) diff --git a/gems/codetracer-pure-ruby-recorder/lib/codetracer/kernel_patches.rb b/gems/codetracer-pure-ruby-recorder/lib/codetracer/kernel_patches.rb index 7dc6689..06c5413 100644 --- a/gems/codetracer-pure-ruby-recorder/lib/codetracer/kernel_patches.rb +++ b/gems/codetracer-pure-ruby-recorder/lib/codetracer/kernel_patches.rb @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -module Codetracer +module CodeTracer module KernelPatches @@tracers = [] @@ -54,7 +54,6 @@ def self.uninstall(tracer) alias_method :p, :codetracer_original_p alias_method :puts, :codetracer_original_puts alias_method :print, :codetracer_original_print - remove_method :codetracer_original_p remove_method :codetracer_original_puts remove_method :codetracer_original_print diff --git a/gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder.rb b/gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder.rb index 115e0bf..b7ffc5b 100644 --- a/gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder.rb +++ b/gems/codetracer-pure-ruby-recorder/lib/codetracer_pure_ruby_recorder.rb @@ -1,43 +1,299 @@ -require_relative 'trace' - -# Ruby implementation of the RubyRecorder API used by the native tracer. -# Provides basic tracing controls and serialization using the pure Ruby tracer. 
-class RubyRecorder - def initialize(debug: ENV['CODETRACER_RUBY_RECORDER_DEBUG'] == '1') - @record = $codetracer_record - @tracer = Tracer.new(@record, debug: debug) - setup_defaults - end +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Metacraft Labs Ltd +# See LICENSE file in the project root for full license information. - # Enable tracing of Ruby code execution. - def enable_tracing - @tracer.activate - end +require 'json' +require 'optparse' +require_relative 'recorder' +require_relative 'codetracer/kernel_patches' - # Disable tracing without discarding collected data. - def disable_tracing - @tracer.deactivate - end +module CodeTracer + class PureRubyRecorder + attr_accessor :calls_tracepoint, :return_tracepoint, + :line_tracepoint, :raise_tracepoint, :tracing - # Serialize the trace to +out_dir+. - def flush_trace(out_dir) - @tracer.stop_tracing - @record.serialize('', out_dir) - end + attr_reader :ignore_list, :record, :debug - # Record a custom event at +path+ and +line+ with +content+. - def record_event(path, line, content) - @tracer.record_event(["#{path}:#{line}"], content) - end + def self.parse_argv_and_trace_ruby_file(argv) + require 'optparse' + lib_dir = File.expand_path('../lib', __dir__) + $LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir) + + options = {} + parser = OptionParser.new do |opts| + opts.banner = "usage: codetracer-pure-ruby-recorder [options] []" + opts.on('-o DIR', '--out-dir DIR', 'Directory to write trace files') { |dir| options[:out_dir] = dir } + opts.on('-h', '--help', 'Print this help') { puts opts; exit } + end + parser.order!(argv) + + program = argv.shift + if program.nil? 
+ $stderr.puts parser + exit 1 + end + + # Remaining arguments after the program name are passed to the traced program + program_args = argv.dup + + out_dir = options[:out_dir] || ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] || Dir.pwd + trace_ruby_file(program, out_dir, program_args) + 0 + end + + def self.trace_ruby_file(program, out_dir, program_args = []) + tracer = PureRubyRecorder.new(debug: ENV['CODETRACER_RUBY_RECORDER_DEBUG'] == '1') + + tracer.record.register_call('', 1, '', []) + tracer.ignore('lib/ruby') + tracer.ignore('codetracer_pure_ruby_recoreder.rb') + tracer.ignore('recorder.rb') + tracer.ignore(' e + if tracer.debug + codetracer_original_puts '' + codetracer_original_puts '==== trace.rb error while tracing program ===' + codetracer_original_puts 'ERROR' + codetracer_original_puts e + codetracer_original_puts e.backtrace + codetracer_original_puts '=====================' + codetracer_original_puts '' + end + ensure + # Restore original ARGV + ARGV.clear + ARGV.concat(original_argv) + end + end + + tracer.record.serialize(program, out_dir) + 0 + end + + def initialize(debug: false) + @tracing = false + @record = TraceRecord.new + @ignore_list = [] + @debug = debug + @record.debug = debug if @record.respond_to?(:debug=) + setup_tracepoints + end + + def tracks_call?(tp) + tp.path.end_with?('.rb') && !@ignore_list.any? 
{ |path| tp.path.include?(path) } + end + + def ignore(path) + @ignore_list << path + end + + def setup_tracepoints + @calls_tracepoint = TracePoint.new(:call) do |tp| + disable_tracepoints + record_call(tp) + enable_tracepoints + end + + @return_tracepoint = TracePoint.new(:return) do |tp| + disable_tracepoints + record_return(tp) + enable_tracepoints + end + + @line_tracepoint = TracePoint.new(:line) do |tp| + disable_tracepoints + record_step(tp) + enable_tracepoints + end + + @raise_tracepoint = TracePoint.new(:raise) do |tp| + disable_tracepoints + record_exception(tp) + enable_tracepoints + end + end + + def prepare_args(tp) + args_after_self = tp.parameters.map do |(kind, name)| + raw = if tp.binding.nil? || name.nil? + nil + else + begin + tp.binding.local_variable_get(name) + rescue + nil + end + end + + value = if raw.nil? + @record.nil_value + elsif raw.equal?(self) || raw.equal?(@record) + @record.raw_obj_value(raw.to_s, raw.class.name) + else + @record.to_value(raw) + end + + [name.to_sym, value] + end + + # can be class or module + module_name = tp.self.class.name + begin + args = [[:self, @record.raw_obj_value(tp.self.to_s, module_name)]] + args_after_self + rescue + # $stderr.write("error args\n") + args = [] + end + + args.each do |(name, value)| + @record.register_variable(name, value) + end + + arg_records = args.map do |(name, value)| + [@record.load_variable_id(name), value] + end + + arg_records + end + + def record_call(tp) + if self.tracks_call?(tp) + module_name = tp.self.class.name + method_name_prefix = module_name == 'Object' ? 
'' : "#{module_name}#" + method_name = "#{method_name_prefix}#{tp.method_id}" + if @debug + codetracer_original_puts "call #{method_name} with #{tp.parameters}" + end + arg_records = prepare_args(tp) + @record.register_step(tp.path, tp.lineno) + @record.register_call(tp.path, tp.lineno, method_name, arg_records) + else + end + end + + def record_return(tp) + if self.tracks_call?(tp) + if @debug + codetracer_original_puts 'return' + end + return_value = @record.to_value(tp.return_value) + @record.register_step(tp.path, tp.lineno) + # return value support inspired by existing IDE-s/envs like + # Visual Studio/JetBrains IIRC + # (Nikola Gamzakov showed me some examples) + @record.register_variable("", return_value) + @record.events << [:Return, ReturnRecord.new(return_value)] + end + end + + def record_step(tp) + if self.tracks_call?(tp) + @record.register_step(tp.path, tp.lineno) + variables = self.load_variables(tp.binding) + variables.each do |(name, value)| + @record.register_variable(name, value) + end + end + end + + def record_event(*args) + disable_tracepoints + if args.length == 2 + caller, content = args + begin + location = caller[0].split[0].split(':')[0..1] + path, line = location[0], location[1].to_i + @record.register_step(path, line) + rescue + # ignore for now + end + @record.events << [:Event, RecordEvent.new(EVENT_KIND_WRITE, content, "")] + elsif args.length == 3 + path, line, content = args + record_event(["#{path}:#{line}"], content) + else + raise ArgumentError, "wrong number of arguments" + end + enable_tracepoints + end + + def record_exception(tp) + @record.events << [:Event, RecordEvent.new(EVENT_KIND_ERROR, tp.raised_exception.to_s, "")] + end + + def start + ::CodeTracer::KernelPatches.install(self) + enable_tracepoints + end + + def stop + disable_tracepoints + ::CodeTracer::KernelPatches.uninstall(self) + end + + def trace_block(&block) + raise ArgumentError, "no block given" unless block_given? 
+ + start + begin + yield + ensure + stop + end + end + + # Flush trace to output directory - compatible with native recorder API + def flush_trace(out_dir) + @record.serialize('', out_dir) + end + + private + + def enable_tracepoints + @calls_tracepoint.enable + @return_tracepoint.enable + @raise_tracepoint.enable + @tracing = true + # We intentionally enable the line tracepoint after the other tracepoints + # to avoid recording the initial activation call as a line event. + @line_tracepoint.enable + end + + def disable_tracepoints + # We disable the line tracepoint first to avoid recording the deactivation + # call as a line event. + @line_tracepoint.disable + @calls_tracepoint.disable + @return_tracepoint.disable + @raise_tracepoint.disable + @tracing = false + end + + # Collect local variables from the current binding and convert them + # into CodeTracer values. Variables that refer to the recorder itself + # (or its TraceRecord) are ignored to avoid serialising the entire + # tracer state, which quickly leads to deep recursion and huge traces. + def load_variables(binding) + return [] if binding.nil? + + binding.local_variables.filter_map do |name| + v = binding.local_variable_get(name) - private + next if v.equal?(self) || v.equal?(@record) - def setup_defaults - @record.register_call('', 1, '', []) - @tracer.ignore('lib/ruby') - @tracer.ignore('trace.rb') - @tracer.ignore('recorder.rb') - @tracer.ignore(' MAX_COUNT - # $stderr.write "array count ", v.count, "\n" - NOT_SUPPORTED_VALUE # TODO: non-expanded/other hint? 
- else - sequence_value(v.map do |element| - to_value(element, depth - 1) - end) + MAX_COUNT = 5000 + + def to_value(v, depth=10) + if depth <= 0 + return nil_value end - when Hash - if v.count > MAX_COUNT - NOT_SUPPORTED_VALUE - else - pairs = v.map do |k, val| + @value_count += 1 + if @value_count % 10_000 == 0 + $stderr.write("value #{@value_count}\n") if @debug + end + case v + when Integer + int_value(v) + when Float + float_value(v) + when String + string_value(v) + when Symbol + symbol_value(v) + when true + true_value + when false + false_value + when nil + nil_value + when Array + if v.count > MAX_COUNT + # $stderr.write "array count ", v.count, "\n" + not_supported_value # TODO: non-expanded/other hint? + else + sequence_value(v.map do |element| + to_value(element, depth - 1) + end) + end + when Hash + if true or v.count > MAX_COUNT + not_supported_value + else + pairs = v.map do |k, val| + struct_value('Pair', ['k', 'v'], [k, val], depth) + end + sequence_value(pairs, 'Hash') + end + when Range + struct_value('Range', ['begin', 'end'], [v.begin, v.end], depth) + when ->(o) { defined?(Set) && o.is_a?(Set) } + if v.size > MAX_COUNT + not_supported_value + else + sequence_value(v.to_a.map { |e| to_value(e, depth - 1) }, 'Set') + end + when Time + struct_value('Time', ['sec', 'nsec'], [v.to_i, v.nsec], depth) + when Regexp + struct_value('Regexp', ['source', 'options'], [v.source, v.options], depth) + when Struct + struct_value(v.class.name, v.members.map(&:to_s), v.values, depth) + when ->(o) { defined?(OpenStruct) && o.is_a?(OpenStruct) } + h = v.to_h + pairs = h.map do |k, val| struct_value('Pair', ['k', 'v'], [k, val], depth) end sequence_value(pairs, 'Hash') - end - when Range - struct_value('Range', ['begin', 'end'], [v.begin, v.end], depth) - when ->(o) { defined?(Set) && o.is_a?(Set) } - if v.size > MAX_COUNT - NOT_SUPPORTED_VALUE - else - sequence_value(v.to_a.map { |e| to_value(e, depth - 1) }, 'Set') - end - when Time - struct_value('Time', 
['sec', 'nsec'], [v.to_i, v.nsec], depth) - when Regexp - struct_value('Regexp', ['source', 'options'], [v.source, v.options], depth) - when Struct - struct_value(v.class.name, v.members.map(&:to_s), v.values, depth) - when ->(o) { defined?(OpenStruct) && o.is_a?(OpenStruct) } - h = v.to_h - pairs = h.map do |k, val| - struct_value('Pair', ['k', 'v'], [k, val], depth) - end - sequence_value(pairs, 'Hash') - when Object - # NOT_SUPPORTED_VALUE - class_name = v.class.name - field_values = v.instance_variables.map do |name| - v.instance_variable_get(name) - end - field_names = nil - if $codetracer_record.type_id_for(class_name).nil? - field_names = v.instance_variables.map { |name| name.to_s[1..] } + when Object + # not_supported_value + class_name = v.class.name + field_values = v.instance_variables.map do |name| + v.instance_variable_get(name) + end + field_names = nil + if type_id_for(class_name).nil? + field_names = v.instance_variables.map { |name| name.to_s[1..] } + else + field_names = [] + end + struct_value(class_name, field_names, field_values, depth) else - field_names = [] + not_supported_value end - struct_value(class_name, field_names, field_values, depth) - else - NOT_SUPPORTED_VALUE end end +################## + +# IMPORTANT: sync with common_types.nim / runtime_tracing EventLogKind +EVENT_KIND_WRITE = 0 +EVENT_KIND_ERROR = 11 + NO_KEY = -1 NO_STEP = -1 diff --git a/gems/codetracer-pure-ruby-recorder/lib/trace.rb b/gems/codetracer-pure-ruby-recorder/lib/trace.rb deleted file mode 100644 index 9fd4820..0000000 --- a/gems/codetracer-pure-ruby-recorder/lib/trace.rb +++ /dev/null @@ -1,284 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2025 Metacraft Labs Ltd -# See LICENSE file in the project root for full license information. 
- -require 'json' -require 'optparse' -require_relative 'recorder' -require_relative 'codetracer/kernel_patches' - -# Helper to access the original +puts+ implementation when kernel -# methods are patched by {Codetracer::KernelPatches}. This avoids -# tracing debug output while still functioning even if the patches -# are not installed. -def codetracer_puts_no_trace(*args) - if Kernel.private_method_defined?(:codetracer_original_puts) - Kernel.send(:codetracer_original_puts, *args) - else - Kernel.puts(*args) - end -end - - -# Warning: -# probably related to our development env: -# if we hit an `incompatible library version` error, like -# `:136:in `require': incompatible library version - /home/alexander92/.local/share/gem/ruby/3.1.0/gems/strscan-3.1.0/lib/strscan.so (LoadError) -# or -# `:136:in `require': incompatible library version - /home/alexander92/.local/share/gem/ruby/3.1.0/gems/json-2.7.2/lib/json/ext/parser.so (LoadError)` -# -# it seems clearing `~/.local/share/gem` fixes things up -# however this seems as a risky solution, as it clears global gem state! -# BE CAREFUL if you have other ruby projects/data there! - -# class IO -# alias :old_write :write - -# def write(name, content="", offset=0, opt=nil) -# if $tracer.tracing -# $tracer.deactivate -# $tracer.record_event(caller, content) -# $tracer.activate -# end -# old_write(name, content, offset, opt) -# end -# end - -class Tracer - attr_accessor :calls_tracepoint, :return_tracepoint, - :line_tracepoint, :raise_tracepoint, :tracing - - attr_reader :ignore_list, :record, :debug - - def initialize(record, debug: false) - @tracing = false - @trace_stopped = false - @record = record - @ignore_list = [] - @debug = debug - @record.debug = debug if @record.respond_to?(:debug=) - setup_tracepoints - end - - def stop_tracing - @trace_stopped = true - @tracing = false - end - - def tracks_call?(tp) - tp.path.end_with?('.rb') && !@ignore_list.any? 
{ |path| tp.path.include?(path) } - end - - def ignore(path) - @ignore_list << path - end - - def setup_tracepoints - @calls_tracepoint = TracePoint.new(:call) do |tp| - deactivate - record_call(tp) - activate - end - - @return_tracepoint = TracePoint.new(:return) do |tp| - deactivate - record_return(tp) - activate - end - - @line_tracepoint = TracePoint.new(:line) do |tp| - deactivate - record_step(tp) - activate - end - - @raise_tracepoint = TracePoint.new(:raise) do |tp| - deactivate - record_exception(tp) - activate - end - end - - def prepare_args(tp) - args_after_self = tp.parameters.map do |(kind, name)| - value = if tp.binding.nil? || name.nil? - NIL_VALUE - else - begin - to_value(tp.binding.local_variable_get(name)) - rescue - NIL_VALUE - end - end - [name.to_sym, value] - end - - # can be class or module - module_name = tp.self.class.name - begin - args = [[:self, raw_obj_value(tp.self.to_s, module_name)]] + args_after_self - rescue - # $stderr.write("error args\n") - args = [] - end - - args.each do |(name, value)| - @record.register_variable(name, value) - end - - arg_records = args.map do |(name, value)| - [@record.load_variable_id(name), value] - end - - arg_records - end - - def record_call(tp) - if self.tracks_call?(tp) - module_name = tp.self.class.name - method_name_prefix = module_name == 'Object' ? 
'' : "#{module_name}#" - method_name = "#{method_name_prefix}#{tp.method_id}" - - codetracer_puts_no_trace "call #{method_name} with #{tp.parameters}" if $tracer.debug - - arg_records = prepare_args(tp) - - @record.register_step(tp.path, tp.lineno) - @record.register_call(tp.path, tp.lineno, method_name, arg_records) - else - end - end - - def record_return(tp) - if self.tracks_call?(tp) - codetracer_puts_no_trace "return" if $tracer.debug - return_value = to_value(tp.return_value) - @record.register_step(tp.path, tp.lineno) - # return value support inspired by existing IDE-s/envs like - # Visual Studio/JetBrains IIRC - # (Nikola Gamzakov showed me some examples) - @record.register_variable("", return_value) - @record.events << [:Return, ReturnRecord.new(return_value)] - end - end - - def record_step(tp) - if self.tracks_call?(tp) - @record.register_step(tp.path, tp.lineno) - variables = self.load_variables(tp.binding) - variables.each do |(name, value)| - @record.register_variable(name, value) - end - end - end - - def record_event(*args) - if args.length == 2 - caller, content = args - begin - location = caller[0].split[0].split(':')[0..1] - path, line = location[0], location[1].to_i - @record.register_step(path, line) - rescue - # ignore for now - end - @record.events << [:Event, RecordEvent.new(EVENT_KIND_WRITE, content, "")] - elsif args.length == 3 - path, line, content = args - record_event(["#{path}:#{line}"], content) - else - raise ArgumentError, "wrong number of arguments" - end - end - - def record_exception(tp) - @record.events << [:Event, RecordEvent.new(EVENT_KIND_ERROR, tp.raised_exception.to_s, "")] - end - - def activate - if !@trace_stopped - @calls_tracepoint.enable - @return_tracepoint.enable - @line_tracepoint.enable - @raise_tracepoint.enable - @tracing = true - end - end - - def deactivate - @tracing = false - @calls_tracepoint.disable - @return_tracepoint.disable - @line_tracepoint.disable - @raise_tracepoint.disable - end - - private - - 
def load_variables(binding) - if !binding.nil? - # $stdout.write binding.local_variables - binding.local_variables.map do |name| - v = binding.local_variable_get(name) - out = to_value(v) - [name, out] - end - else - [] - end - end -end - -if __FILE__ == $PROGRAM_NAME - $tracer = Tracer.new($codetracer_record, debug: ENV['CODETRACER_RUBY_RECORDER_DEBUG'] == '1') - ::Codetracer::KernelPatches.install($tracer) - - options = {} - parser = OptionParser.new do |opts| - opts.banner = "usage: ruby trace.rb [options] [args]" - opts.on('-o DIR', '--out-dir DIR', 'Directory to write trace files') do |dir| - options[:out_dir] = dir - end - opts.on('-h', '--help', 'Print this help') do - puts opts - exit - end - end - parser.order! - - program = ARGV.shift - if program.nil? - $stderr.puts parser - exit 1 - end - - $tracer.record.register_call('', 1, '', []) - $tracer.ignore('lib/ruby') - $tracer.ignore('trace.rb') - $tracer.ignore('recorder.rb') - $tracer.ignore(' e - if $tracer.debug - codetracer_puts_no_trace '' - codetracer_puts_no_trace '==== trace.rb error while tracing program ===' - codetracer_puts_no_trace 'ERROR' - codetracer_puts_no_trace e - codetracer_puts_no_trace e.backtrace - codetracer_puts_no_trace '=====================' - codetracer_puts_no_trace '' - end - end - - $tracer.stop_tracing - - out_dir = options[:out_dir] || ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] || Dir.pwd - $tracer.record.serialize(program, out_dir) -end diff --git a/gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder b/gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder old mode 100644 new mode 100755 index ab00c17..5147868 --- a/gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder +++ b/gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder @@ -1,5 +1,9 @@ #!/usr/bin/env ruby -require 'rbconfig' -script = File.expand_path('../lib/native_trace.rb', __dir__) -exec RbConfig.ruby, script, *ARGV +# SPDX-License-Identifier: MIT +# CLI wrapper for the native tracer +lib_dir 
= File.expand_path('../lib', __dir__) +$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir) +require 'codetracer_ruby_recorder' + +exit CodeTracer::RubyRecorder.parse_argv_and_trace_ruby_file(ARGV) diff --git a/gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs b/gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs index 1455a60..2dc8477 100644 --- a/gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs +++ b/gems/codetracer-ruby-recorder/ext/native_tracer/src/lib.rs @@ -577,10 +577,10 @@ unsafe extern "C" fn event_hook_raw(data: VALUE, arg: *mut rb_trace_arg_t) { String::from_utf8_lossy(std::slice::from_raw_parts(ptr as *const u8, len)).to_string() }; let line = rb_num2long(line_val) as i64; - if path.contains("native_trace.rb") + if path.contains("codetracer_ruby_recorder.rb") || path.contains("lib/ruby") || path.contains("recorder.rb") - || path.contains("trace.rb") + || path.contains("codetracer_pure_ruby_recorder.rb") || path.contains("gems/") || path.starts_with(" VALUE = enable_tracing; let disable_cb: unsafe extern "C" fn(VALUE) -> VALUE = disable_tracing; diff --git a/gems/codetracer-ruby-recorder/lib/codetracer/kernel_patches.rb b/gems/codetracer-ruby-recorder/lib/codetracer/kernel_patches.rb index 7dc6689..f3225bd 100644 --- a/gems/codetracer-ruby-recorder/lib/codetracer/kernel_patches.rb +++ b/gems/codetracer-ruby-recorder/lib/codetracer/kernel_patches.rb @@ -1,6 +1,6 @@ # SPDX-License-Identifier: MIT -module Codetracer +module CodeTracer module KernelPatches @@tracers = [] @@ -54,10 +54,6 @@ def self.uninstall(tracer) alias_method :p, :codetracer_original_p alias_method :puts, :codetracer_original_puts alias_method :print, :codetracer_original_print - - remove_method :codetracer_original_p - remove_method :codetracer_original_puts - remove_method :codetracer_original_print end end end diff --git a/gems/codetracer-ruby-recorder/lib/codetracer_ruby_recorder.rb b/gems/codetracer-ruby-recorder/lib/codetracer_ruby_recorder.rb 
index f09f0b3..8b686ae 100644 --- a/gems/codetracer-ruby-recorder/lib/codetracer_ruby_recorder.rb +++ b/gems/codetracer-ruby-recorder/lib/codetracer_ruby_recorder.rb @@ -1,19 +1,138 @@ +# SPDX-License-Identifier: MIT +# Library providing a helper method to execute the native tracer. + +require 'optparse' require 'fileutils' require 'rbconfig' +require_relative 'codetracer/kernel_patches' + +module CodeTracer + class RubyRecorder + def self.parse_argv_and_trace_ruby_file(argv) + options = {} + parser = OptionParser.new do |opts| + opts.banner = 'usage: codetracer-ruby-recorder [options] [args]' + opts.on('-o DIR', '--out-dir DIR', 'Directory to write trace files') do |dir| + options[:out_dir] = dir + end + opts.on('-h', '--help', 'Print this help') do + puts opts + exit + end + end + parser.order!(argv) + + program = argv.shift + if program.nil? + $stderr.puts parser + exit 1 + end + + # Remaining arguments after the program name are passed to the traced program + program_args = argv.dup + + out_dir = options[:out_dir] || ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] || Dir.pwd + trace_ruby_file(program, out_dir, program_args) + 0 + end + + def self.trace_ruby_file(program, out_dir, program_args = []) + recorder = RubyRecorder.new + return 1 unless recorder.available? + + ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] = out_dir + + recorder.start + begin + # Set ARGV to contain the program arguments + original_argv = ARGV.dup + ARGV.clear + ARGV.concat(program_args) + + load program + ensure + # Restore original ARGV + ARGV.clear + ARGV.concat(original_argv) + + recorder.stop + recorder.flush_trace(out_dir) + end + 0 + end + + # Execute the native tracer CLI logic with the provided +argv+. + def self.execute(argv) + parse_argv_and_trace_ruby_file(argv) + end + + def initialize + @recorder = nil + @active = false + load_native_recorder + end + + # Start the recorder and install kernel patches + def start + return if @active || @recorder.nil? 
+ + @recorder.enable_tracing + CodeTracer::KernelPatches.install(self) + @active = true + end + + # Stop the recorder and remove kernel patches + def stop + return unless @active + + CodeTracer::KernelPatches.uninstall(self) + @recorder.disable_tracing if @recorder + @active = false + end + + # Record event for kernel patches integration + def record_event(path, line, content) + @recorder.record_event(path, line, content) if @recorder + end + + # Flush trace to output directory + def flush_trace(out_dir) + @recorder.flush_trace(out_dir) if @recorder + end + + # Check if recorder is available + def available? + !@recorder.nil? + end + + private + + def load_native_recorder + begin + # Load native extension at module level + ext_dir = File.expand_path('../ext/native_tracer/target/release', __dir__) + dlext = RbConfig::CONFIG['DLEXT'] + target_path = File.join(ext_dir, "codetracer_ruby_recorder.#{dlext}") + unless File.exist?(target_path) + extensions = %w[so bundle dylib dll] + alt_path = extensions + .map { |ext| File.join(ext_dir, "libcodetracer_ruby_recorder.#{ext}") } + .find { |path| File.exist?(path) } + if alt_path + begin + File.symlink(alt_path, target_path) + rescue StandardError + FileUtils.cp(alt_path, target_path) + end + end + end -ext_dir = File.expand_path('../ext/native_tracer/target/release', __dir__) -dlext = RbConfig::CONFIG['DLEXT'] -lib = File.join(ext_dir, "codetracer_ruby_recorder.#{dlext}") -unless File.exist?(lib) - alt = %w[so bundle dylib dll] - .map { |ext| File.join(ext_dir, "libcodetracer_ruby_recorder.#{ext}") } - .find { |path| File.exist?(path) } - if alt - begin - File.symlink(alt, lib) - rescue StandardError - FileUtils.cp(alt, lib) + require target_path + @recorder = CodeTracerNativeRecorder.new + rescue Exception => e + warn "native tracer unavailable: #{e}" + @recorder = nil + end end end end -require lib diff --git a/gems/codetracer-ruby-recorder/lib/native_trace.rb b/gems/codetracer-ruby-recorder/lib/native_trace.rb deleted 
file mode 100644 index a43503b..0000000 --- a/gems/codetracer-ruby-recorder/lib/native_trace.rb +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env ruby -# SPDX-License-Identifier: MIT -# Simple utility loading the native tracer extension and executing a program. - -require 'optparse' -require 'fileutils' -require 'rbconfig' -require_relative 'codetracer/kernel_patches' - -options = {} -parser = OptionParser.new do |opts| - opts.banner = "usage: ruby native_trace.rb [options] [args]" - opts.on('-o DIR', '--out-dir DIR', 'Directory to write trace files') do |dir| - options[:out_dir] = dir - end - opts.on('-h', '--help', 'Print this help') do - puts opts - exit - end -end -parser.order! - -if ARGV.empty? - $stderr.puts parser - exit 1 -end - -out_dir = options[:out_dir] || ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] || Dir.pwd -ENV['CODETRACER_RUBY_RECORDER_OUT_DIR'] = out_dir - -# Path to the compiled native extension -ext_dir = File.expand_path('../ext/native_tracer/target/release', __dir__) -dlext = RbConfig::CONFIG['DLEXT'] -target_path = File.join(ext_dir, "codetracer_ruby_recorder.#{dlext}") -unless File.exist?(target_path) - extensions = %w[so bundle dylib dll] - alt_path = extensions - .map { |ext| File.join(ext_dir, "libcodetracer_ruby_recorder.#{ext}") } - .find { |path| File.exist?(path) } - if alt_path - begin - File.symlink(alt_path, target_path) - rescue StandardError - FileUtils.cp(alt_path, target_path) - end - end -end - -recorder = nil -begin - require target_path - recorder = RubyRecorder.new - $recorder = recorder - ::Codetracer::KernelPatches.install(recorder) - -rescue Exception => e - warn "native tracer unavailable: #{e}" -end - -program = ARGV.shift -recorder.enable_tracing if recorder -load program -if recorder - recorder.disable_tracing - ::Codetracer::KernelPatches.uninstall(recorder) - recorder.flush_trace(out_dir) -end - diff --git a/test/benchmarks/run_benchmarks.rb b/test/benchmarks/run_benchmarks.rb index ecb4e1e..b17a8df 100755 --- 
a/test/benchmarks/run_benchmarks.rb +++ b/test/benchmarks/run_benchmarks.rb @@ -53,7 +53,7 @@ def run_benchmark(name) native_dir = File.join(TMP_DIR, name, 'native') FileUtils.mkdir_p(native_dir) elapsed = Benchmark.realtime do - system(RbConfig.ruby, File.expand_path('../../gems/codetracer-ruby-recorder/lib/native_trace.rb', __dir__), + system(RbConfig.ruby, File.expand_path('../../gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder', __dir__), '--out-dir', native_dir, program) raise 'Native trace failed' unless $?.success? end @@ -64,7 +64,7 @@ def run_benchmark(name) pure_dir = File.join(TMP_DIR, name, 'pure') FileUtils.mkdir_p(pure_dir) elapsed = Benchmark.realtime do - system(RbConfig.ruby, File.expand_path('../../gems/codetracer-pure-ruby-recorder/lib/trace.rb', __dir__), + system(RbConfig.ruby, File.expand_path('../../gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder', __dir__), '--out-dir', pure_dir, program) raise 'Pure trace failed' unless $?.success? 
end diff --git a/test/test_kernel_patches.rb b/test/test_kernel_patches.rb index aa4ed6f..668aa55 100644 --- a/test/test_kernel_patches.rb +++ b/test/test_kernel_patches.rb @@ -26,34 +26,31 @@ def setup @tracer2 = MockTracer.new("tracer2") # Ensure a clean state before each test by attempting to clear any existing tracers # This is a bit of a hack, ideally KernelPatches would offer a reset or more direct access - current_tracers = Codetracer::KernelPatches.class_variable_get(:@@tracers).dup + current_tracers = CodeTracer::KernelPatches.class_variable_get(:@@tracers).dup current_tracers.each do |tracer| - Codetracer::KernelPatches.uninstall(tracer) + CodeTracer::KernelPatches.uninstall(tracer) end end def teardown # Ensure all tracers are uninstalled after each test - current_tracers = Codetracer::KernelPatches.class_variable_get(:@@tracers).dup + current_tracers = CodeTracer::KernelPatches.class_variable_get(:@@tracers).dup current_tracers.each do |tracer| - Codetracer::KernelPatches.uninstall(tracer) + CodeTracer::KernelPatches.uninstall(tracer) end # Verify that original methods are restored if no tracers are left - assert_empty Codetracer::KernelPatches.class_variable_get(:@@tracers), "Tracers should be empty after teardown" - refute Kernel.private_method_defined?(:codetracer_original_p), "Original method aliases should be removed" - refute Kernel.private_method_defined?(:codetracer_original_puts), "Original method aliases should be removed" - refute Kernel.private_method_defined?(:codetracer_original_print), "Original method aliases should be removed" + assert_empty CodeTracer::KernelPatches.class_variable_get(:@@tracers), "Tracers should be empty after teardown" end def test_patching_and_basic_event_recording - Codetracer::KernelPatches.install(@tracer1) + CodeTracer::KernelPatches.install(@tracer1) expected_line_p = __LINE__; p 'hello' expected_line_puts = __LINE__; puts 'world' expected_line_print = __LINE__; print 'test' assert_equal 3, @tracer1.events.size 
- + event_p = @tracer1.events[0] assert_equal __FILE__, event_p[:path] assert_equal expected_line_p, event_p[:lineno] @@ -69,12 +66,12 @@ def test_patching_and_basic_event_recording assert_equal expected_line_print, event_print[:lineno] assert_equal "test", event_print[:content] - Codetracer::KernelPatches.uninstall(@tracer1) + CodeTracer::KernelPatches.uninstall(@tracer1) end def test_multiple_tracers - Codetracer::KernelPatches.install(@tracer1) - Codetracer::KernelPatches.install(@tracer2) + CodeTracer::KernelPatches.install(@tracer1) + CodeTracer::KernelPatches.install(@tracer2) expected_line_multi = __LINE__; p 'multitest' @@ -91,12 +88,12 @@ def test_multiple_tracers assert_equal expected_line_multi, event2_multi[:lineno] assert_equal "\"multitest\"", event2_multi[:content] - Codetracer::KernelPatches.uninstall(@tracer1) + CodeTracer::KernelPatches.uninstall(@tracer1) @tracer1.clear_events @tracer2.clear_events expected_line_one_left = __LINE__; p 'one left' - + assert_empty @tracer1.events, "Tracer1 should have no events after being uninstalled" assert_equal 1, @tracer2.events.size @@ -105,12 +102,12 @@ def test_multiple_tracers assert_equal expected_line_one_left, event2_one_left[:lineno] assert_equal "\"one left\"", event2_one_left[:content] - Codetracer::KernelPatches.uninstall(@tracer2) + CodeTracer::KernelPatches.uninstall(@tracer2) end def test_restoration_of_original_methods - Codetracer::KernelPatches.install(@tracer1) - Codetracer::KernelPatches.uninstall(@tracer1) + CodeTracer::KernelPatches.install(@tracer1) + CodeTracer::KernelPatches.uninstall(@tracer1) # To truly test restoration, we'd capture stdout. Here, we focus on the tracer not being called. # If KernelPatches is working, uninstalling the last tracer should remove the patches. 
@@ -123,10 +120,10 @@ def test_restoration_of_original_methods end def test_correct_event_arguments - Codetracer::KernelPatches.install(@tracer1) + CodeTracer::KernelPatches.install(@tracer1) arg_obj = { key: "value", number: 123 } - + expected_line_p_detailed = __LINE__; p "detailed_p", arg_obj expected_line_puts_detailed = __LINE__; puts "detailed_puts", arg_obj.to_s expected_line_print_detailed = __LINE__; print "detailed_print", arg_obj.to_s @@ -152,7 +149,7 @@ def test_correct_event_arguments assert_equal expected_line_print_detailed, event_print[:lineno], "Line number for print mismatch" # print calls to_s on each argument and prints them sequentially assert_equal "detailed_print{:key=>\"value\", :number=>123}", event_print[:content], "Content for print mismatch" - - Codetracer::KernelPatches.uninstall(@tracer1) + + CodeTracer::KernelPatches.uninstall(@tracer1) end end diff --git a/test/test_tracer.rb b/test/test_tracer.rb index 023916b..69ac2a4 100644 --- a/test/test_tracer.rb +++ b/test/test_tracer.rb @@ -32,6 +32,24 @@ def run_trace(tracer_script, program_name, *args) end end + def run_trace_with_separator(tracer_script, program_name, *args) + base = File.basename(program_name, '.rb') + tracer_name = tracer_script.include?('native') ? 'native' : 'pure' + Dir.chdir(File.expand_path('..', __dir__)) do + program = File.join('test', 'programs', program_name) + out_dir = File.join('test', 'tmp', "#{base}_dashdash", tracer_name) + FileUtils.mkdir_p(out_dir) + stdout, stderr, status = Open3.capture3( + RbConfig.ruby, tracer_script, '--out-dir', out_dir, '--', program, *args + ) + raise "trace failed: #{stderr}" unless status.success? 
+ trace_file = File.join(out_dir, 'trace.json') + trace = JSON.parse(File.read(trace_file)) if File.exist?(trace_file) + program_out = stdout.lines.reject { |l| l.start_with?('call ') || l.start_with?('return') }.join + [trace, program_out] + end + end + def expected_output(program_name) base = File.basename(program_name, '.rb') fixture = File.join(FIXTURE_DIR, "#{base}_output.txt") @@ -51,8 +69,8 @@ def program_args(base) Dir.glob(File.join(FIXTURE_DIR, '*_trace.json')).each do |fixture| base = File.basename(fixture, '_trace.json') define_method("test_#{base}") do - pure_trace, pure_out = run_trace('gems/codetracer-pure-ruby-recorder/lib/trace.rb', "#{base}.rb", *program_args(base)) - native_trace, native_out = run_trace('gems/codetracer-ruby-recorder/lib/native_trace.rb', "#{base}.rb", *program_args(base)) + pure_trace, pure_out = run_trace('gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder', "#{base}.rb", *program_args(base)) + native_trace, native_out = run_trace('gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder', "#{base}.rb", *program_args(base)) expected = expected_trace("#{base}.rb") assert_equal expected, pure_trace @@ -63,6 +81,19 @@ def program_args(base) end end + def test_args_sum_with_separator + base = 'args_sum' + pure_trace, pure_out = run_trace_with_separator('gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder', "#{base}.rb", *program_args(base)) + native_trace, native_out = run_trace_with_separator('gems/codetracer-ruby-recorder/bin/codetracer-ruby-recorder', "#{base}.rb", *program_args(base)) + + expected = expected_trace("#{base}.rb") + assert_equal expected, pure_trace + assert_equal expected, native_trace + expected = expected_output("#{base}.rb") + assert_equal expected, pure_out + assert_equal expected, native_out + end + def run_gem_installation_test(gem_bin, gem_module) Dir.chdir(File.expand_path('..', __dir__)) do gem_dir = File.join('gems', gem_bin) @@ -92,15 +123,22 @@ def 
run_gem_installation_test(gem_bin, gem_module) out_dir_lib = File.join('test', 'tmp', "gem_install_#{gem_bin.tr('-', '_')}_lib") FileUtils.rm_rf(out_dir_lib) + + recorder_class = if gem_bin == 'codetracer-ruby-recorder' + "CodeTracer::RubyRecorder" + else + "CodeTracer::PureRubyRecorder" + end + script = <<~RUBY require '#{gem_module}' - recorder = RubyRecorder.new + recorder = #{recorder_class}.new puts 'start trace' - recorder.disable_tracing + recorder.stop puts 'this will not be traced' - recorder.enable_tracing + recorder.start puts 'this will be traced' - recorder.disable_tracing + recorder.stop puts 'tracing disabled' recorder.flush_trace('#{out_dir_lib}') RUBY @@ -133,7 +171,7 @@ def test_pure_debug_smoke env = { 'CODETRACER_RUBY_RECORDER_DEBUG' => '1' } out_dir = File.join('test', 'tmp', 'debug_smoke') FileUtils.rm_rf(out_dir) - stdout, stderr, status = Open3.capture3(env, RbConfig.ruby, 'gems/codetracer-pure-ruby-recorder/lib/trace.rb', '--out-dir', out_dir, File.join('test', 'programs', 'addition.rb')) + stdout, stderr, status = Open3.capture3(env, RbConfig.ruby, 'gems/codetracer-pure-ruby-recorder/bin/codetracer-pure-ruby-recorder', '--out-dir', out_dir, File.join('test', 'programs', 'addition.rb')) raise "trace failed: #{stderr}" unless status.success? lines = stdout.lines.map(&:chomp)