diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..f5477ea3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "mspec"] + path = spec/mspec + url = git@github.com:ruby/mspec.git +[submodule "spec"] + path = spec/ruby + url = git@github.com:ruby/spec.git diff --git a/.rubocop.yml b/.rubocop.yml index daf5a824..1e3e2f83 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -7,7 +7,7 @@ AllCops: SuggestExtensions: false TargetRubyVersion: 2.7 Exclude: - - '{.git,.github,bin,coverage,pkg,test/fixtures,vendor,tmp}/**/*' + - '{.git,.github,bin,coverage,pkg,spec,test/fixtures,vendor,tmp}/**/*' - test.rb Layout/LineLength: @@ -43,6 +43,9 @@ Lint/NonLocalExitFromIterator: Lint/RedundantRequireStatement: Enabled: false +Lint/RescueException: + Enabled: false + Lint/SuppressedException: Enabled: false diff --git a/Rakefile b/Rakefile index 4973d45e..f06d8cf8 100644 --- a/Rakefile +++ b/Rakefile @@ -26,3 +26,10 @@ end SyntaxTree::Rake::CheckTask.new(&configure) SyntaxTree::Rake::WriteTask.new(&configure) + +desc "Run mspec tests using YARV emulation" +task :spec do + Dir["./spec/ruby/language/**/*_spec.rb"].each do |filepath| + sh "exe/yarv ./spec/mspec/bin/mspec-tag #{filepath}" + end +end diff --git a/exe/yarv b/exe/yarv new file mode 100755 index 00000000..3efb23ff --- /dev/null +++ b/exe/yarv @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +$:.unshift(File.expand_path("../lib", __dir__)) + +require "syntax_tree" + +# Require these here so that we can run binding.irb without having them require +# anything that we've already patched. +require "irb" +require "irb/completion" +require "irb/color_printer" +require "readline" + +# First, create an instance of our virtual machine. +events = + if ENV["DEBUG"] + SyntaxTree::YARV::VM::STDOUTEvents.new + else + SyntaxTree::YARV::VM::NullEvents.new + end + +vm = SyntaxTree::YARV::VM.new(events) + +# Next, set up a bunch of aliases for methods that we're going to hook into in +# order to set up our virtual machine. +class << Kernel + alias yarv_require require + alias yarv_require_relative require_relative + alias yarv_load load + alias yarv_eval eval + alias yarv_throw throw + alias yarv_catch catch +end + +# Next, patch the methods that we just aliased so that they use our virtual +# machine's versions instead. This allows us to load Ruby files and have them +# execute in our virtual machine instead of the runtime environment. +[Kernel, Kernel.singleton_class].each do |klass| + klass.define_method(:require) { |filepath| vm.require(filepath) } + + klass.define_method(:load) { |filepath| vm.load(filepath) } + + # klass.define_method(:require_relative) do |filepath| + # vm.require_relative(filepath) + # end + + # klass.define_method(:eval) do | + # source, + # binding = TOPLEVEL_BINDING, + # filename = "(eval)", + # lineno = 1 + # | + # vm.eval(source, binding, filename, lineno) + # end + + # klass.define_method(:throw) { |tag, value = nil| vm.throw(tag, value) } + + # klass.define_method(:catch) { |tag, &block| vm.catch(tag, &block) } +end + +# Finally, require the file that we want to execute. +vm.require_resolved(ARGV.shift) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 1357e95f..39b55372 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -37,6 +37,7 @@ require_relative "syntax_tree/yarv/legacy" require_relative "syntax_tree/yarv/local_table" require_relative "syntax_tree/yarv/assembler" +require_relative "syntax_tree/yarv/vm" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 97592d4d..7e4da7bb 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,277 +1,8 @@ # frozen_string_literal: true -require "forwardable" - module SyntaxTree # This module provides an object representation of the YARV bytecode. module YARV - class VM - class Jump - attr_reader :name - - def initialize(name) - @name = name - end - end - - class Leave - attr_reader :value - - def initialize(value) - @value = value - end - end - - class Frame - attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars - - def initialize(iseq, parent, stack_index, _self, nesting) - @iseq = iseq - @parent = parent - @stack_index = stack_index - @_self = _self - @nesting = nesting - @svars = {} - end - end - - class TopFrame < Frame - def initialize(iseq) - super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) - end - end - - class BlockFrame < Frame - def initialize(iseq, parent, stack_index) - super(iseq, parent, stack_index, parent._self, parent.nesting) - end - end - - class MethodFrame < Frame - attr_reader :name, :block - - def initialize(iseq, parent, stack_index, _self, name, block) - super(iseq, parent, stack_index, _self, parent.nesting) - @name = name - @block = block - end - end - - class ClassFrame < Frame - def initialize(iseq, parent, stack_index, _self) - super(iseq, parent, stack_index, _self, parent.nesting + [_self]) - end - end - - class FrozenCore - define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } - - define_method("core#hash_merge_ptr") do |hash, *values| - hash.merge(values.each_slice(2).to_h) - end - - define_method("core#set_method_alias") do |clazz, new_name, old_name| - clazz.alias_method(new_name, old_name) - end - - define_method("core#set_variable_alias") do |new_name, old_name| - # Using eval here since there isn't a reflection API to be able to - # alias global variables. - eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) - end - - define_method("core#set_postexe") { |&block| END { block.call } } - - define_method("core#undef_method") do |clazz, name| - clazz.undef_method(name) - end - end - - FROZEN_CORE = FrozenCore.new.freeze - - extend Forwardable - - attr_reader :stack - def_delegators :stack, :push, :pop - - attr_reader :frame - def_delegators :frame, :_self - - def initialize - @stack = [] - @frame = nil - end - - ########################################################################## - # Helper methods for frames - ########################################################################## - - def run_frame(frame) - # First, set the current frame to the given value. - @frame = frame - - # Next, set up the local table for the frame. This is actually incorrect - # as it could use the values already on the stack, but for now we're - # just doing this for simplicity. - frame.iseq.local_table.size.times { push(nil) } - - # Yield so that some frame-specific setup can be done. - yield if block_given? - - # This hash is going to hold a mapping of label names to their - # respective indices in our instruction list. - labels = {} - - # This array is going to hold our instructions. - insns = [] - - # Here we're going to preprocess the instruction list from the - # instruction sequence to set up the labels hash and the insns array. - frame.iseq.insns.each do |insn| - case insn - when Integer, Symbol - # skip - when InstructionSequence::Label - labels[insn.name] = insns.length - else - insns << insn - end - end - - # Finally we can execute the instructions one at a time. If they return - # jumps or leaves we will handle those appropriately. - pc = 0 - while pc < insns.length - insn = insns[pc] - pc += 1 - - case (result = insn.call(self)) - when Jump - pc = labels[result.name] - when Leave - return result.value - end - end - ensure - @stack = stack[0...frame.stack_index] - @frame = frame.parent - end - - def run_top_frame(iseq) - run_frame(TopFrame.new(iseq)) - end - - def run_block_frame(iseq, *args, &block) - run_frame(BlockFrame.new(iseq, frame, stack.length)) do - locals = [*args, block] - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - def run_class_frame(iseq, clazz) - run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) - end - - def run_method_frame(name, iseq, _self, *args, **kwargs, &block) - run_frame( - MethodFrame.new(iseq, frame, stack.length, _self, name, block) - ) do - locals = [*args, block] - - if iseq.argument_options[:keyword] - # First, set up the keyword bits array. - keyword_bits = - iseq.argument_options[:keyword].map do |config| - kwargs.key?(config.is_a?(Array) ? config[0] : config) - end - - iseq.local_table.locals.each_with_index do |local, index| - # If this is the keyword bits local, then set it appropriately. - if local.name == 2 - locals.insert(index, keyword_bits) - next - end - - # First, find the configuration for this local in the keywords - # list if it exists. - name = local.name - config = - iseq.argument_options[:keyword].find do |keyword| - keyword.is_a?(Array) ? keyword[0] == name : keyword == name - end - - # If the configuration doesn't exist, then the local is not a - # keyword local. - next unless config - - if !config.is_a?(Array) - # required keyword - locals.insert(index, kwargs.fetch(name)) - elsif !config[1].nil? - # optional keyword with embedded default value - locals.insert(index, kwargs.fetch(name, config[1])) - else - # optional keyword with expression default value - locals.insert(index, nil) - end - end - end - - iseq.local_table.size.times do |index| - local_set(index, 0, locals.shift) - end - end - end - - ########################################################################## - # Helper methods for instructions - ########################################################################## - - def const_base - frame.nesting.last - end - - def frame_at(level) - current = frame - level.times { current = current.parent } - current - end - - def frame_svar - current = frame - current = current.parent while current.is_a?(BlockFrame) - current - end - - def frame_yield - current = frame - current = current.parent until current.is_a?(MethodFrame) - current - end - - def frozen_core - FROZEN_CORE - end - - def jump(label) - Jump.new(label.name) - end - - def leave - Leave.new(pop) - end - - def local_get(index, level) - stack[frame_at(level).stack_index + index] - end - - def local_set(index, level, value) - stack[frame_at(level).stack_index + index] = value - end - end - # Compile the given source into a YARV instruction sequence. def self.compile(source, options = Compiler::Options.new) SyntaxTree.parse(source).accept(Compiler.new(options)) diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index efb179c1..ec467b58 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -69,7 +69,7 @@ def initialize(filepath) end def assemble - iseq = InstructionSequence.new(:top, "
", nil, Location.default) + iseq = InstructionSequence.new("
", "", 1, :top) assemble_iseq(iseq, File.readlines(filepath, chomp: true)) iseq.compile! @@ -138,7 +138,7 @@ def assemble_iseq(iseq, lines) name = parse_symbol(name_value) flags = parse_number(flags_value) - class_iseq = iseq.class_child_iseq(name.to_s, Location.default) + class_iseq = iseq.class_child_iseq(name.to_s, 1) assemble_iseq(class_iseq, body) iseq.defineclass(name, class_iseq, flags) when "defined" @@ -153,7 +153,7 @@ def assemble_iseq(iseq, lines) line_index += body.length name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + method_iseq = iseq.method_child_iseq(name.to_s, 1) assemble_iseq(method_iseq, body) iseq.definemethod(name, method_iseq) @@ -162,7 +162,7 @@ def assemble_iseq(iseq, lines) line_index += body.length name = parse_symbol(operands) - method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + method_iseq = iseq.method_child_iseq(name.to_s, 1) assemble_iseq(method_iseq, body) iseq.definesmethod(name, method_iseq) @@ -221,7 +221,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end @@ -249,7 +249,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end @@ -354,7 +354,7 @@ def assemble_iseq(iseq, lines) body = parse_nested(lines[line_index..]) line_index += body.length - block_iseq = iseq.block_child_iseq(Location.default) + block_iseq = iseq.block_child_iseq(1) assemble_iseq(block_iseq, body) block_iseq end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index f642fb2f..21bc2982 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -13,7 +13,7 @@ def initialize(source) def compile # Set up the top-level instruction sequence that will be returned. - iseq = InstructionSequence.new(:top, "", nil, location) + iseq = InstructionSequence.new("", "", 1, :top) # Set up the $tape global variable that will hold our state. iseq.duphash({ 0 => 0 }) @@ -80,19 +80,6 @@ def compile private - # This is the location of the top instruction sequence, derived from the - # source string. - def location - Location.new( - start_line: 1, - start_char: 0, - start_column: 0, - end_line: source.count("\n") + 1, - end_char: source.size, - end_column: source.size - (source.rindex("\n") || 0) - 1 - ) - end - # $tape[$cursor] += value def change_by(iseq, value) iseq.getglobal(:$tape) @@ -111,6 +98,7 @@ def change_by(iseq, value) end iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop end # $cursor += value @@ -138,6 +126,7 @@ def output_char(iseq) iseq.send(YARV.calldata(:chr)) iseq.send(YARV.calldata(:putc, 1)) + iseq.pop end # $tape[$cursor] = $stdin.getc.ord @@ -150,6 +139,7 @@ def input_char(iseq) iseq.send(YARV.calldata(:ord)) iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop end # unless $tape[$cursor] == 0 @@ -164,14 +154,21 @@ def loop_start(iseq) iseq.putobject(0) iseq.send(YARV.calldata(:==, 1)) - iseq.branchunless(end_label) + iseq.branchif(end_label) [start_label, end_label] end # Jump back to the start of the loop. def loop_end(iseq, start_label, end_label) - iseq.jump(start_label) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(YARV.calldata(:[], 1)) + + iseq.putobject(0) + iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(start_label) + iseq.push(end_label) end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4af5d6f0..4c9a4d50 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -304,10 +304,11 @@ def visit_CHAR(node) end def visit_END(node) + start_line = node.location.start_line once_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(start_line)) do postexe_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(start_line)) do iseq.event(:RUBY_EVENT_B_CALL) *statements, last_statement = node.statements.body @@ -567,7 +568,7 @@ def visit_binary(node) end def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) visit(node.bodystmt) @@ -751,7 +752,9 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_child_iseq(iseq.class_child_iseq(name, node.location)) do + with_child_iseq( + iseq.class_child_iseq(name, node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -818,7 +821,8 @@ def visit_const_path_ref(node) def visit_def(node) name = node.name.value.to_sym - method_iseq = iseq.method_child_iseq(name.to_s, node.location) + method_iseq = + iseq.method_child_iseq(name.to_s, node.location.start_line) with_child_iseq(method_iseq) do visit(node.params) if node.params @@ -939,7 +943,9 @@ def visit_for(node) iseq.local_table.plain(name) block_iseq = - with_child_iseq(iseq.block_child_iseq(node.statements.location)) do + with_child_iseq( + iseq.block_child_iseq(node.statements.location.start_line) + ) do iseq.argument_options[:lead_num] ||= 0 iseq.argument_options[:lead_num] += 1 iseq.argument_options[:ambiguous_param0] = true @@ -1076,7 +1082,7 @@ def visit_label(node) def visit_lambda(node) lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do + with_child_iseq(iseq.block_child_iseq(node.location.start_line)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) @@ -1127,7 +1133,9 @@ def visit_mlhs(node) def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_child_iseq(iseq.module_child_iseq(name, node.location)) do + with_child_iseq( + iseq.module_child_iseq(name, node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1375,10 +1383,11 @@ def visit_program(node) top_iseq = InstructionSequence.new( - :top, "", + "", + 1, + :top, nil, - node.location, options ) @@ -1543,7 +1552,9 @@ def visit_sclass(node) iseq.putnil singleton_iseq = - with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do + with_child_iseq( + iseq.singleton_class_child_iseq(node.location.start_line) + ) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -2018,7 +2029,7 @@ def visit_pattern(node, end_label) if node.constant iseq.dup visit(node.constant) - iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) iseq.branchunless(match_failure_label) end @@ -2078,7 +2089,7 @@ def visit_pattern(node, end_label) iseq.setlocal(lookup.index, lookup.level) else visit(required) - iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.checkmatch(CheckMatch::VM_CHECKMATCH_TYPE_CASE) iseq.branchunless(match_failure_label) end diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb index a6a567fb..47d2a2df 100644 --- a/lib/syntax_tree/yarv/decompiler.rb +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -64,6 +64,13 @@ def decompile(iseq) clauses[label] = clause clause = [] label = insn.name + when BranchIf + body = [ + Assign(block_label.field, node_for(insn.label.name)), + Next(Args([])) + ] + + clause << UnlessNode(clause.pop, Statements(body), nil) when BranchUnless body = [ Assign(block_label.field, node_for(insn.label.name)), @@ -157,6 +164,8 @@ def decompile(iseq) ) end end + when Pop + # skip when PutObject case insn.object when Float diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 033b6d3d..d303bcb7 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,7 +4,8 @@ module SyntaxTree module YARV class Disassembler attr_reader :output, :queue - attr_reader :current_prefix, :current_iseq + attr_reader :current_prefix + attr_accessor :current_iseq def initialize @output = StringIO.new @@ -114,7 +115,7 @@ def format_iseq(iseq) output << "#{current_prefix}== disasm: " output << "#:1 " - location = iseq.location + location = Location.fixed(line: iseq.line, char: 0, column: 0) output << "(#{location.start_line},#{location.start_column})-" output << "(#{location.end_line},#{location.end_column})" output << "> " diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 48305be6..c284221b 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -116,18 +116,18 @@ def inspect end end - # The type of the instruction sequence. - attr_reader :type - # The name of the instruction sequence. attr_reader :name + # The source location of the instruction sequence. + attr_reader :file, :line + + # The type of the instruction sequence. + attr_reader :type + # The parent instruction sequence, if there is one. attr_reader :parent_iseq - # The location of the root node of this instruction sequence. - attr_reader :location - # This is the list of information about the arguments to this # instruction sequence. attr_accessor :argument_size @@ -157,16 +157,18 @@ def inspect attr_reader :options def initialize( - type, name, - parent_iseq, - location, + file, + line, + type, + parent_iseq = nil, options = Compiler::Options.new ) - @type = type @name = name + @file = file + @line = line + @type = type @parent_iseq = parent_iseq - @location = location @argument_size = 0 @argument_options = {} @@ -256,9 +258,9 @@ def to_a node_ids: [-1] * insns.length }, name, + file, "", - "", - location.start_line, + line, type, local_table.names, dumped_options, @@ -278,6 +280,12 @@ def disasm def compile! specialize_instructions! if options.specialized_instruction? + catch_table.each do |catch_entry| + if !catch_entry.is_a?(CatchBreak) && catch_entry.iseq + catch_entry.iseq.compile! + end + end + length = 0 insns.each do |insn| case insn @@ -416,30 +424,30 @@ def specialize_instructions! # Child instruction sequence methods ########################################################################## - def child_iseq(type, name, location) - InstructionSequence.new(type, name, self, location, options) + def child_iseq(name, line, type) + InstructionSequence.new(name, file, line, type, self, options) end - def block_child_iseq(location) + def block_child_iseq(line) current = self current = current.parent_iseq while current.type == :block - child_iseq(:block, "block in #{current.name}", location) + child_iseq("block in #{current.name}", line, :block) end - def class_child_iseq(name, location) - child_iseq(:class, "", location) + def class_child_iseq(name, line) + child_iseq("", line, :class) end - def method_child_iseq(name, location) - child_iseq(:method, name, location) + def method_child_iseq(name, line) + child_iseq(name, line, :method) end - def module_child_iseq(name, location) - child_iseq(:class, "", location) + def module_child_iseq(name, line) + child_iseq("", line, :class) end - def singleton_class_child_iseq(location) - child_iseq(:class, "singleton class", location) + def singleton_class_child_iseq(line) + child_iseq("singleton class", line, :class) end ########################################################################## @@ -447,19 +455,39 @@ def singleton_class_child_iseq(location) ########################################################################## class CatchEntry - attr_reader :iseq, :begin_label, :end_label, :exit_label + attr_reader :iseq, :begin_label, :end_label, :exit_label, :restore_sp - def initialize(iseq, begin_label, end_label, exit_label) + def initialize(iseq, begin_label, end_label, exit_label, restore_sp) @iseq = iseq @begin_label = begin_label @end_label = end_label @exit_label = exit_label + @restore_sp = restore_sp end end class CatchBreak < CatchEntry def to_a - [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] + [ + :break, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name, + restore_sp + ] + end + end + + class CatchEnsure < CatchEntry + def to_a + [ + :ensure, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name + ] end end @@ -493,24 +521,64 @@ def to_a end end - def catch_break(iseq, begin_label, end_label, exit_label) - catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) - end - - def catch_next(begin_label, end_label, exit_label) - catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) - end - - def catch_redo(begin_label, end_label, exit_label) - catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) - end - - def catch_rescue(iseq, begin_label, end_label, exit_label) - catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) - end - - def catch_retry(begin_label, end_label, exit_label) - catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) + def catch_break(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchBreak.new( + iseq, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_ensure(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchEnsure.new( + iseq, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_next(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchNext.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_redo(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRedo.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_rescue(iseq, begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRescue.new( + iseq, + begin_label, + end_label, + exit_label, + restore_sp + ) + end + + def catch_retry(begin_label, end_label, exit_label, restore_sp) + catch_table << CatchRetry.new( + nil, + begin_label, + end_label, + exit_label, + restore_sp + ) end ########################################################################## @@ -895,7 +963,8 @@ def toregexp(options, length) # This method will create a new instruction sequence from a serialized # RubyVM::InstructionSequence object. def self.from(source, options = Compiler::Options.new, parent_iseq = nil) - iseq = new(source[9], source[5], parent_iseq, Location.default, options) + iseq = + new(source[5], source[6], source[8], source[9], parent_iseq, options) # set up the labels object so that the labels are shared between the # location in the instruction sequence and the instructions that @@ -914,45 +983,9 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.argument_options[:opt].map! { |opt| labels[opt] } end - # set up the catch table - source[12].each do |entry| - case entry[0] - when :break - iseq.catch_break( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :next - iseq.catch_next( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :rescue - iseq.catch_rescue( - from(entry[1]), - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :redo - iseq.catch_redo( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - when :retry - iseq.catch_retry( - labels[entry[2]], - labels[entry[3]], - labels[entry[4]] - ) - else - raise "unknown catch type: #{entry[0]}" - end - end + # track the child block iseqs so that our catch table can point to the + # correctly created iseqs + block_iseqs = [] # set up all of the instructions source[13].each do |insn| @@ -1135,6 +1168,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.putspecialobject(opnds[0]) when :send block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil + block_iseqs << block_iseq if block_iseq iseq.send(CallData.from(opnds[0]), block_iseq) when :setclassvariable iseq.push(SetClassVariable.new(opnds[0], opnds[1])) @@ -1163,6 +1197,76 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) end end + # set up the catch table + source[12].each do |entry| + case entry[0] + when :break + if entry[1] + break_iseq = + block_iseqs.find do |block_iseq| + block_iseq.name == entry[1][5] && + block_iseq.file == entry[1][6] && + block_iseq.line == entry[1][8] + end + + iseq.catch_break( + break_iseq || from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + else + iseq.catch_break( + nil, + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + end + when :ensure + iseq.catch_ensure( + from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :next + iseq.catch_next( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :rescue + iseq.catch_rescue( + from(entry[1], options, iseq), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :redo + iseq.catch_redo( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + when :retry + iseq.catch_retry( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]], + entry[5] + ) + else + raise "unknown catch type: #{entry[0]}" + end + end + iseq.compile! if iseq.type == :top iseq end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 288edb16..5e1d116b 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -399,9 +399,11 @@ def call(vm) # ~~~ # class CheckMatch - TYPE_WHEN = 1 - TYPE_CASE = 2 - TYPE_RESCUE = 3 + VM_CHECKMATCH_TYPE_WHEN = 1 + VM_CHECKMATCH_TYPE_CASE = 2 + VM_CHECKMATCH_TYPE_RESCUE = 3 + VM_CHECKMATCH_TYPE_MASK = 0x03 + VM_CHECKMATCH_ARRAY = 0x04 attr_reader :type @@ -434,7 +436,32 @@ def canonical end def call(vm) - raise NotImplementedError, "checkmatch" + target, pattern = vm.pop(2) + + vm.push( + if type & VM_CHECKMATCH_ARRAY > 0 + pattern.any? { |item| check?(item, target) } + else + check?(pattern, target) + end + ) + end + + private + + def check?(pattern, target) + case type & VM_CHECKMATCH_TYPE_MASK + when VM_CHECKMATCH_TYPE_WHEN + pattern + when VM_CHECKMATCH_TYPE_CASE + pattern === target + when VM_CHECKMATCH_TYPE_RESCUE + unless pattern.is_a?(Module) + raise TypeError, "class or module required for rescue clause" + end + + pattern === target + end end end @@ -762,12 +789,26 @@ def canonical def call(vm) object, superclass = vm.pop(2) - iseq = class_iseq - clazz = Class.new(superclass || Object) - vm.push(vm.run_class_frame(iseq, clazz)) + if name == :singletonclass + vm.push(vm.run_class_frame(class_iseq, object.singleton_class)) + elsif object.const_defined?(name) + vm.push(vm.run_class_frame(class_iseq, object.const_get(name))) + elsif flags & TYPE_MODULE > 0 + clazz = Module.new + object.const_set(name, clazz) + vm.push(vm.run_class_frame(class_iseq, clazz)) + else + clazz = + if flags & FLAG_HAS_SUPERCLASS > 0 + Class.new(superclass) + else + Class.new + end - object.const_set(name, clazz) + object.const_set(name, clazz) + vm.push(vm.run_class_frame(class_iseq, clazz)) + end end end @@ -882,17 +923,19 @@ def call(vm) when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR message when TYPE_IVAR - message if vm._self.instance_variable_defined?(name) + message if vm.frame._self.instance_variable_defined?(name) when TYPE_LVAR raise NotImplementedError, "defined TYPE_LVAR" when TYPE_GVAR message if global_variables.include?(name) when TYPE_CVAR - clazz = vm._self + clazz = vm.frame._self clazz = clazz.singleton_class unless clazz.is_a?(Module) message if clazz.class_variable_defined?(name) when TYPE_CONST - raise NotImplementedError, "defined TYPE_CONST" + clazz = vm.frame._self + clazz = clazz.singleton_class unless clazz.is_a?(Module) + message if clazz.const_defined?(name) when TYPE_METHOD raise NotImplementedError, "defined TYPE_METHOD" when TYPE_YIELD @@ -904,7 +947,9 @@ def call(vm) when TYPE_FUNC message if object.respond_to?(name, true) when TYPE_CONST_FROM - raise NotImplementedError, "defined TYPE_CONST_FROM" + defined = + vm.frame.nesting.any? { |scope| scope.const_defined?(name, true) } + message if defined end vm.push(result) @@ -962,12 +1007,22 @@ def canonical def call(vm) name = method_name + nesting = vm.frame.nesting iseq = method_iseq vm + .frame ._self .__send__(:define_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + vm.run_method_frame( + name, + nesting, + iseq, + self, + *args, + **kwargs, + &block + ) end end end @@ -1024,12 +1079,22 @@ def canonical def call(vm) name = method_name + nesting = vm.frame.nesting iseq = method_iseq vm + .frame ._self .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| - vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + vm.run_method_frame( + name, + nesting, + iseq, + self, + *args, + **kwargs, + &block + ) end end end @@ -1259,7 +1324,42 @@ def canonical end def call(vm) - raise NotImplementedError, "expandarray" + object = vm.pop + object = + if Array === object + object.dup + elsif object.respond_to?(:to_ary, true) + object.to_ary + else + [object] + end + + splat_flag = flags & 0x01 > 0 + postarg_flag = flags & 0x02 > 0 + + if number == 0 && splat_flag == 0 + # no space left on stack + elsif postarg_flag + values = [] + + if number > object.size + (number - object.size).times { values.push(nil) } + end + [number, object.size].min.times { values.push(object.pop) } + values.push(object.to_a) if splat_flag + + values.each { |item| vm.push(item) } + else + values = [] + + [number, object.size].min.times { values.push(object.shift) } + if number > values.size + (number - values.size).times { values.push(nil) } + end + values.push(object.to_a) if splat_flag + + values.reverse_each { |item| vm.push(item) } + end end end @@ -1424,7 +1524,7 @@ def canonical end def call(vm) - clazz = vm._self + clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) vm.push(clazz.class_variable_get(name)) end @@ -1474,14 +1574,20 @@ def canonical end def call(vm) - # const_base, allow_nil = - vm.pop(2) + const_base, allow_nil = vm.pop(2) - vm.frame.nesting.reverse_each do |clazz| - if clazz.const_defined?(name) - vm.push(clazz.const_get(name)) + if const_base + if const_base.const_defined?(name) + vm.push(const_base.const_get(name)) return end + elsif const_base.nil? && allow_nil + vm.frame.nesting.reverse_each do |clazz| + if clazz.const_defined?(name) + vm.push(clazz.const_get(name)) + return + end + end end raise NameError, "uninitialized constant #{name}" @@ -1590,7 +1696,7 @@ def canonical def call(vm) method = Object.instance_method(:instance_variable_get) - vm.push(method.bind(vm._self).call(name)) + vm.push(method.bind(vm.frame._self).call(name)) end end @@ -1948,8 +2054,9 @@ def canonical def call(vm) block = if (iseq = block_iseq) + frame = vm.frame ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) + vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) end end @@ -2396,7 +2503,7 @@ def canonical def call(vm) return if @executed - vm.push(vm.run_block_frame(iseq)) + vm.push(vm.run_block_frame(iseq, vm.frame)) @executed = true end end @@ -2960,7 +3067,7 @@ def canonical end def call(vm) - current = vm._self + current = vm.frame._self current = current.class unless current.is_a?(Class) names.each do |name| @@ -4254,7 +4361,7 @@ def canonical end def call(vm) - vm.push(vm._self) + vm.push(vm.frame._self) end end @@ -4310,7 +4417,7 @@ def call(vm) when OBJECT_VMCORE vm.push(vm.frozen_core) when OBJECT_CBASE - value = vm._self + value = vm.frame._self value = value.singleton_class unless value.is_a?(Class) vm.push(value) when OBJECT_CONST_BASE @@ -4418,9 +4525,12 @@ def canonical def call(vm) block = if (iseq = block_iseq) + frame = vm.frame ->(*args, **kwargs, &blk) do - vm.run_block_frame(iseq, *args, **kwargs, &blk) + vm.run_block_frame(iseq, frame, *args, **kwargs, &blk) end + elsif calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + vm.pop end keywords = @@ -4542,7 +4652,7 @@ def canonical end def call(vm) - clazz = vm._self + clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) clazz.class_variable_set(name, vm.pop) end @@ -4698,7 +4808,7 @@ def canonical def call(vm) method = Object.instance_method(:instance_variable_set) - method.bind(vm._self).call(name, vm.pop) + method.bind(vm.frame._self).call(name, vm.pop) end end @@ -4946,7 +5056,7 @@ def canonical def call(vm) case key when GetSpecial::SVAR_LASTLINE - raise NotImplementedError, "svar SVAR_LASTLINE" + raise NotImplementedError, "setspecial SVAR_LASTLINE" when GetSpecial::SVAR_BACKREF raise NotImplementedError, "setspecial SVAR_BACKREF" when GetSpecial::SVAR_FLIPFLOP_START @@ -4999,7 +5109,27 @@ def canonical end def call(vm) - vm.push(*vm.pop) + value = vm.pop + + vm.push( + if Array === value + value.instance_of?(Array) ? value.dup : Array[*value] + elsif value.nil? + value.to_a + else + if value.respond_to?(:to_a, true) + result = value.to_a + + if result.nil? + [value] + elsif !result.is_a?(Array) + raise TypeError, "expected to_a to return an Array" + end + else + [value] + end + end + ) end end @@ -5061,15 +5191,18 @@ def call(vm) # ~~~ # class Throw - TAG_NONE = 0x0 - TAG_RETURN = 0x1 - TAG_BREAK = 0x2 - TAG_NEXT = 0x3 - TAG_RETRY = 0x4 - TAG_REDO = 0x5 - TAG_RAISE = 0x6 - TAG_THROW = 0x7 - TAG_FATAL = 0x8 + RUBY_TAG_NONE = 0x0 + RUBY_TAG_RETURN = 0x1 + RUBY_TAG_BREAK = 0x2 + RUBY_TAG_NEXT = 0x3 + RUBY_TAG_RETRY = 0x4 + RUBY_TAG_REDO = 0x5 + RUBY_TAG_RAISE = 0x6 + RUBY_TAG_THROW = 0x7 + RUBY_TAG_FATAL = 0x8 + + VM_THROW_NO_ESCAPE_FLAG = 0x8000 + VM_THROW_STATE_MASK = 0xff attr_reader :type @@ -5102,7 +5235,43 @@ def canonical end def call(vm) - raise NotImplementedError, "throw" + state = type & VM_THROW_STATE_MASK + value = vm.pop + + case state + when RUBY_TAG_NONE + case value + when nil + # do nothing + when Exception + raise value + else + raise NotImplementedError + end + when RUBY_TAG_RETURN + raise VM::ReturnError.new(value, error_backtrace(vm)) + when RUBY_TAG_BREAK + raise VM::BreakError.new(value, error_backtrace(vm)) + when RUBY_TAG_NEXT + raise VM::NextError.new(value, error_backtrace(vm)) + else + raise NotImplementedError, "Unknown throw kind #{state}" + end + end + + private + + def error_backtrace(vm) + backtrace = [] + current = vm.frame + + while current + backtrace << "#{current.iseq.file}:#{current.line}:in" \ + "`#{current.iseq.name}'" + current = current.parent + end + + [*backtrace, *caller] end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 30a95437..b2e33290 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -45,6 +45,14 @@ def pops def pushes 1 end + + def canonical + YARV::GetClassVariable.new(name, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -94,6 +102,10 @@ def pushes 1 end + def canonical + self + end + def call(vm) vm.push(nil) end @@ -102,8 +114,8 @@ def call(vm) # ### Summary # # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops - # the value it should set off the top of the stack. It then pushes that - # value back onto the top of the stack. + # the value it should set off the top of the stack. It uses this value to + # set the cache. It then pushes that value back onto the top of the stack. # # This instruction is no longer used since in Ruby 3.2 it was replaced by # the consolidated `opt_getconstant_path` instruction. @@ -141,8 +153,11 @@ def pushes 1 end + def canonical + self + end + def call(vm) - vm.push(vm.pop) end end @@ -186,6 +201,14 @@ def pops def pushes 0 end + + def canonical + YARV::SetClassVariable.new(name, nil) + end + + def call(vm) + canonical.call(vm) + end end end end diff --git a/lib/syntax_tree/yarv/vm.rb b/lib/syntax_tree/yarv/vm.rb new file mode 100644 index 00000000..1bbb82ed --- /dev/null +++ b/lib/syntax_tree/yarv/vm.rb @@ -0,0 +1,624 @@ +# frozen_string_literal: true + +require "forwardable" + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + class VM + class Jump + attr_reader :label + + def initialize(label) + @label = label + end + end + + class Leave + attr_reader :value + + def initialize(value) + @value = value + end + end + + class Frame + attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars + attr_accessor :line, :pc + + def initialize(iseq, parent, stack_index, _self, nesting) + @iseq = iseq + @parent = parent + @stack_index = stack_index + @_self = _self + @nesting = nesting + + @svars = {} + @line = iseq.line + @pc = 0 + end + end + + class TopFrame < Frame + def initialize(iseq) + super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) + end + end + + class BlockFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class MethodFrame < Frame + attr_reader :name, :block + + def initialize(iseq, nesting, parent, stack_index, _self, name, block) + super(iseq, parent, stack_index, _self, nesting) + @name = name + @block = block + end + end + + class ClassFrame < Frame + def initialize(iseq, parent, stack_index, _self) + super(iseq, parent, stack_index, _self, parent.nesting + [_self]) + end + end + + class RescueFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class ThrownError < StandardError + attr_reader :value + + def initialize(value, backtrace) + super("This error was thrown by the Ruby VM.") + @value = value + set_backtrace(backtrace) + end + end + + class ReturnError < ThrownError + end + + class BreakError < ThrownError + end + + class NextError < ThrownError + end + + class FrozenCore + define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } + + define_method("core#hash_merge_ptr") do |hash, *values| + hash.merge(values.each_slice(2).to_h) + end + + define_method("core#set_method_alias") do |clazz, new_name, old_name| + clazz.alias_method(new_name, old_name) + end + + define_method("core#set_variable_alias") do |new_name, old_name| + # Using eval here since there isn't a reflection API to be able to + # alias global variables. + eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) + end + + define_method("core#set_postexe") { |&block| END { block.call } } + + define_method("core#undef_method") do |clazz, name| + clazz.undef_method(name) + nil + end + end + + # This is the main entrypoint for events firing in the VM, which allows + # us to implement tracing. + class NullEvents + def publish_frame_change(frame) + end + + def publish_instruction(iseq, insn) + end + + def publish_stack_change(stack) + end + + def publish_tracepoint(event) + end + end + + # This is a simple implementation of tracing that prints to STDOUT. + class STDOUTEvents + attr_reader :disassembler + + def initialize + @disassembler = Disassembler.new + end + + def publish_frame_change(frame) + puts "%-16s %s" % ["frame-change", "#{frame.iseq.file}@#{frame.line}"] + end + + def publish_instruction(iseq, insn) + disassembler.current_iseq = iseq + puts "%-16s %s" % ["instruction", insn.disasm(disassembler)] + end + + def publish_stack_change(stack) + puts "%-16s %s" % ["stack-change", stack.values.inspect] + end + + def publish_tracepoint(event) + puts "%-16s %s" % ["tracepoint", event.inspect] + end + end + + # This represents the global VM stack. It effectively is an array, but + # wraps mutating functions with instrumentation. + class Stack + attr_reader :events, :values + + def initialize(events) + @events = events + @values = [] + end + + def concat(...) + values.concat(...).tap { events.publish_stack_change(self) } + end + + def last + values.last + end + + def length + values.length + end + + def push(...) + values.push(...).tap { events.publish_stack_change(self) } + end + + def pop(...) + values.pop(...).tap { events.publish_stack_change(self) } + end + + def slice!(...) + values.slice!(...).tap { events.publish_stack_change(self) } + end + + def [](...) + values.[](...) + end + + def []=(...) + values.[]=(...).tap { events.publish_stack_change(self) } + end + end + + FROZEN_CORE = FrozenCore.new.freeze + + extend Forwardable + + attr_reader :events + + attr_reader :stack + def_delegators :stack, :push, :pop + + attr_reader :frame + + def initialize(events = NullEvents.new) + @events = events + @stack = Stack.new(events) + @frame = nil + end + + ########################################################################## + # Helper methods for frames + ########################################################################## + + def run_frame(frame) + # First, set the current frame to the given value. + previous = @frame + @frame = frame + events.publish_frame_change(@frame) + + # Next, set up the local table for the frame. This is actually incorrect + # as it could use the values already on the stack, but for now we're + # just doing this for simplicity. + stack.concat(Array.new(frame.iseq.local_table.size)) + + # Yield so that some frame-specific setup can be done. + start_label = yield if block_given? + frame.pc = frame.iseq.insns.index(start_label) if start_label + + # Finally we can execute the instructions one at a time. If they return + # jumps or leaves we will handle those appropriately. + loop do + case (insn = frame.iseq.insns[frame.pc]) + when Integer + frame.line = insn + frame.pc += 1 + when Symbol + events.publish_tracepoint(insn) + frame.pc += 1 + when InstructionSequence::Label + # skip labels + frame.pc += 1 + else + begin + events.publish_instruction(frame.iseq, insn) + result = insn.call(self) + rescue ReturnError => error + raise if frame.iseq.type != :method + + stack.slice!(frame.stack_index..) + @frame = frame.parent + events.publish_frame_change(@frame) + + return error.value + rescue BreakError => error + raise if frame.iseq.type != :block + + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchBreak) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = error.value) + rescue NextError => error + raise if frame.iseq.type != :block + + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchNext) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = error.value) + rescue Exception => error + catch_entry = + find_catch_entry(frame, InstructionSequence::CatchRescue) + raise unless catch_entry + + stack.slice!( + ( + frame.stack_index + frame.iseq.local_table.size + + catch_entry.restore_sp + ).. + ) + @frame = frame + events.publish_frame_change(@frame) + + frame.pc = frame.iseq.insns.index(catch_entry.exit_label) + push(result = run_rescue_frame(catch_entry.iseq, frame, error)) + end + + case result + when Jump + frame.pc = frame.iseq.insns.index(result.label) + 1 + when Leave + # this shouldn't be necessary, but is because we're not handling + # the stack correctly at the moment + stack.slice!(frame.stack_index..) + + # restore the previous frame + @frame = previous || frame.parent + events.publish_frame_change(@frame) if @frame + + return result.value + else + frame.pc += 1 + end + end + end + end + + def find_catch_entry(frame, type) + iseq = frame.iseq + iseq.catch_table.find do |catch_entry| + next unless catch_entry.is_a?(type) + + begin_pc = iseq.insns.index(catch_entry.begin_label) + end_pc = iseq.insns.index(catch_entry.end_label) + + (begin_pc...end_pc).cover?(frame.pc) + end + end + + def run_top_frame(iseq) + run_frame(TopFrame.new(iseq)) + end + + def run_block_frame(iseq, frame, *args, **kwargs, &block) + run_frame(BlockFrame.new(iseq, frame, stack.length)) do + setup_arguments(iseq, args, kwargs, block) + end + end + + def run_class_frame(iseq, clazz) + run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) + end + + def run_method_frame(name, nesting, iseq, _self, *args, **kwargs, &block) + run_frame( + MethodFrame.new( + iseq, + nesting, + frame, + stack.length, + _self, + name, + block + ) + ) { setup_arguments(iseq, args, kwargs, block) } + end + + def run_rescue_frame(iseq, frame, error) + run_frame(RescueFrame.new(iseq, frame, stack.length)) do + local_set(0, 0, error) + nil + end + end + + def setup_arguments(iseq, args, kwargs, block) + locals = [*args] + local_index = 0 + start_label = nil + + # First, set up all of the leading arguments. These are positional and + # required arguments at the start of the argument list. + if (lead_num = iseq.argument_options[:lead_num]) + lead_num.times do + local_set(local_index, 0, locals.shift) + local_index += 1 + end + end + + # Next, set up all of the optional arguments. The opt array contains + # the labels that the frame should start at if the optional is + # present. The last element of the array is the label that the frame + # should start at if all of the optional arguments are present. + if (opt = iseq.argument_options[:opt]) + opt[0...-1].each do |label| + if locals.empty? + start_label = label + break + else + local_set(local_index, 0, locals.shift) + local_index += 1 + end + + start_label = opt.last if start_label.nil? + end + end + + # If there is a splat argument, then we'll set that up here. It will + # grab up all of the remaining positional arguments. + if (rest_start = iseq.argument_options[:rest_start]) + if (post_start = iseq.argument_options[:post_start]) + length = post_start - rest_start + local_set(local_index, 0, locals[0...length]) + locals = locals[length..] + else + local_set(local_index, 0, locals.dup) + locals.clear + end + local_index += 1 + end + + # Next, set up any post arguments. These are positional arguments that + # come after the splat argument. + if (post_num = iseq.argument_options[:post_num]) + post_num.times do + local_set(local_index, 0, locals.shift) + local_index += 1 + end + end + + if (keyword_option = iseq.argument_options[:keyword]) + # First, set up the keyword bits array. + keyword_bits = + keyword_option.map do |config| + kwargs.key?(config.is_a?(Array) ? config[0] : config) + end + + iseq.local_table.locals.each_with_index do |local, index| + # If this is the keyword bits local, then set it appropriately. + if local.name.is_a?(Integer) + local_set(index, 0, keyword_bits) + next + end + + # First, find the configuration for this local in the keywords + # list if it exists. + name = local.name + config = + keyword_option.find do |keyword| + keyword.is_a?(Array) ? keyword[0] == name : keyword == name + end + + # If the configuration doesn't exist, then the local is not a + # keyword local. + next unless config + + if !config.is_a?(Array) + # required keyword + local_set(index, 0, kwargs.fetch(name)) + elsif !config[1].nil? + # optional keyword with embedded default value + local_set(index, 0, kwargs.fetch(name, config[1])) + else + # optional keyword with expression default value + local_set(index, 0, kwargs[name]) + end + end + end + + local_set(local_index, 0, block) if iseq.argument_options[:block_start] + + start_label + end + + ########################################################################## + # Helper methods for instructions + ########################################################################## + + def const_base + frame.nesting.last + end + + def frame_at(level) + current = frame + level.times { current = current.parent } + current + end + + def frame_svar + current = frame + current = current.parent while current.is_a?(BlockFrame) + current + end + + def frame_yield + current = frame + current = current.parent until current.is_a?(MethodFrame) + current + end + + def frozen_core + FROZEN_CORE + end + + def jump(label) + Jump.new(label) + end + + def leave + Leave.new(pop) + end + + def local_get(index, level) + stack[frame_at(level).stack_index + index] + end + + def local_set(index, level, value) + stack[frame_at(level).stack_index + index] = value + end + + ########################################################################## + # Methods for overriding runtime behavior + ########################################################################## + + DLEXT = ".#{RbConfig::CONFIG["DLEXT"]}" + SOEXT = ".#{RbConfig::CONFIG["SOEXT"]}" + + def require_resolved(filepath) + $LOADED_FEATURES << filepath + iseq = RubyVM::InstructionSequence.compile_file(filepath) + run_top_frame(InstructionSequence.from(iseq.to_a)) + end + + def require_internal(filepath, loading: false) + case (extname = File.extname(filepath)) + when "" + # search for all the extensions + searching = filepath + extensions = ["", ".rb", DLEXT, SOEXT] + when ".rb", DLEXT, SOEXT + # search only for the given extension name + searching = File.basename(filepath, extname) + extensions = [extname] + else + # we don't handle these extensions, raise a load error + raise LoadError, "cannot load such file -- #{filepath}" + end + + if filepath.start_with?("/") + # absolute path, search only in the given directory + directories = [File.dirname(searching)] + searching = File.basename(searching) + else + # relative path, search in the load path + directories = $LOAD_PATH + end + + directories.each do |directory| + extensions.each do |extension| + absolute_path = File.join(directory, "#{searching}#{extension}") + next unless File.exist?(absolute_path) + + if !loading && $LOADED_FEATURES.include?(absolute_path) + return false + elsif extension == ".rb" + require_resolved(absolute_path) + return true + elsif loading + return Kernel.send(:yarv_load, filepath) + else + return Kernel.send(:yarv_require, filepath) + end + end + end + + if loading + Kernel.send(:yarv_load, filepath) + else + Kernel.send(:yarv_require, filepath) + end + end + + def require(filepath) + require_internal(filepath, loading: false) + end + + def require_relative(filepath) + Kernel.yarv_require_relative(filepath) + end + + def load(filepath) + require_internal(filepath, loading: true) + end + + def eval( + source, + binding = TOPLEVEL_BINDING, + filename = "(eval)", + lineno = 1 + ) + Kernel.yarv_eval(source, binding, filename, lineno) + end + + def throw(tag, value = nil) + Kernel.throw(tag, value) + end + + def catch(tag, &block) + Kernel.catch(tag, &block) + end + end + end +end diff --git a/spec/mspec b/spec/mspec new file mode 160000 index 00000000..4877d58d --- /dev/null +++ b/spec/mspec @@ -0,0 +1 @@ +Subproject commit 4877d58dff577641bc1ecd1bf3d3c3daa93b423f diff --git a/spec/ruby b/spec/ruby new file mode 160000 index 00000000..71873ae4 --- /dev/null +++ b/spec/ruby @@ -0,0 +1 @@ +Subproject commit 71873ae4421f5b551a5af0f3427e901414736835 diff --git a/test/yarv_test.rb b/test/yarv_test.rb index f8e0ffdb..6f60d74e 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -41,9 +41,253 @@ def test_bf ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." iseq = YARV::Bf.new(hello_world).compile + stdout, = capture_io { iseq.eval } + assert_equal "Hello World!\n", stdout + Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) end + # rubocop:disable Layout/LineLength + EMULATION_CASES = { + # adjuststack + "x = [true]; x[0] ||= nil; x[0]" => true, + # anytostring + "\"\#{5}\"" => "5", + "class A2Str; def to_s; 1; end; end; \"\#{A2Str.new}\"" => + "#", + # branchif + "x = true; x ||= \"foo\"; x" => true, + # branchnil + "x = nil; if x&.to_s; 'hi'; else; 'bye'; end" => "bye", + # branchunless + "if 2 + 3; 'hi'; else; 'bye'; end" => "hi", + # checkkeyword + # "def evaluate(value: rand); value.floor; end; evaluate" => 0, + # checkmatch + "'foo' in String" => true, + "case 1; when *[1, 2, 3]; true; end" => true, + # checktype + "['foo'] in [String]" => true, + # concatarray + "[1, *2]" => [1, 2], + # concatstrings + "\"\#{7}\"" => "7", + # defineclass + "class DefineClass; def bar; end; end" => :bar, + "module DefineModule; def bar; end; end" => :bar, + "class << self; self; end" => + TOPLEVEL_BINDING.eval("self").singleton_class, + # defined + "defined?(1)" => "expression", + "defined?(foo = 1)" => "assignment", + "defined?(Object)" => "constant", + # definemethod + "def definemethod = 5; definemethod" => 5, + # definesmethod + "def self.definesmethod = 5; self.definesmethod" => 5, + # dup + "$global = 5" => 5, + # duparray + "[true]" => [true], + # duphash + "{ a: 1 }" => { + a: 1 + }, + # dupn + "Object::X ||= true" => true, + # expandarray + "x, = [true, false, nil]" => [true, false, nil], + "*, x = [true, false, nil]" => [true, false, nil], + # getblockparam + "def getblockparam(&block); block; end; getblockparam { 1 }.call" => 1, + # getblockparamproxy + "def getblockparamproxy(&block); block.call; end; getblockparamproxy { 1 }" => + 1, + # getclassvariable + "class CVar; @@foo = 5; end; class << CVar; @@foo; end" => 5, + # getconstant + "Object" => Object, + # getglobal + "$$" => $$, + # getinstancevariable + "@foo = 5; @foo" => 5, + # getlocal + "value = 5; self.then { self.then { self.then { value } } }" => 5, + # getlocalwc0 + "value = 5; value" => 5, + # getlocalwc1 + "value = 5; self.then { value }" => 5, + # getspecial + "1 if (2 == 2) .. (3 == 3)" => 1, + # intern + ":\"foo\#{1}\"" => :foo1, + # invokeblock + "def invokeblock = yield; invokeblock { 1 }" => 1, + # invokesuper + <<~RUBY => 2, + class Parent + def value + 1 + end + end + + class Child < Parent + def value + super + 1 + end + end + + Child.new.value + RUBY + # jump + "x = 0; if x == 0 then 1 else 2 end" => 1, + # newarray + "[\"value\"]" => ["value"], + # newarraykwsplat + "[\"string\", **{ foo: \"bar\" }]" => ["string", { foo: "bar" }], + # newhash + "def newhash(key, value) = { key => value }; newhash(1, 2)" => { + 1 => 2 + }, + # newrange + "x = 0; y = 1; (x..y).to_a" => [0, 1], + # nop + # objtostring + "\"\#{6}\"" => "6", + # once + "/\#{1}/o" => /1/o, + # opt_and + "0b0110 & 0b1011" => 0b0010, + # opt_aref + "x = [1, 2, 3]; x[1]" => 2, + # opt_aref_with + "x = { \"a\" => 1 }; x[\"a\"]" => 1, + # opt_aset + "x = [1, 2, 3]; x[1] = 4; x" => [1, 4, 3], + # opt_aset_with + "x = { \"a\" => 1 }; x[\"a\"] = 2; x" => { + "a" => 2 + }, + # opt_case_dispatch + <<~RUBY => "foo", + case 1 + when 1 + "foo" + else + "bar" + end + RUBY + # opt_div + "5 / 2" => 2, + # opt_empty_p + "[].empty?" => true, + # opt_eq + "1 == 1" => true, + # opt_ge + "1 >= 1" => true, + # opt_getconstant_path + "::Object" => Object, + # opt_gt + "1 > 1" => false, + # opt_le + "1 <= 1" => true, + # opt_length + "[1, 2, 3].length" => 3, + # opt_lt + "1 < 1" => false, + # opt_ltlt + "\"\" << 2" => "\u0002", + # opt_minus + "1 - 1" => 0, + # opt_mod + "5 % 2" => 1, + # opt_mult + "5 * 2" => 10, + # opt_neq + "1 != 1" => false, + # opt_newarray_max + "def opt_newarray_max(a, b, c) = [a, b, c].max; opt_newarray_max(1, 2, 3)" => + 3, + # opt_newarray_min + "def opt_newarray_min(a, b, c) = [a, b, c].min; opt_newarray_min(1, 2, 3)" => + 1, + # opt_nil_p + "nil.nil?" => true, + # opt_not + "!true" => false, + # opt_or + "0b0110 | 0b1011" => 0b1111, + # opt_plus + "1 + 1" => 2, + # opt_regexpmatch2 + "/foo/ =~ \"~~~foo\"" => 3, + # opt_send_without_block + "5.to_s" => "5", + # opt_size + "[1, 2, 3].size" => 3, + # opt_str_freeze + "\"foo\".freeze" => "foo", + # opt_str_uminus + "-\"foo\"" => -"foo", + # opt_succ + "1.succ" => 2, + # pop + "a ||= 2; a" => 2, + # putnil + "[nil]" => [nil], + # putobject + "2" => 2, + # putobject_INT2FIX_0_ + "0" => 0, + # putobject_INT2FIX_1_ + "1" => 1, + # putself + "self" => TOPLEVEL_BINDING.eval("self"), + # putspecialobject + "[class Undef; def foo = 1; undef foo; end]" => [nil], + # putstring + "\"foo\"" => "foo", + # send + "\"hello\".then { |value| value }" => "hello", + # setblockparam + "def setblockparam(&bar); bar = -> { 1 }; bar.call; end; setblockparam" => + 1, + # setclassvariable + "class CVarSet; @@foo = 1; end; class << CVarSet; @@foo = 10; end" => 10, + # setconstant + "SetConstant = 1" => 1, + # setglobal + "$global = 10" => 10, + # setinstancevariable + "@ivar = 5" => 5, + # setlocal + "x = 5; tap { tap { tap { x = 10 } } }; x" => 10, + # setlocal_WC_0 + "x = 5; x" => 5, + # setlocal_WC_1 + "x = 5; tap { x = 10 }; x" => 10, + # setn + "{}[:key] = 'value'" => "value", + # setspecial + "1 if (1 == 1) .. (2 == 2)" => 1, + # splatarray + "x = *(5)" => [5], + # swap + "!!defined?([[]])" => true, + # throw + # topn + "case 3; when 1..5; 'foo'; end" => "foo", + # toregexp + "/abc \#{1 + 2} def/" => /abc 3 def/ + }.freeze + # rubocop:enable Layout/LineLength + + EMULATION_CASES.each do |source, expected| + define_method("test_emulate_#{source}") do + assert_emulates(expected, source) + end + end + private def assert_decompiles(expected, source) @@ -51,5 +295,41 @@ def assert_decompiles(expected, source) actual = Formatter.format(source, ruby) assert_equal expected, actual end + + def assert_emulates(expected, source) + ruby_iseq = RubyVM::InstructionSequence.compile(source) + yarv_iseq = YARV::InstructionSequence.from(ruby_iseq.to_a) + + exercise_iseq(yarv_iseq) + result = SyntaxTree::YARV::VM.new.run_top_frame(yarv_iseq) + assert_equal(expected, result) + end + + def exercise_iseq(iseq) + iseq.disasm + iseq.to_a + + iseq.insns.each do |insn| + case insn + when YARV::InstructionSequence::Label, Integer, Symbol + next + end + + insn.pushes + insn.pops + insn.canonical + + case insn + when YARV::DefineClass + exercise_iseq(insn.class_iseq) + when YARV::DefineMethod, YARV::DefineSMethod + exercise_iseq(insn.method_iseq) + when YARV::InvokeSuper, YARV::Send + exercise_iseq(insn.block_iseq) if insn.block_iseq + when YARV::Once + exercise_iseq(insn.iseq) + end + end + end end end