From 3150301e24cd496c57428c5daa146580b4fe3aa6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 15 Oct 2022 21:18:47 -0400 Subject: [PATCH 01/15] Tons of various performance tweaks --- bin/profile | 11 +- lib/syntax_tree.rb | 4 +- lib/syntax_tree/formatter.rb | 60 +++- lib/syntax_tree/node.rb | 598 ++++++++++++++++++++--------------- lib/syntax_tree/parser.rb | 160 +++++++--- test/cli_test.rb | 2 +- test/node_test.rb | 2 +- test/quotes_test.rb | 15 + 8 files changed, 539 insertions(+), 313 deletions(-) create mode 100644 test/quotes_test.rb diff --git a/bin/profile b/bin/profile index 0a1b6ade..15bd28ae 100755 --- a/bin/profile +++ b/bin/profile @@ -6,22 +6,21 @@ require "bundler/inline" gemfile do source "https://rubygems.org" gem "stackprof" + gem "prettier_print" end $:.unshift(File.expand_path("../lib", __dir__)) require "syntax_tree" -GC.disable - StackProf.run(mode: :cpu, out: "tmp/profile.dump", raw: true) do - filepath = File.expand_path("../lib/syntax_tree/node.rb", __dir__) - SyntaxTree.format(File.read(filepath)) + Dir[File.join(RbConfig::CONFIG["libdir"], "**/*.rb")].each do |filepath| + SyntaxTree.format(SyntaxTree.read(filepath)) + end end -GC.enable - File.open("tmp/flamegraph.html", "w") do |file| report = Marshal.load(IO.binread("tmp/profile.dump")) + StackProf::Report.new(report).print_text StackProf::Report.new(report).print_d3_flamegraph(file) end diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 88c66369..29ed048c 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require "delegate" require "etc" require "json" require "pp" @@ -10,7 +9,6 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" -require_relative "syntax_tree/parser" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" @@ -20,6 +18,8 @@ require_relative "syntax_tree/visitor/match_visitor" require_relative "syntax_tree/visitor/pretty_print_visitor" +require_relative "syntax_tree/parser" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index 4c7a00db..dc124fbc 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -4,6 +4,26 @@ module SyntaxTree # A slightly enhanced PP that knows how to format recursively including # comments. class Formatter < PrettierPrint + # It's very common to use seplist with ->(q) { q.breakable_return }. We wrap + # that pattern into an object to cut down on having to create a bunch of + # lambdas all over the place. + class BreakableReturnSeparator + def call(q) + q.breakable_return + end + end + + # Similar to the previous, it's common to ->(q) { q.breakable_space }. We + # also wrap that pattern into an object to cut down on lambdas. + class BreakableSpaceSeparator + def call(q) + q.breakable_space + end + end + + BREAKABLE_RETURN_SEPARATOR = BreakableReturnSeparator.new + BREAKABLE_SPACE_SEPARATOR = BreakableSpaceSeparator.new + # We want to minimize as much as possible the number of options that are # available in syntax tree. For the most part, if users want non-default # formatting, they should override the format methods on the specific nodes @@ -75,8 +95,7 @@ def format(node, stackable: true) doc = if leading.last&.ignore? range = source[node.location.start_char...node.location.end_char] - separator = -> { breakable(indent: false, force: true) } - seplist(range.split(/\r?\n/, -1), separator) { |line| text(line) } + seplist(range.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) { |line| text(line) } else node.format(self) end @@ -108,5 +127,42 @@ def parent def parents stack[0...-1].reverse_each end + + # This is a simplified version of prettyprint's group. It doesn't provide + # any of the more advanced options because we don't need them and they take + # up expensive computation time. + def group + contents = [] + doc = Group.new(0, contents: contents) + + groups << doc + target << doc + + with_target(contents) { yield } + groups.pop + doc + end + + # A similar version to the super, except that it calls back into the + # separator proc with the instance of `self`. + def seplist(list, sep = nil, iter_method = :each) # :yield: element + first = true + list.__send__(iter_method) do |*v| + if first + first = false + elsif sep + sep.call(self) + else + comma_breakable + end + yield(*v) + end + end + + # This is a much simplified version of prettyprint's text. It avoids + # calculating width by pushing the string directly onto the target. + def text(string) + target << string + end end end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 7ecd69ff..2aa51fd8 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -177,10 +177,10 @@ def format(q) q.text("BEGIN ") q.format(lbrace) q.indent do - q.breakable + q.breakable_space q.format(statements) end - q.breakable + q.breakable_space q.text("}") end end @@ -280,10 +280,10 @@ def format(q) q.text("END ") q.format(lbrace) q.indent do - q.breakable + q.breakable_space q.format(statements) end - q.breakable + q.breakable_space q.text("}") end end @@ -327,10 +327,8 @@ def deconstruct_keys(_keys) def format(q) q.text("__END__") - q.breakable(force: true) - - separator = -> { q.breakable(indent: false, force: true) } - q.seplist(value.split(/\r?\n/, -1), separator) { |line| q.text(line) } + q.breakable_force + q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) { |line| q.text(line) } end end @@ -412,7 +410,7 @@ def format(q) q.format(left_argument, stackable: false) q.group do q.nest(keyword.length) do - q.breakable(force: left_argument.comments.any?) + left_argument.comments.any? ? q.breakable_force : q.breakable_space q.format(AliasArgumentFormatter.new(right), stackable: false) end end @@ -476,10 +474,10 @@ def format(q) if index q.indent do - q.breakable("") + q.breakable_empty q.format(index) end - q.breakable("") + q.breakable_empty end q.text("]") @@ -537,10 +535,10 @@ def format(q) if index q.indent do - q.breakable("") + q.breakable_empty q.format(index) end - q.breakable("") + q.breakable_empty end q.text("]") @@ -593,14 +591,16 @@ def format(q) return end - q.group(0, "(", ")") do + q.text("(") + q.group do q.indent do - q.breakable("") + q.breakable_empty q.format(arguments) q.if_break { q.text(",") } if q.trailing_comma? && trailing_comma? end - q.breakable("") + q.breakable_empty end + q.text(")") end private @@ -800,10 +800,11 @@ def initialize(contents) end def format(q) - q.group(0, "%w[", "]") do + q.text("%w[") + q.group do q.indent do - q.breakable("") - q.seplist(contents.parts, -> { q.breakable }) do |part| + q.breakable_empty + q.seplist(contents.parts, Formatter::BREAKABLE_SPACE_SEPARATOR) do |part| if part.is_a?(StringLiteral) q.format(part.parts.first) else @@ -811,8 +812,9 @@ def format(q) end end end - q.breakable("") + q.breakable_empty end + q.text("]") end end @@ -826,15 +828,17 @@ def initialize(contents) end def format(q) - q.group(0, "%i[", "]") do + q.text("%i[") + q.group do q.indent do - q.breakable("") - q.seplist(contents.parts, -> { q.breakable }) do |part| + q.breakable_empty + q.seplist(contents.parts, Formatter::BREAKABLE_SPACE_SEPARATOR) do |part| q.format(part.value) end end - q.breakable("") + q.breakable_empty end + q.text("]") end end @@ -861,6 +865,13 @@ def format(q) # # provided the line length was hit between `bar` and `baz`. class VarRefsFormatter + class Separator + def call(q) + q.text(",") + q.fill_breakable + end + end + # [Args] the contents of the array attr_reader :contents @@ -869,20 +880,16 @@ def initialize(contents) end def format(q) - q.group(0, "[", "]") do + q.text("[") + q.group do q.indent do - q.breakable("") - - separator = -> do - q.text(",") - q.fill_breakable - end - - q.seplist(contents.parts, separator) { |part| q.format(part) } + q.breakable_empty + q.seplist(contents.parts, Separator.new) { |part| q.format(part) } q.if_break { q.text(",") } if q.trailing_comma? end - q.breakable("") + q.breakable_empty end + q.text("]") end end @@ -902,11 +909,11 @@ def format(q) q.text("[") q.indent do lbracket.comments.each do |comment| - q.breakable(force: true) + q.breakable_force comment.format(q) end end - q.breakable(force: true) + q.breakable_force q.text("]") end end @@ -973,13 +980,13 @@ def format(q) if contents q.indent do - q.breakable("") + q.breakable_empty q.format(contents) q.if_break { q.text(",") } if q.trailing_comma? end end - q.breakable("") + q.breakable_empty q.text("]") end end @@ -1127,7 +1134,7 @@ def format(q) q.format(constant) if constant q.text("[") q.indent do - q.breakable("") + q.breakable_empty parts = [*requireds] parts << RestFormatter.new(rest) if rest @@ -1135,7 +1142,7 @@ def format(q) q.seplist(parts) { |part| q.format(part) } end - q.breakable("") + q.breakable_empty q.text("]") end end @@ -1145,13 +1152,13 @@ def format(q) module AssignFormatting def self.skip_indent?(value) case value - in ArrayLiteral | HashLiteral | Heredoc | Lambda | QSymbols | QWords | - Symbols | Words + when ArrayLiteral, HashLiteral, Heredoc, Lambda, QSymbols, QWords, + Symbols, Words true - in Call[receiver:] - skip_indent?(receiver) - in DynaSymbol[quote:] - quote.start_with?("%s") + when Call + skip_indent?(value.receiver) + when DynaSymbol + value.quote.start_with?("%s") else false end @@ -1206,7 +1213,7 @@ def format(q) q.format(value) else q.indent do - q.breakable + q.breakable_space q.format(value) end end @@ -1277,7 +1284,7 @@ def format_contents(q) q.format(value) else q.indent do - q.breakable + q.breakable_space q.format(value) end end @@ -1544,12 +1551,12 @@ def format(q) unless bodystmt.empty? q.indent do - q.breakable(force: true) unless bodystmt.statements.empty? + q.breakable_force unless bodystmt.statements.empty? q.format(bodystmt) end end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -1592,10 +1599,10 @@ def format(q) q.text("^(") q.nest(1) do q.indent do - q.breakable("") + q.breakable_empty q.format(statement) end - q.breakable("") + q.breakable_empty q.text(")") end end @@ -1669,7 +1676,7 @@ def format(q) q.text(operator.to_s) q.indent do - q.breakable(power ? "" : " ") + power ? q.breakable_empty : q.breakable_space q.format(right) end end @@ -1716,15 +1723,29 @@ def deconstruct_keys(_keys) { params: params, locals: locals, location: location, comments: comments } end + # Within the pipes of the block declaration, we don't want any spaces. So + # we'll separate the parameters with a comma and space but no breakables. + class Separator + def call(q) + q.text(", ") + end + end + + # We'll keep a single instance of this separator around for all block vars + # to cut down on allocations. + SEPARATOR = Separator.new + def format(q) - q.group(0, "|", "|") do + q.text("|") + q.group do q.remove_breaks(q.format(params)) if locals.any? q.text("; ") - q.seplist(locals, -> { q.text(", ") }) { |local| q.format(local) } + q.seplist(locals, SEPARATOR) { |local| q.format(local) } end end + q.text("|") end end @@ -1816,10 +1837,8 @@ def bind(start_char, start_column, end_char, end_column) end_column: end_column ) - parts = [rescue_clause, else_clause, ensure_clause] - # Here we're going to determine the bounds for the statements - consequent = parts.compact.first + consequent = rescue_clause || else_clause || ensure_clause statements.bind( start_char, start_column, @@ -1829,7 +1848,7 @@ def bind(start_char, start_column, end_char, end_column) # Next we're going to determine the rescue clause if there is one if rescue_clause - consequent = parts.drop(1).compact.first + consequent = else_clause || ensure_clause rescue_clause.bind_end( consequent ? consequent.location.start_char : end_char, consequent ? consequent.location.start_column : end_column @@ -1868,26 +1887,26 @@ def format(q) if rescue_clause q.nest(-2) do - q.breakable(force: true) + q.breakable_force q.format(rescue_clause) end end if else_clause q.nest(-2) do - q.breakable(force: true) + q.breakable_force q.format(else_keyword) end unless else_clause.empty? - q.breakable(force: true) + q.breakable_force q.format(else_clause) end end if ensure_clause q.nest(-2) do - q.breakable(force: true) + q.breakable_force q.format(ensure_clause) end end @@ -2004,22 +2023,16 @@ def forced_do_end_bounds?(q) # If we're the predicate of a loop or conditional, then we're going to have # to go with the {..} bounds. def forced_brace_bounds?(q) - parents = q.parents.to_a - parents.each_with_index.any? do |parent, index| - # If we hit certain breakpoints then we know we're safe. - break false if [Paren, Statements].include?(parent.class) - - [ - If, - IfMod, - IfOp, - Unless, - UnlessMod, - While, - WhileMod, - Until, - UntilMod - ].include?(parent.class) && parent.predicate == parents[index - 1] + previous = nil + q.parents.any? do |parent| + case parent + when Paren, Statements + # If we hit certain breakpoints then we know we're safe. + return false + when If, IfMod, IfOp, Unless, UnlessMod, While, WhileMod, Until, UntilMod + return true if parent.predicate == previous + previous = parent + end end end @@ -2034,12 +2047,12 @@ def format_break(q, opening, closing) unless statements.empty? q.indent do - q.breakable + q.breakable_space q.format(statements) end end - q.breakable + q.breakable_space q.text(closing) end @@ -2048,17 +2061,17 @@ def format_flat(q, opening, closing) q.format(BlockOpenFormatter.new(opening, block_open), stackable: false) if node.block_var - q.breakable + q.breakable_space q.format(node.block_var) - q.breakable + q.breakable_space end if statements.empty? q.text(" ") if opening == "do" else - q.breakable unless node.block_var + q.breakable_space unless node.block_var q.format(statements) - q.breakable + q.breakable_space end q.text(closing) @@ -2241,20 +2254,20 @@ def format(q) def format_array_contents(q, array) q.if_break { q.text("[") } q.indent do - q.breakable("") + q.breakable_empty q.format(array.contents) end - q.breakable("") + q.breakable_empty q.if_break { q.text("]") } end def format_arguments(q, opening, closing) q.if_break { q.text(opening) } q.indent do - q.breakable(" ") + q.breakable_space q.format(node.arguments) end - q.breakable("") + q.breakable_empty q.if_break { q.text(closing) } end @@ -2446,7 +2459,7 @@ def format_chain(q, children) in Call # If we're at a Call node and not a MethodAddBlock node in the # chain then we're going to add a newline so it indents properly. - q.breakable("") + q.breakable_empty else end @@ -2530,7 +2543,7 @@ def format_child( # them out here since we're bypassing the normal comment printing. if child.comments.any? && !skip_comments child.comments.each do |comment| - comment.inline? ? q.text(" ") : q.breakable + comment.inline? ? q.text(" ") : q.breakable_space comment.format(q) end @@ -2605,7 +2618,8 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if !CallChainFormatter.chained?(q.parent) && + if !ENV["STREE_SKIP_CALL_CHAIN"] && + !CallChainFormatter.chained?(q.parent) && CallChainFormatter.chained?(receiver) q.group do q @@ -2642,7 +2656,7 @@ def format_contents(q) q.group do q.indent do if receiver.comments.any? || call_operator.comments.any? - q.breakable(force: true) + q.breakable_force end if call_operator.comments.empty? @@ -2719,9 +2733,9 @@ def format(q) q.format(value) end - q.breakable(force: true) + q.breakable_force q.format(consequent) - q.breakable(force: true) + q.breakable_force q.text("end") end @@ -2788,7 +2802,7 @@ def format(q) else q.group do q.indent do - q.breakable + q.breakable_space q.format(pattern) end end @@ -2887,7 +2901,7 @@ def format(q) if bodystmt.empty? q.group do declaration.call - q.breakable(force: true) + q.breakable_force q.text("end") end else @@ -2895,11 +2909,11 @@ def format(q) declaration.call q.indent do - q.breakable(force: true) + q.breakable_force q.format(bodystmt) end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -3069,7 +3083,7 @@ def format(q) if message.comments.any?(&:leading?) q.format(CallOperatorFormatter.new(operator), stackable: false) q.indent do - q.breakable("") + q.breakable_empty q.format(message) end else @@ -3155,7 +3169,7 @@ def trailing? end def ignore? - value[1..].strip == "stree-ignore" + value.match?(/\A#\s*stree-ignore\s*\z/) end def comments @@ -3455,12 +3469,12 @@ def format(q) unless bodystmt.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(bodystmt) end end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -3549,7 +3563,7 @@ def format(q) q.text(" =") q.group do q.indent do - q.breakable + q.breakable_space q.format(statement) end end @@ -3590,13 +3604,15 @@ def deconstruct_keys(_keys) end def format(q) - q.group(0, "defined?(", ")") do + q.text("defined?(") + q.group do q.indent do - q.breakable("") + q.breakable_empty q.format(value) end - q.breakable("") + q.breakable_empty end + q.text(")") end end @@ -3678,12 +3694,12 @@ def format(q) unless bodystmt.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(bodystmt) end end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -3948,12 +3964,12 @@ def deconstruct_keys(_keys) def format(q) opening_quote, closing_quote = quotes(q) - q.group(0, opening_quote, closing_quote) do + q.text(opening_quote) + q.group do parts.each do |part| if part.is_a?(TStringContent) value = Quotes.normalize(part.value, closing_quote) - separator = -> { q.breakable(force: true, indent: false) } - q.seplist(value.split(/\r?\n/, -1), separator) do |text| + q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) do |text| q.text(text) end else @@ -3961,6 +3977,7 @@ def format(q) end end end + q.text(closing_quote) end private @@ -4056,7 +4073,7 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end @@ -4126,14 +4143,14 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end if consequent q.group do - q.breakable(force: true) + q.breakable_force q.format(consequent) end end @@ -4329,7 +4346,7 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end @@ -4588,7 +4605,7 @@ def format(q) q.text("[") q.indent do - q.breakable("") + q.breakable_empty q.text("*") q.format(left) @@ -4601,7 +4618,7 @@ def format(q) q.format(right) end - q.breakable("") + q.breakable_empty q.text("]") end end @@ -4663,12 +4680,12 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -4731,11 +4748,11 @@ def format(q) q.text("{") q.indent do lbrace.comments.each do |comment| - q.breakable(force: true) + q.breakable_force comment.format(q) end end - q.breakable(force: true) + q.breakable_force q.text("}") end end @@ -4800,14 +4817,14 @@ def format_contents(q) q.format(lbrace) if assocs.empty? - q.breakable("") + q.breakable_empty else q.indent do - q.breakable + q.breakable_space q.seplist(assocs) { |assoc| q.format(assoc) } q.if_break { q.text(",") } if q.trailing_comma? end - q.breakable + q.breakable_space end q.text("}") @@ -4873,22 +4890,32 @@ def deconstruct_keys(_keys) } end - def format(q) - # This is a very specific behavior where you want to force a newline, but - # don't want to force the break parent. - breakable = -> { q.breakable(indent: false, force: :skip_break_parent) } + # This is a very specific behavior where you want to force a newline, but + # don't want to force the break parent. + class Separator + DOC = PrettierPrint::Breakable.new(" ", 1, indent: false, force: true) + def call(q) + q.target << DOC + end + end + + # We're going to keep an instance around so we don't have to allocate a new + # one every time we format a heredoc. + SEPARATOR = Separator.new + + def format(q) q.group do q.format(beginning) q.line_suffix(priority: Formatter::HEREDOC_PRIORITY) do q.group do - breakable.call + SEPARATOR.call(q) parts.each do |part| if part.is_a?(TStringContent) texts = part.value.split(/\r?\n/, -1) - q.seplist(texts, breakable) { |text| q.text(text) } + q.seplist(texts, SEPARATOR) { |text| q.text(text) } else q.format(part) end @@ -5097,10 +5124,10 @@ def format(q) q.format(constant) q.text("[") q.indent do - q.breakable("") + q.breakable_empty contents.call end - q.breakable("") + q.breakable_empty q.text("]") end return @@ -5124,14 +5151,14 @@ def format(q) q.group do q.text("{") q.indent do - q.breakable + q.breakable_space contents.call end if q.target_ruby_version < Gem::Version.new("2.7.3") q.text(" }") else - q.breakable + q.breakable_space q.text("}") end end @@ -5188,8 +5215,12 @@ def self.call(parent) queue = [parent] while (node = queue.shift) - return true if [Assign, MAssign, OpAssign].include?(node.class) - queue += node.child_nodes.compact + case node + when Assign, MAssign, OpAssign + return true + else + node.child_nodes.each { |child| queue << child if child } + end end false @@ -5311,17 +5342,17 @@ def format_break(q, force:) unless node.statements.empty? q.indent do - q.breakable(force: force) + force ? q.breakable_force : q.breakable_space q.format(node.statements) end end if node.consequent - q.breakable(force: force) + force ? q.breakable_force : q.breakable_space q.format(node.consequent) end - q.breakable(force: force) + force ? q.breakable_force : q.breakable_space q.text("end") end @@ -5333,11 +5364,11 @@ def format_ternary(q) q.nest(keyword.length + 1) { q.format(node.predicate) } q.indent do - q.breakable + q.breakable_space q.format(node.statements) end - q.breakable + q.breakable_space q.group do q.format(node.consequent.keyword) q.indent do @@ -5351,7 +5382,7 @@ def format_ternary(q) end end - q.breakable + q.breakable_space q.text("end") end .if_flat do @@ -5507,19 +5538,19 @@ def format_break(q) q.nest("if ".length) { q.format(predicate) } q.indent do - q.breakable + q.breakable_space q.format(truthy) end - q.breakable + q.breakable_space q.text("else") q.indent do - q.breakable + q.breakable_space q.format(falsy) end - q.breakable + q.breakable_space q.text("end") end end @@ -5529,11 +5560,11 @@ def format_flat(q) q.text(" ?") q.indent do - q.breakable + q.breakable_space q.format(truthy) q.text(" :") - q.breakable + q.breakable_space q.format(falsy) end end @@ -5566,10 +5597,10 @@ def format_break(q) q.text("#{keyword} ") q.nest(keyword.length + 1) { q.format(node.predicate) } q.indent do - q.breakable + q.breakable_space q.format(node.statement) end - q.breakable + q.breakable_space q.text("end") end @@ -5720,13 +5751,13 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end if consequent - q.breakable(force: true) + q.breakable_force q.format(consequent) end end @@ -6013,7 +6044,8 @@ def deconstruct_keys(_keys) end def format(q) - q.group(0, "->") do + q.text("->") + q.group do if params.is_a?(Paren) q.format(params) unless params.contents.empty? elsif params.empty? && params.comments.any? @@ -6039,10 +6071,10 @@ def format(q) unless statements.empty? q.indent do - q.breakable + q.breakable_space q.format(statements) end - q.breakable + q.breakable_space end q.text("}") @@ -6051,12 +6083,12 @@ def format(q) unless statements.empty? q.indent do - q.breakable + q.breakable_space q.format(statements) end end - q.breakable + q.breakable_space q.text("end") end end @@ -6123,7 +6155,7 @@ def format(q) if locals.any? q.text("; ") - q.seplist(locals, -> { q.text(", ") }) { |local| q.format(local) } + q.seplist(locals, BlockVar::SEPARATOR) { |local| q.format(local) } end end end @@ -6277,7 +6309,7 @@ def format(q) q.group { q.format(target) } q.text(" =") q.indent do - q.breakable + q.breakable_space q.format(value) end end @@ -6323,7 +6355,8 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if !CallChainFormatter.chained?(q.parent) && + if !ENV["STREE_SKIP_CALL_CHAIN"] && + !CallChainFormatter.chained?(q.parent) && CallChainFormatter.chained?(call) q.group do q @@ -6431,15 +6464,17 @@ def format(q) q.format(contents) q.text(",") if comma else - q.group(0, "(", ")") do + q.text("(") + q.group do q.indent do - q.breakable("") + q.breakable_empty q.format(contents) end q.text(",") if comma - q.breakable("") + q.breakable_empty end + q.text(")") end end end @@ -6496,7 +6531,7 @@ def format(q) if bodystmt.empty? q.group do declaration.call - q.breakable(force: true) + q.breakable_force q.text("end") end else @@ -6504,11 +6539,11 @@ def format(q) declaration.call q.indent do - q.breakable(force: true) + q.breakable_force q.format(bodystmt) end - q.breakable(force: true) + q.breakable_force q.text("end") end end @@ -6696,7 +6731,7 @@ def format(q) q.format(value) else q.indent do - q.breakable + q.breakable_space q.format(value) end end @@ -6767,10 +6802,10 @@ def self.break(q) q.text("(") q.indent do - q.breakable("") + q.breakable_empty yield end - q.breakable("") + q.breakable_empty q.text(")") end end @@ -6970,12 +7005,16 @@ def format(q) if ![Def, Defs, DefEndless].include?(q.parent.class) || parts.empty? q.nest(0, &contents) else - q.group(0, "(", ")") do - q.indent do - q.breakable("") - contents.call + q.nest(0) do + q.text("(") + q.group do + q.indent do + q.breakable_empty + contents.call + end + q.breakable_empty end - q.breakable("") + q.text(")") end end end @@ -7029,12 +7068,12 @@ def format(q) if contents && (!contents.is_a?(Params) || !contents.empty?) q.indent do - q.breakable("") + q.breakable_empty q.format(contents) end end - q.breakable("") + q.breakable_empty q.text(")") end end @@ -7108,7 +7147,7 @@ def format(q) # We're going to put a newline on the end so that it always has one unless # it ends with the special __END__ syntax. In that case we want to # replicate the text exactly so we will just let it be. - q.breakable(force: true) unless statements.body.last.is_a?(EndContent) + q.breakable_force unless statements.body.last.is_a?(EndContent) end end @@ -7160,15 +7199,17 @@ def format(q) closing = Quotes.matching(opening[2]) end - q.group(0, opening, closing) do + q.text(opening) + q.group do q.indent do - q.breakable("") - q.seplist(elements, -> { q.breakable }) do |element| + q.breakable_empty + q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end - q.breakable("") + q.breakable_empty end + q.text(closing) end end @@ -7251,15 +7292,17 @@ def format(q) closing = Quotes.matching(opening[2]) end - q.group(0, opening, closing) do + q.text(opening) + q.group do q.indent do - q.breakable("") - q.seplist(elements, -> { q.breakable }) do |element| + q.breakable_empty + q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end - q.breakable("") + q.breakable_empty end + q.text(closing) end end @@ -7781,13 +7824,13 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end if consequent - q.breakable(force: true) + q.breakable_force q.format(consequent) end end @@ -7835,19 +7878,21 @@ def deconstruct_keys(_keys) end def format(q) - q.group(0, "begin", "end") do + q.text("begin") + q.group do q.indent do - q.breakable(force: true) + q.breakable_force q.format(statement) end - q.breakable(force: true) + q.breakable_force q.text("rescue StandardError") q.indent do - q.breakable(force: true) + q.breakable_force q.format(value) end - q.breakable(force: true) + q.breakable_force end + q.text("end") end end @@ -8066,14 +8111,16 @@ def deconstruct_keys(_keys) end def format(q) - q.group(0, "class << ", "end") do + q.text("class << ") + q.group do q.format(target) q.indent do - q.breakable(force: true) + q.breakable_force q.format(bodystmt) end - q.breakable(force: true) + q.breakable_force end + q.text("end") end end @@ -8179,37 +8226,34 @@ def format(q) end end - access_controls = - Hash.new do |hash, node| - hash[node] = node.is_a?(VCall) && - %w[private protected public].include?(node.value.value) - end - - body.each_with_index do |statement, index| + previous = nil + body.each do |statement| next if statement.is_a?(VoidStmt) if line.nil? q.format(statement) elsif (statement.location.start_line - line) > 1 - q.breakable(force: true) - q.breakable(force: true) + q.breakable_force + q.breakable_force q.format(statement) - elsif access_controls[statement] || access_controls[body[index - 1]] - q.breakable(force: true) - q.breakable(force: true) + elsif (statement.is_a?(VCall) && statement.access_control?) || + (previous.is_a?(VCall) && previous.access_control?) + q.breakable_force + q.breakable_force q.format(statement) elsif statement.location.start_line != line - q.breakable(force: true) + q.breakable_force q.format(statement) elsif !q.parent.is_a?(StringEmbExpr) - q.breakable(force: true) + q.breakable_force q.format(statement) else q.text("; ") q.format(statement) end - + line = statement.location.end_line + previous = statement end end @@ -8327,7 +8371,7 @@ def format(q) q.format(left) q.text(" \\") q.indent do - q.breakable(force: true) + q.breakable_force q.format(right) end end @@ -8413,15 +8457,21 @@ def format(q) # same line in the source, then we're going to leave them in place and # assume that's the way the developer wanted this expression # represented. - q.remove_breaks(q.group(0, '#{', "}") { q.format(statements) }) + q.remove_breaks( + q.group do + q.text('#{') + q.format(statements) + q.text("}") + end + ) else q.group do q.text('#{') q.indent do - q.breakable("") + q.breakable_empty q.format(statements) end - q.breakable("") + q.breakable_empty q.text("}") end end @@ -8479,12 +8529,12 @@ def format(q) [quote, quote] end - q.group(0, opening_quote, closing_quote) do + q.text(opening_quote) + q.group do parts.each do |part| if part.is_a?(TStringContent) value = Quotes.normalize(part.value, closing_quote) - separator = -> { q.breakable(force: true, indent: false) } - q.seplist(value.split(/\r?\n/, -1), separator) do |text| + q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) do |text| q.text(text) end else @@ -8492,6 +8542,7 @@ def format(q) end end end + q.text(closing_quote) end end @@ -8698,15 +8749,17 @@ def format(q) closing = Quotes.matching(opening[2]) end - q.group(0, opening, closing) do + q.text(opening) + q.group do q.indent do - q.breakable("") - q.seplist(elements, -> { q.breakable }) do |element| + q.breakable_empty + q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end - q.breakable("") + q.breakable_empty end + q.text(closing) end end @@ -9031,27 +9084,26 @@ def deconstruct_keys(_keys) end def format(q) - parent = q.parents.take(2)[1] - ternary = - (parent.is_a?(If) || parent.is_a?(Unless)) && - Ternaryable.call(q, parent) - q.text("not") if parentheses q.text("(") - elsif ternary - q.if_break { q.text(" ") }.if_flat { q.text("(") } - else - q.text(" ") - end - - q.format(statement) if statement - - if parentheses + q.format(statement) if statement q.text(")") - elsif ternary - q.if_flat { q.text(")") } + else + parent = q.parents.take(2)[1] + ternary = + (parent.is_a?(If) || parent.is_a?(Unless)) && + Ternaryable.call(q, parent) + + if ternary + q.if_break { q.text(" ") }.if_flat { q.text("(") } + q.format(statement) if statement + q.if_flat { q.text(")") } if ternary + else + q.text(" ") + q.format(statement) if statement + end end end end @@ -9316,10 +9368,10 @@ def format_break(q) q.text("#{keyword} ") q.nest(keyword.length + 1) { q.format(node.predicate) } q.indent do - q.breakable("") + q.breakable_empty q.format(statements) end - q.breakable("") + q.breakable_empty q.text("end") end end @@ -9372,7 +9424,7 @@ def format(q) q.group do q.text(keyword) q.nest(keyword.length) { q.format(predicate) } - q.breakable(force: true) + q.breakable_force q.text("end") end else @@ -9572,6 +9624,27 @@ def deconstruct_keys(_keys) def format(q) q.format(value) end + + # Oh man I hate this so much. Basically, ripper doesn't provide enough + # functionality to actually know where pins are within an expression. So we + # have to walk the tree ourselves and insert more information. In doing so, + # we have to replace this node by a pinned node when necessary. + # + # To be clear, this method should just not exist. It's not good. It's a + # place of shame. But it's necessary for now, so I'm keeping it. + def pin(parent) + replace = PinnedVarRef.new(value: value, location: location) + + parent.deconstruct_keys([]).each do |key, value| + if value == self + parent.instance_variable_set(:"@#{key}", replace) + break + elsif value.is_a?(Array) && (index = value.index(self)) + parent.public_send(key)[index] = replace + break + end + end + end end # PinnedVarRef represents a pinned variable reference within a pattern @@ -9653,6 +9726,10 @@ def deconstruct_keys(_keys) def format(q) q.format(value) end + + def access_control? + @access_control ||= %w[private protected public].include?(value.value) + end end # VoidStmt represents an empty lexical block of code. @@ -9742,6 +9819,22 @@ def deconstruct_keys(_keys) } end + # We have a special separator here for when clauses which causes them to + # fill as much of the line as possible as opposed to everything breaking + # into its own line as soon as you hit the print limit. + class Separator + def call(q) + q.group do + q.text(",") + q.breakable_space + end + end + end + + # We're going to keep a single instance of this separator around so we don't + # have to allocate a new one every time we format a when clause. + SEPARATOR = Separator.new + def format(q) keyword = "when " @@ -9752,8 +9845,7 @@ def format(q) if arguments.comments.any? q.format(arguments) else - separator = -> { q.group { q.comma_breakable } } - q.seplist(arguments.parts, separator) { |part| q.format(part) } + q.seplist(arguments.parts, SEPARATOR) { |part| q.format(part) } end # Very special case here. If you're inside of a when clause and the @@ -9768,13 +9860,13 @@ def format(q) unless statements.empty? q.indent do - q.breakable(force: true) + q.breakable_force q.format(statements) end end if consequent - q.breakable(force: true) + q.breakable_force q.format(consequent) end end @@ -9829,7 +9921,7 @@ def format(q) q.group do q.text(keyword) q.nest(keyword.length) { q.format(predicate) } - q.breakable(force: true) + q.breakable_force q.text("end") end else @@ -9995,15 +10087,17 @@ def format(q) closing = Quotes.matching(opening[2]) end - q.group(0, opening, closing) do + q.text(opening) + q.group do q.indent do - q.breakable("") - q.seplist(elements, -> { q.breakable }) do |element| + q.breakable_empty + q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end - q.breakable("") + q.breakable_empty end + q.text(closing) end end @@ -10147,10 +10241,10 @@ def format(q) else q.if_break { q.text("(") }.if_flat { q.text(" ") } q.indent do - q.breakable("") + q.breakable_empty q.format(arguments) end - q.breakable("") + q.breakable_empty q.if_break { q.text(")") } end end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 94ce115a..9ca26155 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -60,29 +60,46 @@ def [](byteindex) # This represents all of the tokens coming back from the lexer. It is # replacing a simple array because it keeps track of the last deleted token # from the list for better error messages. - class TokenList < SimpleDelegator - attr_reader :last_deleted + class TokenList + attr_reader :tokens, :last_deleted - def initialize(object) - super + def initialize + @tokens = [] @last_deleted = nil end + def <<(token) + tokens << token + end + + def [](index) + tokens[index] + end + + def any?(&block) + tokens.any?(&block) + end + + def reverse_each(&block) + tokens.reverse_each(&block) + end + + def rindex(&block) + tokens.rindex(&block) + end + def delete(value) - @last_deleted = super || @last_deleted + @last_deleted = tokens.delete(value) || @last_deleted end def delete_at(index) - @last_deleted = super + @last_deleted = tokens.delete_at(index) end end # [String] the source being parsed attr_reader :source - # [Array[ String ]] the list of lines in the source - attr_reader :lines - # [Array[ SingleByteString | MultiByteString ]] the list of objects that # represent the start of each line in character offsets attr_reader :line_counts @@ -105,12 +122,6 @@ def initialize(source, *) # example. @source = source - # Similarly, we keep the lines of the source string around to be able to - # check if certain lines contain certain characters. For example, we'll - # use this to generate the content that goes after the __END__ keyword. - # Or we'll use this to check if a comment has other content on its line. - @lines = source.split(/\r?\n/) - # This is the full set of comments that have been found by the parser. # It's a running list. At the end of every block of statements, they will # go in and attempt to grab any comments that are on their own line and @@ -144,7 +155,7 @@ def initialize(source, *) # Most of the time, when a parser event consumes one of these events, it # will be deleted from the list. So ideally, this list stays pretty short # over the course of parsing a source string. - @tokens = TokenList.new([]) + @tokens = TokenList.new # Here we're going to build up a list of SingleByteString or # MultiByteString objects. They're each going to represent a string in the @@ -283,13 +294,18 @@ def find_colon2_before(const) # By finding the next non-space character, we can make sure that the bounds # of the statement list are correct. def find_next_statement_start(position) - remaining = source[position..] - - if remaining.sub(/\A +/, "")[0] == "#" - return position + remaining.index("\n") + maximum = source.length + + position.upto(maximum) do |pound_index| + case source[pound_index] + when "#" + return source.index("\n", pound_index + 1) || maximum + when " " + # continue + else + return position + end end - - position end # -------------------------------------------------------------------------- @@ -567,6 +583,56 @@ def on_array(contents) end end + # Ugh... I really do not like this class. Basically, ripper doesn't provide + # enough information about where pins are located in the tree. It only gives + # events for ^ ops and var_ref nodes. You have to piece it together + # yourself. + # + # Note that there are edge cases here that we straight up do not address, + # because I honestly think it's going to be faster to write a new parser + # than to address them. For example, this will not work properly: + # + # foo in ^((bar = 0; bar; baz)) + # + # If someone actually does something like that, we'll have to find another + # way to make this work. + class PinVisitor < Visitor + attr_reader :pins, :stack + + def initialize(pins) + @pins = pins + @stack = [] + end + + def visit(node) + return if pins.empty? + stack << node + super + stack.pop + end + + def visit_var_ref(node) + pins.shift + node.pin(stack[-2]) + end + + def self.visit(node, tokens) + start_char = node.location.start_char + allocated = [] + + tokens.reverse_each do |token| + char = token.location.start_char + break if char <= start_char + + if token.is_a?(Op) && token.value == "^" + allocated.unshift(tokens.delete(token)) + end + end + + new(allocated).visit(node) if allocated.any? + end + end + # :call-seq: # on_aryptn: ( # (nil | VarRef) constant, @@ -917,12 +983,15 @@ def on_case(value, consequent) find_token(Op, "=>") end - RAssign.new( + node = RAssign.new( value: value, operator: operator, pattern: consequent, location: value.location.to(consequent.location) ) + + PinVisitor.visit(node, tokens) + node end end @@ -1004,20 +1073,20 @@ def on_command_call(receiver, operator, message, arguments) # :call-seq: # on_comment: (String value) -> Comment def on_comment(value) - line = lineno - comment = - Comment.new( - value: value.chomp, - inline: value.strip != lines[line - 1].strip, - location: - Location.token( - line: line, - char: char_pos, - column: current_column, - size: value.size - 1 - ) + char = char_pos + location = + Location.token( + line: lineno, + char: char, + column: current_column, + size: value.size - 1 ) + index = source.rindex(/[^\t ]/, char - 1) if char != 0 + inline = index && (source[index] != "\n") + comment = + Comment.new(value: value.chomp, inline: inline, location: location) + @comments << comment comment end @@ -1878,12 +1947,15 @@ def on_in(pattern, statements, consequent) ending.location.start_column ) - In.new( + node = In.new( pattern: pattern, statements: statements, consequent: consequent, location: beginning.location.to(ending.location) ) + + PinVisitor.visit(node, tokens) + node end # :call-seq: @@ -2551,13 +2623,13 @@ def on_period(value) # :call-seq: # on_program: (Statements statements) -> Program def on_program(statements) - last_column = source.length - line_counts[lines.length - 1].start + last_column = source.length - line_counts.last.start location = Location.new( start_line: 1, start_char: 0, start_column: 0, - end_line: lines.length, + end_line: line_counts.length - 1, end_char: source.length, end_column: last_column ) @@ -3569,17 +3641,7 @@ def on_var_field(value) # :call-seq: # on_var_ref: ((Const | CVar | GVar | Ident | IVar | Kw) value) -> VarRef def on_var_ref(value) - pin = find_token(Op, "^", consume: false) - - if pin && pin.location.start_char == value.location.start_char - 1 - tokens.delete(pin) - PinnedVarRef.new( - value: value, - location: pin.location.to(value.location) - ) - else - VarRef.new(value: value, location: value.location) - end + VarRef.new(value: value, location: value.location) end # :call-seq: diff --git a/test/cli_test.rb b/test/cli_test.rb index 3734e734..03293333 100644 --- a/test/cli_test.rb +++ b/test/cli_test.rb @@ -139,7 +139,7 @@ def test_inline_script def test_multiple_inline_scripts stdio, = capture_io { SyntaxTree::CLI.run(%w[format -e 1+1 -e 2+2]) } - assert_equal("1 + 1\n2 + 2\n", stdio) + assert_equal(["1 + 1", "2 + 2"], stdio.split("\n").sort) end def test_generic_error diff --git a/test/node_test.rb b/test/node_test.rb index 07c2fe26..1a5af125 100644 --- a/test/node_test.rb +++ b/test/node_test.rb @@ -951,7 +951,7 @@ def test_var_field guard_version("3.1.0") do def test_pinned_var_ref source = "foo in ^bar" - at = location(chars: 7..11) + at = location(chars: 8..11) assert_node(PinnedVarRef, source, at: at, &:pattern) end diff --git a/test/quotes_test.rb b/test/quotes_test.rb new file mode 100644 index 00000000..2e2e0243 --- /dev/null +++ b/test/quotes_test.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require_relative "test_helper" + +module SyntaxTree + class QuotesTest < Minitest::Test + def test_normalize + content = "'aaa' \"bbb\" \\'ccc\\' \\\"ddd\\\"" + enclosing = "\"" + + result = Quotes.normalize(content, enclosing) + assert_equal "'aaa' \\\"bbb\\\" \\'ccc\\' \\\"ddd\\\"", result + end + end +end From ed0c4754aee57a80a23d030f30df76df6d710435 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 15 Oct 2022 21:45:43 -0400 Subject: [PATCH 02/15] Split up find_token options into more explicit variants --- lib/syntax_tree/parser.rb | 327 ++++++++++++++++++++------------------ 1 file changed, 173 insertions(+), 154 deletions(-) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 9ca26155..132780b6 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -244,28 +244,49 @@ def find_token_error(location) # "module" (which would happen to be the innermost keyword). Then the outer # one would only be able to grab the first one. In this way all of the # tokens act as their own stack. - def find_token(type, value = :any, consume: true, location: nil) + # + # If we're expecting to be able to find a token and consume it, but can't + # actually find it, then we need to raise an error. This is _usually_ caused + # by a syntax error in the source that we're printing. It could also be + # caused by accidentally attempting to consume a token twice by two + # different parser event handlers. + def find_token(type) + index = tokens.rindex { |token| token.is_a?(type) } + tokens[index] if index + end + + def find_token_value(type, value) index = tokens.rindex do |token| - token.is_a?(type) && (value == :any || (token.value == value)) + token.is_a?(type) && (token.value == value) end - if consume - # If we're expecting to be able to find a token and consume it, but - # can't actually find it, then we need to raise an error. This is - # _usually_ caused by a syntax error in the source that we're printing. - # It could also be caused by accidentally attempting to consume a token - # twice by two different parser event handlers. - unless index - token = value == :any ? type.name.split("::", 2).last : value - message = "Cannot find expected #{token}" - raise ParseError.new(message, *find_token_error(location)) + tokens[index] if index + end + + def consume_token(type, location: nil) + index = tokens.rindex { |token| token.is_a?(type) } + + unless index + message = "Cannot find expected #{type.name.split("::", 2).last}" + raise ParseError.new(message, *find_token_error(location)) + end + + tokens.delete_at(index) + end + + def consume_token_value(type, value) + index = + tokens.rindex do |token| + token.is_a?(type) && (token.value == value) end - tokens.delete_at(index) - elsif index - tokens[index] + unless index + message = "Cannot find expected #{value}" + raise ParseError.new(message, *find_token_error(nil)) end + + tokens.delete_at(index) end # A helper function to find a :: operator. We do special handling instead of @@ -316,8 +337,8 @@ def find_next_statement_start(position) # :call-seq: # on_BEGIN: (Statements statements) -> BEGINBlock def on_BEGIN(statements) - lbrace = find_token(LBrace) - rbrace = find_token(RBrace) + lbrace = consume_token(LBrace) + rbrace = consume_token(RBrace) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( @@ -327,7 +348,7 @@ def on_BEGIN(statements) rbrace.location.start_column ) - keyword = find_token(Kw, "BEGIN") + keyword = consume_token_value(Kw, "BEGIN") BEGINBlock.new( lbrace: lbrace, @@ -354,8 +375,8 @@ def on_CHAR(value) # :call-seq: # on_END: (Statements statements) -> ENDBlock def on_END(statements) - lbrace = find_token(LBrace) - rbrace = find_token(RBrace) + lbrace = consume_token(LBrace) + rbrace = consume_token(RBrace) start_char = find_next_statement_start(lbrace.location.end_char) statements.bind( @@ -365,7 +386,7 @@ def on_END(statements) rbrace.location.start_column ) - keyword = find_token(Kw, "END") + keyword = consume_token_value(Kw, "END") ENDBlock.new( lbrace: lbrace, @@ -396,7 +417,7 @@ def on___end__(value) # (DynaSymbol | SymbolLiteral) right # ) -> Alias def on_alias(left, right) - keyword = find_token(Kw, "alias") + keyword = consume_token_value(Kw, "alias") Alias.new( left: left, @@ -408,8 +429,8 @@ def on_alias(left, right) # :call-seq: # on_aref: (untyped collection, (nil | Args) index) -> ARef def on_aref(collection, index) - find_token(LBracket) - rbracket = find_token(RBracket) + consume_token(LBracket) + rbracket = consume_token(RBracket) ARef.new( collection: collection, @@ -424,8 +445,8 @@ def on_aref(collection, index) # (nil | Args) index # ) -> ARefField def on_aref_field(collection, index) - find_token(LBracket) - rbracket = find_token(RBracket) + consume_token(LBracket) + rbracket = consume_token(RBracket) ARefField.new( collection: collection, @@ -443,8 +464,8 @@ def on_aref_field(collection, index) # (nil | Args | ArgsForward) arguments # ) -> ArgParen def on_arg_paren(arguments) - lparen = find_token(LParen) - rparen = find_token(RParen) + lparen = consume_token(LParen) + rparen = consume_token(RParen) # If the arguments exceed the ending of the parentheses, then we know we # have a heredoc in the arguments, and we need to use the bounds of the @@ -489,7 +510,7 @@ def on_args_add_block(arguments, block) # First, see if there is an & operator that could potentially be # associated with the block part of this args_add_block. If there is not, # then just return the arguments. - operator = find_token(Op, "&", consume: false) + operator = find_token_value(Op, "&") return arguments unless operator # If there are any arguments and the operator we found from the list is @@ -521,7 +542,7 @@ def on_args_add_block(arguments, block) # :call-seq: # on_args_add_star: (Args arguments, untyped star) -> Args def on_args_add_star(arguments, argument) - beginning = find_token(Op, "*") + beginning = consume_token_value(Op, "*") ending = argument || beginning location = @@ -543,7 +564,7 @@ def on_args_add_star(arguments, argument) # :call-seq: # on_args_forward: () -> ArgsForward def on_args_forward - op = find_token(Op, "...") + op = consume_token_value(Op, "...") ArgsForward.new(value: op.value, location: op.location) end @@ -563,8 +584,8 @@ def on_args_new # ArrayLiteral | QSymbols | QWords | Symbols | Words def on_array(contents) if !contents || contents.is_a?(Args) - lbracket = find_token(LBracket) - rbracket = find_token(RBracket) + lbracket = consume_token(LBracket) + rbracket = consume_token(RBracket) ArrayLiteral.new( lbracket: lbracket, @@ -573,7 +594,7 @@ def on_array(contents) ) else tstring_end = - find_token(TStringEnd, location: contents.beginning.location) + consume_token(TStringEnd, location: contents.beginning.location) contents.class.new( beginning: contents.beginning, @@ -649,7 +670,7 @@ def on_aryptn(constant, requireds, rest, posts) # of the various parts. location = if parts.empty? - find_token(LBracket).location.to(find_token(RBracket).location) + consume_token(LBracket).location.to(consume_token(RBracket).location) else parts[0].location.to(parts[-1].location) end @@ -710,7 +731,7 @@ def on_assoc_new(key, value) # :call-seq: # on_assoc_splat: (untyped value) -> AssocSplat def on_assoc_splat(value) - operator = find_token(Op, "**") + operator = consume_token_value(Op, "**") AssocSplat.new( value: value, @@ -770,23 +791,23 @@ def on_bare_assoc_hash(assocs) # :call-seq: # on_begin: (untyped bodystmt) -> Begin | PinnedBegin def on_begin(bodystmt) - pin = find_token(Op, "^", consume: false) + pin = find_token_value(Op, "^") if pin && pin.location.start_char < bodystmt.location.start_char tokens.delete(pin) - find_token(LParen) + consume_token(LParen) - rparen = find_token(RParen) + rparen = consume_token(RParen) location = pin.location.to(rparen.location) PinnedBegin.new(statement: bodystmt, location: location) else - keyword = find_token(Kw, "begin") + keyword = consume_token_value(Kw, "begin") end_location = if bodystmt.else_clause bodystmt.location else - find_token(Kw, "end").location + consume_token_value(Kw, "end").location end bodystmt.bind( @@ -861,7 +882,7 @@ def on_block_var(params, locals) # :call-seq: # on_blockarg: (Ident name) -> BlockArg def on_blockarg(name) - operator = find_token(Op, "&") + operator = consume_token_value(Op, "&") location = operator.location location = location.to(name.location) if name @@ -880,7 +901,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) BodyStmt.new( statements: statements, rescue_clause: rescue_clause, - else_keyword: else_clause && find_token(Kw, "else"), + else_keyword: else_clause && consume_token_value(Kw, "else"), else_clause: else_clause, ensure_clause: ensure_clause, location: @@ -894,8 +915,8 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) # Statements statements # ) -> BraceBlock def on_brace_block(block_var, statements) - lbrace = find_token(LBrace) - rbrace = find_token(RBrace) + lbrace = consume_token(LBrace) + rbrace = consume_token(RBrace) location = (block_var || lbrace).location start_char = find_next_statement_start(location.end_char) @@ -930,7 +951,7 @@ def on_brace_block(block_var, statements) # :call-seq: # on_break: (Args arguments) -> Break def on_break(arguments) - keyword = find_token(Kw, "break") + keyword = consume_token_value(Kw, "break") location = keyword.location location = location.to(arguments.location) if arguments.parts.any? @@ -966,7 +987,7 @@ def on_call(receiver, operator, message) # :call-seq: # on_case: (untyped value, untyped consequent) -> Case | RAssign def on_case(value, consequent) - if (keyword = find_token(Kw, "case", consume: false)) + if (keyword = find_token_value(Kw, "case")) tokens.delete(keyword) Case.new( @@ -977,10 +998,10 @@ def on_case(value, consequent) ) else operator = - if (keyword = find_token(Kw, "in", consume: false)) + if (keyword = find_token_value(Kw, "in")) tokens.delete(keyword) else - find_token(Op, "=>") + consume_token_value(Op, "=>") end node = RAssign.new( @@ -1002,8 +1023,8 @@ def on_case(value, consequent) # BodyStmt bodystmt # ) -> ClassDeclaration def on_class(constant, superclass, bodystmt) - beginning = find_token(Kw, "class") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "class") + ending = consume_token_value(Kw, "end") location = (superclass || constant).location start_char = find_next_statement_start(location.end_char) @@ -1161,7 +1182,7 @@ def on_def(name, params, bodystmt) # Find the beginning of the method definition, which works for single-line # and normal method definitions. - beginning = find_token(Kw, "def") + beginning = consume_token_value(Kw, "def") # If there aren't any params then we need to correct the params node # location information @@ -1181,7 +1202,7 @@ def on_def(name, params, bodystmt) params = Params.new(location: location) end - ending = find_token(Kw, "end", consume: false) + ending = find_token_value(Kw, "end") if ending tokens.delete(ending) @@ -1219,13 +1240,13 @@ def on_def(name, params, bodystmt) # :call-seq: # on_defined: (untyped value) -> Defined def on_defined(value) - beginning = find_token(Kw, "defined?") + beginning = consume_token_value(Kw, "defined?") ending = value range = beginning.location.end_char...value.location.start_char if source[range].include?("(") - find_token(LParen) - ending = find_token(RParen) + consume_token(LParen) + ending = consume_token(RParen) end Defined.new( @@ -1266,8 +1287,8 @@ def on_defs(target, operator, name, params, bodystmt) params = Params.new(location: location) end - beginning = find_token(Kw, "def") - ending = find_token(Kw, "end", consume: false) + beginning = consume_token_value(Kw, "def") + ending = find_token_value(Kw, "end") if ending tokens.delete(ending) @@ -1307,8 +1328,8 @@ def on_defs(target, operator, name, params, bodystmt) # :call-seq: # on_do_block: (BlockVar block_var, BodyStmt bodystmt) -> DoBlock def on_do_block(block_var, bodystmt) - beginning = find_token(Kw, "do") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "do") + ending = consume_token_value(Kw, "end") location = (block_var || beginning).location start_char = find_next_statement_start(location.end_char) @@ -1330,7 +1351,7 @@ def on_do_block(block_var, bodystmt) # :call-seq: # on_dot2: ((nil | untyped) left, (nil | untyped) right) -> Dot2 def on_dot2(left, right) - operator = find_token(Op, "..") + operator = consume_token_value(Op, "..") beginning = left || operator ending = right || operator @@ -1345,7 +1366,7 @@ def on_dot2(left, right) # :call-seq: # on_dot3: ((nil | untyped) left, (nil | untyped) right) -> Dot3 def on_dot3(left, right) - operator = find_token(Op, "...") + operator = consume_token_value(Op, "...") beginning = left || operator ending = right || operator @@ -1360,10 +1381,10 @@ def on_dot3(left, right) # :call-seq: # on_dyna_symbol: (StringContent string_content) -> DynaSymbol def on_dyna_symbol(string_content) - if find_token(SymBeg, consume: false) + if symbeg = find_token(SymBeg) # A normal dynamic symbol - symbeg = find_token(SymBeg) - tstring_end = find_token(TStringEnd, location: symbeg.location) + tokens.delete(symbeg) + tstring_end = consume_token(TStringEnd, location: symbeg.location) DynaSymbol.new( quote: symbeg.value, @@ -1372,8 +1393,8 @@ def on_dyna_symbol(string_content) ) else # A dynamic symbol as a hash key - tstring_beg = find_token(TStringBeg) - label_end = find_token(LabelEnd) + tstring_beg = consume_token(TStringBeg) + label_end = consume_token(LabelEnd) DynaSymbol.new( parts: string_content.parts, @@ -1386,7 +1407,7 @@ def on_dyna_symbol(string_content) # :call-seq: # on_else: (Statements statements) -> Else def on_else(statements) - keyword = find_token(Kw, "else") + keyword = consume_token_value(Kw, "else") # else can either end with an end keyword (in which case we'll want to # consume that event) or it can end with an ensure keyword (in which case @@ -1426,8 +1447,8 @@ def on_else(statements) # (nil | Elsif | Else) consequent # ) -> Elsif def on_elsif(predicate, statements, consequent) - beginning = find_token(Kw, "elsif") - ending = consequent || find_token(Kw, "end") + beginning = consume_token_value(Kw, "elsif") + ending = consequent || consume_token_value(Kw, "end") start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -1547,11 +1568,11 @@ def on_embvar(value) # :call-seq: # on_ensure: (Statements statements) -> Ensure def on_ensure(statements) - keyword = find_token(Kw, "ensure") + keyword = consume_token_value(Kw, "ensure") # We don't want to consume the :@kw event, because that would break # def..ensure..end chains. - ending = find_token(Kw, "end", consume: false) + ending = find_token_value(Kw, "end") start_char = find_next_statement_start(keyword.location.end_char) statements.bind( start_char, @@ -1573,7 +1594,7 @@ def on_ensure(statements) # :call-seq: # on_excessed_comma: () -> ExcessedComma def on_excessed_comma(*) - comma = find_token(Comma) + comma = consume_token(Comma) ExcessedComma.new(value: comma.value, location: comma.location) end @@ -1626,9 +1647,7 @@ def on_fndptn(constant, left, values, right) # right left parenthesis, or the left splat. We're going to use this to # determine how to find the closing of the pattern, as well as determining # the location of the node. - opening = - find_token(LBracket, consume: false) || - find_token(LParen, consume: false) || left + opening = find_token(LBracket) || find_token(LParen) || left # The closing is based on the opening, which is either the matched # punctuation or the right splat. @@ -1636,10 +1655,10 @@ def on_fndptn(constant, left, values, right) case opening in LBracket tokens.delete(opening) - find_token(RBracket) + consume_token(RBracket) in LParen tokens.delete(opening) - find_token(RParen) + consume_token(RParen) else right end @@ -1660,13 +1679,13 @@ def on_fndptn(constant, left, values, right) # Statements statements # ) -> For def on_for(index, collection, statements) - beginning = find_token(Kw, "for") - in_keyword = find_token(Kw, "in") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "for") + in_keyword = consume_token_value(Kw, "in") + ending = consume_token_value(Kw, "end") # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token(Kw, "do", consume: false) + keyword = find_token_value(Kw, "do") if keyword && keyword.location.start_char > collection.location.end_char && keyword.location.end_char < ending.location.start_char @@ -1714,8 +1733,8 @@ def on_gvar(value) # :call-seq: # on_hash: ((nil | Array[AssocNew | AssocSplat]) assocs) -> HashLiteral def on_hash(assocs) - lbrace = find_token(LBrace) - rbrace = find_token(RBrace) + lbrace = consume_token(LBrace) + rbrace = consume_token(RBrace) HashLiteral.new( lbrace: lbrace, @@ -1799,8 +1818,8 @@ def on_hshptn(constant, keywords, keyword_rest) if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. - find_token(Op, "**") - elsif (token = find_token(Op, "**", consume: false)) + consume_token_value(Op, "**") + elsif (token = find_token_value(Op, "**")) tokens.delete(token) # Create an artificial VarField if we find an extra ** on the end. This @@ -1813,8 +1832,8 @@ def on_hshptn(constant, keywords, keyword_rest) # If there's no constant, there may be braces, so we're going to look for # those to get our bounds. unless constant - lbrace = find_token(LBrace, consume: false) - rbrace = find_token(RBrace, consume: false) + lbrace = find_token(LBrace) + rbrace = find_token(RBrace) if lbrace && rbrace parts = [lbrace, *parts, rbrace] @@ -1853,8 +1872,8 @@ def on_ident(value) # (nil | Elsif | Else) consequent # ) -> If def on_if(predicate, statements, consequent) - beginning = find_token(Kw, "if") - ending = consequent || find_token(Kw, "end") + beginning = consume_token_value(Kw, "if") + ending = consequent || consume_token_value(Kw, "end") start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -1886,7 +1905,7 @@ def on_ifop(predicate, truthy, falsy) # :call-seq: # on_if_mod: (untyped predicate, untyped statement) -> IfMod def on_if_mod(predicate, statement) - find_token(Kw, "if") + consume_token_value(Kw, "if") IfMod.new( statement: statement, @@ -1929,11 +1948,11 @@ def on_in(pattern, statements, consequent) # Here we have a rightward assignment return pattern unless statements - beginning = find_token(Kw, "in") - ending = consequent || find_token(Kw, "end") + beginning = consume_token_value(Kw, "in") + ending = consequent || consume_token_value(Kw, "end") statements_start = pattern - if (token = find_token(Kw, "then", consume: false)) + if (token = find_token_value(Kw, "then")) tokens.delete(token) statements_start = token end @@ -2010,7 +2029,7 @@ def on_kw(value) # :call-seq: # on_kwrest_param: ((nil | Ident) name) -> KwRestParam def on_kwrest_param(name) - location = find_token(Op, "**").location + location = consume_token_value(Op, "**").location location = location.to(name.location) if name KwRestParam.new(name: name, location: location) @@ -2056,7 +2075,7 @@ def on_label_end(value) # (BodyStmt | Statements) statements # ) -> Lambda def on_lambda(params, statements) - beginning = find_token(TLambda) + beginning = consume_token(TLambda) braces = tokens.any? do |token| token.is_a?(TLamBeg) && @@ -2099,11 +2118,11 @@ def on_lambda(params, statements) end if braces - opening = find_token(TLamBeg) - closing = find_token(RBrace) + opening = consume_token(TLamBeg) + closing = consume_token(RBrace) else - opening = find_token(Kw, "do") - closing = find_token(Kw, "end") + opening = consume_token_value(Kw, "do") + closing = consume_token_value(Kw, "end") end start_char = find_next_statement_start(opening.location.end_char) @@ -2334,7 +2353,7 @@ def on_mlhs_add_post(left, right) # (nil | ARefField | Field | Ident | VarField) part # ) -> MLHS def on_mlhs_add_star(mlhs, part) - beginning = find_token(Op, "*") + beginning = consume_token_value(Op, "*") ending = part || beginning location = beginning.location.to(ending.location) @@ -2357,8 +2376,8 @@ def on_mlhs_new # :call-seq: # on_mlhs_paren: ((MLHS | MLHSParen) contents) -> MLHSParen def on_mlhs_paren(contents) - lparen = find_token(LParen) - rparen = find_token(RParen) + lparen = consume_token(LParen) + rparen = consume_token(RParen) comma_range = lparen.location.end_char...rparen.location.start_char contents.comma = true if source[comma_range].strip.end_with?(",") @@ -2375,8 +2394,8 @@ def on_mlhs_paren(contents) # BodyStmt bodystmt # ) -> ModuleDeclaration def on_module(constant, bodystmt) - beginning = find_token(Kw, "module") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "module") + ending = consume_token_value(Kw, "end") start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( @@ -2415,7 +2434,7 @@ def on_mrhs_add(mrhs, part) # :call-seq: # on_mrhs_add_star: (MRHS mrhs, untyped value) -> MRHS def on_mrhs_add_star(mrhs, value) - beginning = find_token(Op, "*") + beginning = consume_token_value(Op, "*") ending = value || beginning arg_star = @@ -2443,7 +2462,7 @@ def on_mrhs_new_from_args(arguments) # :call-seq: # on_next: (Args arguments) -> Next def on_next(arguments) - keyword = find_token(Kw, "next") + keyword = consume_token_value(Kw, "next") location = keyword.location location = location.to(arguments.location) if arguments.parts.any? @@ -2558,8 +2577,8 @@ def on_params( # :call-seq: # on_paren: (untyped contents) -> Paren def on_paren(contents) - lparen = find_token(LParen) - rparen = find_token(RParen) + lparen = consume_token(LParen) + rparen = consume_token(RParen) if contents.is_a?(Params) location = contents.location @@ -2764,7 +2783,7 @@ def on_qsymbols_beg(value) # :call-seq: # on_qsymbols_new: () -> QSymbols def on_qsymbols_new - beginning = find_token(QSymbolsBeg) + beginning = consume_token(QSymbolsBeg) QSymbols.new( beginning: beginning, @@ -2805,7 +2824,7 @@ def on_qwords_beg(value) # :call-seq: # on_qwords_new: () -> QWords def on_qwords_new - beginning = find_token(QWordsBeg) + beginning = consume_token(QWordsBeg) QWords.new( beginning: beginning, @@ -2870,7 +2889,7 @@ def on_rbracket(value) # :call-seq: # on_redo: () -> Redo def on_redo - keyword = find_token(Kw, "redo") + keyword = consume_token_value(Kw, "redo") Redo.new(value: keyword.value, location: keyword.location) end @@ -2946,7 +2965,7 @@ def on_regexp_literal(regexp_content, ending) # :call-seq: # on_regexp_new: () -> RegexpContent def on_regexp_new - regexp_beg = find_token(RegexpBeg) + regexp_beg = consume_token(RegexpBeg) RegexpContent.new( beginning: regexp_beg.value, @@ -2963,7 +2982,7 @@ def on_regexp_new # (nil | Rescue) consequent # ) -> Rescue def on_rescue(exceptions, variable, statements, consequent) - keyword = find_token(Kw, "rescue") + keyword = consume_token_value(Kw, "rescue") exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword @@ -3015,7 +3034,7 @@ def on_rescue(exceptions, variable, statements, consequent) # :call-seq: # on_rescue_mod: (untyped statement, untyped value) -> RescueMod def on_rescue_mod(statement, value) - find_token(Kw, "rescue") + consume_token_value(Kw, "rescue") RescueMod.new( statement: statement, @@ -3027,7 +3046,7 @@ def on_rescue_mod(statement, value) # :call-seq: # on_rest_param: ((nil | Ident) name) -> RestParam def on_rest_param(name) - location = find_token(Op, "*").location + location = consume_token_value(Op, "*").location location = location.to(name.location) if name RestParam.new(name: name, location: location) @@ -3036,7 +3055,7 @@ def on_rest_param(name) # :call-seq: # on_retry: () -> Retry def on_retry - keyword = find_token(Kw, "retry") + keyword = consume_token_value(Kw, "retry") Retry.new(value: keyword.value, location: keyword.location) end @@ -3044,7 +3063,7 @@ def on_retry # :call-seq: # on_return: (Args arguments) -> Return def on_return(arguments) - keyword = find_token(Kw, "return") + keyword = consume_token_value(Kw, "return") Return.new( arguments: arguments, @@ -3055,7 +3074,7 @@ def on_return(arguments) # :call-seq: # on_return0: () -> Return0 def on_return0 - keyword = find_token(Kw, "return") + keyword = consume_token_value(Kw, "return") Return0.new(value: keyword.value, location: keyword.location) end @@ -3082,8 +3101,8 @@ def on_rparen(value) # :call-seq: # on_sclass: (untyped target, BodyStmt bodystmt) -> SClass def on_sclass(target, bodystmt) - beginning = find_token(Kw, "class") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "class") + ending = consume_token_value(Kw, "end") start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( @@ -3181,7 +3200,7 @@ def on_string_content # :call-seq: # on_string_dvar: ((Backref | VarRef) variable) -> StringDVar def on_string_dvar(variable) - embvar = find_token(EmbVar) + embvar = consume_token(EmbVar) StringDVar.new( variable: variable, @@ -3192,8 +3211,8 @@ def on_string_dvar(variable) # :call-seq: # on_string_embexpr: (Statements statements) -> StringEmbExpr def on_string_embexpr(statements) - embexpr_beg = find_token(EmbExprBeg) - embexpr_end = find_token(EmbExprEnd) + embexpr_beg = consume_token(EmbExprBeg) + embexpr_end = consume_token(EmbExprEnd) statements.bind( embexpr_beg.location.end_char, @@ -3234,8 +3253,8 @@ def on_string_literal(string) location: heredoc.location ) else - tstring_beg = find_token(TStringBeg) - tstring_end = find_token(TStringEnd, location: tstring_beg.location) + tstring_beg = consume_token(TStringBeg) + tstring_end = consume_token(TStringEnd, location: tstring_beg.location) location = Location.new( @@ -3261,7 +3280,7 @@ def on_string_literal(string) # :call-seq: # on_super: ((ArgParen | Args) arguments) -> Super def on_super(arguments) - keyword = find_token(Kw, "super") + keyword = consume_token_value(Kw, "super") Super.new( arguments: arguments, @@ -3308,7 +3327,7 @@ def on_symbol(value) # ) -> SymbolLiteral def on_symbol_literal(value) if value.is_a?(SymbolContent) - symbeg = find_token(SymBeg) + symbeg = consume_token(SymBeg) SymbolLiteral.new( value: value.value, @@ -3352,7 +3371,7 @@ def on_symbols_beg(value) # :call-seq: # on_symbols_new: () -> Symbols def on_symbols_new - beginning = find_token(SymbolsBeg) + beginning = consume_token(SymbolsBeg) Symbols.new( beginning: beginning, @@ -3482,13 +3501,13 @@ def on_unary(operator, statement) # We have somewhat special handling of the not operator since if it has # parentheses they don't get reported as a paren node for some reason. - beginning = find_token(Kw, "not") + beginning = consume_token_value(Kw, "not") ending = statement || beginning parentheses = source[beginning.location.end_char] == "(" if parentheses - find_token(LParen) - ending = find_token(RParen) + consume_token(LParen) + ending = consume_token(RParen) end Not.new( @@ -3521,7 +3540,7 @@ def on_unary(operator, statement) # :call-seq: # on_undef: (Array[DynaSymbol | SymbolLiteral] symbols) -> Undef def on_undef(symbols) - keyword = find_token(Kw, "undef") + keyword = consume_token_value(Kw, "undef") Undef.new( symbols: symbols, @@ -3536,8 +3555,8 @@ def on_undef(symbols) # ((nil | Elsif | Else) consequent) # ) -> Unless def on_unless(predicate, statements, consequent) - beginning = find_token(Kw, "unless") - ending = consequent || find_token(Kw, "end") + beginning = consume_token_value(Kw, "unless") + ending = consequent || consume_token_value(Kw, "end") start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -3558,7 +3577,7 @@ def on_unless(predicate, statements, consequent) # :call-seq: # on_unless_mod: (untyped predicate, untyped statement) -> UnlessMod def on_unless_mod(predicate, statement) - find_token(Kw, "unless") + consume_token_value(Kw, "unless") UnlessMod.new( statement: statement, @@ -3570,12 +3589,12 @@ def on_unless_mod(predicate, statement) # :call-seq: # on_until: (untyped predicate, Statements statements) -> Until def on_until(predicate, statements) - beginning = find_token(Kw, "until") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "until") + ending = consume_token_value(Kw, "end") # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token(Kw, "do", consume: false) + keyword = find_token_value(Kw, "do") if keyword && keyword.location.start_char > predicate.location.end_char && keyword.location.end_char < ending.location.start_char tokens.delete(keyword) @@ -3600,7 +3619,7 @@ def on_until(predicate, statements) # :call-seq: # on_until_mod: (untyped predicate, untyped statement) -> UntilMod def on_until_mod(predicate, statement) - find_token(Kw, "until") + consume_token_value(Kw, "until") UntilMod.new( statement: statement, @@ -3612,7 +3631,7 @@ def on_until_mod(predicate, statement) # :call-seq: # on_var_alias: (GVar left, (Backref | GVar) right) -> VarAlias def on_var_alias(left, right) - keyword = find_token(Kw, "alias") + keyword = consume_token_value(Kw, "alias") VarAlias.new( left: left, @@ -3666,11 +3685,11 @@ def on_void_stmt # (nil | Else | When) consequent # ) -> When def on_when(arguments, statements, consequent) - beginning = find_token(Kw, "when") - ending = consequent || find_token(Kw, "end") + beginning = consume_token_value(Kw, "when") + ending = consequent || consume_token_value(Kw, "end") statements_start = arguments - if (token = find_token(Kw, "then", consume: false)) + if (token = find_token_value(Kw, "then")) tokens.delete(token) statements_start = token end @@ -3696,12 +3715,12 @@ def on_when(arguments, statements, consequent) # :call-seq: # on_while: (untyped predicate, Statements statements) -> While def on_while(predicate, statements) - beginning = find_token(Kw, "while") - ending = find_token(Kw, "end") + beginning = consume_token_value(Kw, "while") + ending = consume_token_value(Kw, "end") # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token(Kw, "do", consume: false) + keyword = find_token_value(Kw, "do") if keyword && keyword.location.start_char > predicate.location.end_char && keyword.location.end_char < ending.location.start_char tokens.delete(keyword) @@ -3726,7 +3745,7 @@ def on_while(predicate, statements) # :call-seq: # on_while_mod: (untyped predicate, untyped statement) -> WhileMod def on_while_mod(predicate, statement) - find_token(Kw, "while") + consume_token_value(Kw, "while") WhileMod.new( statement: statement, @@ -3789,7 +3808,7 @@ def on_words_beg(value) # :call-seq: # on_words_new: () -> Words def on_words_new - beginning = find_token(WordsBeg) + beginning = consume_token(WordsBeg) Words.new( beginning: beginning, @@ -3823,7 +3842,7 @@ def on_xstring_new if heredoc && heredoc.beginning.value.include?("`") heredoc.location else - find_token(Backtick).location + consume_token(Backtick).location end XString.new(parts: [], location: location) @@ -3843,7 +3862,7 @@ def on_xstring_literal(xstring) location: heredoc.location ) else - ending = find_token(TStringEnd, location: xstring.location) + ending = consume_token(TStringEnd, location: xstring.location) XStringLiteral.new( parts: xstring.parts, @@ -3855,7 +3874,7 @@ def on_xstring_literal(xstring) # :call-seq: # on_yield: ((Args | Paren) arguments) -> Yield def on_yield(arguments) - keyword = find_token(Kw, "yield") + keyword = consume_token_value(Kw, "yield") Yield.new( arguments: arguments, @@ -3866,7 +3885,7 @@ def on_yield(arguments) # :call-seq: # on_yield0: () -> Yield0 def on_yield0 - keyword = find_token(Kw, "yield") + keyword = consume_token_value(Kw, "yield") Yield0.new(value: keyword.value, location: keyword.location) end @@ -3874,7 +3893,7 @@ def on_yield0 # :call-seq: # on_zsuper: () -> ZSuper def on_zsuper - keyword = find_token(Kw, "super") + keyword = consume_token_value(Kw, "super") ZSuper.new(value: keyword.value, location: keyword.location) end From 840ebabc7a245ae9f23d7136a39482aef4be8367 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 15 Oct 2022 23:26:50 -0400 Subject: [PATCH 03/15] Even more performance tweaks --- lib/syntax_tree.rb | 10 ++ lib/syntax_tree/node.rb | 86 +++++++++----- lib/syntax_tree/parser.rb | 231 +++++++++++++++++++------------------- test/interface_test.rb | 4 + 4 files changed, 187 insertions(+), 144 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 29ed048c..da84273c 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -20,6 +20,16 @@ require_relative "syntax_tree/parser" +# We rely on Symbol#name being available, which is only available in Ruby 3.0+. +# In case we're running on an older Ruby version, we polyfill it here. +unless :+.respond_to?(:name) + class Symbol + def name + to_s.freeze + end + end +end + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 2aa51fd8..24d35985 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -606,12 +606,14 @@ def format(q) private def trailing_comma? - case arguments - in Args[parts: [*, ArgBlock]] + return false unless arguments.is_a?(Args) + parts = arguments.parts + + if parts.last&.is_a?(ArgBlock) # If the last argument is a block, then we can't put a trailing comma # after it without resulting in a syntax error. false - in Args[parts: [Command | CommandCall]] + elsif parts.length == 1 && (part = parts.first) && (part.is_a?(Command) || part.is_a?(CommandCall)) # If the only argument is a command or command call, then a trailing # comma would be parsed as part of that expression instead of on this # one, so we don't want to add a trailing comma. @@ -1668,13 +1670,11 @@ def format(q) q.text(" ") unless power if operator == :<< - q.text(operator.to_s) - q.text(" ") + q.text("<< ") q.format(right) else q.group do - q.text(operator.to_s) - + q.text(operator.name) q.indent do power ? q.breakable_empty : q.breakable_space q.format(right) @@ -1974,12 +1974,11 @@ def format(q) # If the receiver of this block a Command or CommandCall node, then there # are no parentheses around the arguments to that command, so we need to # break the block. - case q.parent - in { call: Command | CommandCall } + case q.parent.call + when Command, CommandCall q.break_parent format_break(q, break_opening, break_closing) return - else end q.group do @@ -1999,9 +1998,9 @@ def unchangeable_bounds?(q) # know for certain we're going to get split over multiple lines # anyway. case parent - in Statements | ArgParen + when Statements, ArgParen break false - in Command | CommandCall + when Command, CommandCall true else false @@ -2012,8 +2011,8 @@ def unchangeable_bounds?(q) # If we're a sibling of a control-flow keyword, then we're going to have to # use the do..end bounds. def forced_do_end_bounds?(q) - case q.parent - in { call: Break | Next | Return | Super } + case q.parent.call + when Break, Next, Return, Super true else false @@ -2997,15 +2996,31 @@ def format(q) private def align(q, node, &block) - case node.arguments - in Args[parts: [Def | Defs | DefEndless]] - q.text(" ") - yield - in Args[parts: [IfOp]] - q.if_flat { q.text(" ") } - yield - in Args[parts: [Command => command]] - align(q, command, &block) + arguments = node.arguments + + if arguments.is_a?(Args) + parts = arguments.parts + + if parts.size == 1 + part = parts.first + + case part + when Def, Defs, DefEndless + q.text(" ") + yield + when IfOp + q.if_flat { q.text(" ") } + yield + when Command + align(q, part, &block) + else + q.text(" ") + q.nest(message.value.length + 1) { yield } + end + else + q.text(" ") + q.nest(message.value.length + 1) { yield } + end else q.text(" ") q.nest(message.value.length + 1) { yield } @@ -3092,13 +3107,16 @@ def format(q) end end - case arguments - in Args[parts: [IfOp]] - q.if_flat { q.text(" ") } - q.format(arguments) - in Args - q.text(" ") - q.nest(argument_alignment(q, doc)) { q.format(arguments) } + if arguments + parts = arguments.parts + + if parts.length == 1 && parts.first.is_a?(IfOp) + q.if_flat { q.text(" ") } + q.format(arguments) + else + q.text(" ") + q.nest(argument_alignment(q, doc)) { q.format(arguments) } + end else # If there are no arguments, print nothing. end @@ -5861,11 +5879,15 @@ class Kw < Node # [String] the value of the keyword attr_reader :value + # [Symbol] the symbol version of the value + attr_reader :name + # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments def initialize(value:, location:, comments: []) @value = value + @name = value.to_sym @location = location @comments = comments end @@ -6645,11 +6667,15 @@ class Op < Node # [String] the operator attr_reader :value + # [Symbol] the symbol version of the value + attr_reader :name + # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments def initialize(value:, location:, comments: []) @value = value + @name = value.to_sym @location = location @comments = comments end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 132780b6..3245efa1 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -164,7 +164,7 @@ def initialize(source, *) @line_counts = [] last_index = 0 - @source.lines.each do |line| + @source.each_line do |line| @line_counts << if line.size == line.bytesize SingleByteString.new(last_index) else @@ -250,42 +250,48 @@ def find_token_error(location) # by a syntax error in the source that we're printing. It could also be # caused by accidentally attempting to consume a token twice by two # different parser event handlers. + def find_token(type) index = tokens.rindex { |token| token.is_a?(type) } tokens[index] if index end - def find_token_value(type, value) - index = - tokens.rindex do |token| - token.is_a?(type) && (token.value == value) - end - + def find_keyword(name) + index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } tokens[index] if index end - def consume_token(type, location: nil) - index = tokens.rindex { |token| token.is_a?(type) } + def find_operator(name) + index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } + tokens[index] if index + end - unless index - message = "Cannot find expected #{type.name.split("::", 2).last}" - raise ParseError.new(message, *find_token_error(location)) - end + def consume_error(name, location) + message = "Cannot find expected #{name}" + raise ParseError.new(message, *find_token_error(location)) + end + def consume_token(type) + index = tokens.rindex { |token| token.is_a?(type) } + consume_error(type.name.split("::", 2).last, nil) unless index tokens.delete_at(index) end - def consume_token_value(type, value) - index = - tokens.rindex do |token| - token.is_a?(type) && (token.value == value) - end + def consume_tstring_end(location) + index = tokens.rindex { |token| token.is_a?(TStringEnd) } + consume_error("string ending", location) unless index + tokens.delete_at(index) + end - unless index - message = "Cannot find expected #{value}" - raise ParseError.new(message, *find_token_error(nil)) - end + def consume_keyword(name) + index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) } + consume_error(name, nil) unless index + tokens.delete_at(index) + end + def consume_operator(name) + index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) } + consume_error(name, nil) unless index tokens.delete_at(index) end @@ -348,7 +354,7 @@ def on_BEGIN(statements) rbrace.location.start_column ) - keyword = consume_token_value(Kw, "BEGIN") + keyword = consume_keyword(:BEGIN) BEGINBlock.new( lbrace: lbrace, @@ -386,7 +392,7 @@ def on_END(statements) rbrace.location.start_column ) - keyword = consume_token_value(Kw, "END") + keyword = consume_keyword(:END) ENDBlock.new( lbrace: lbrace, @@ -417,7 +423,7 @@ def on___end__(value) # (DynaSymbol | SymbolLiteral) right # ) -> Alias def on_alias(left, right) - keyword = consume_token_value(Kw, "alias") + keyword = consume_keyword(:alias) Alias.new( left: left, @@ -510,7 +516,7 @@ def on_args_add_block(arguments, block) # First, see if there is an & operator that could potentially be # associated with the block part of this args_add_block. If there is not, # then just return the arguments. - operator = find_token_value(Op, "&") + operator = find_operator(:&) return arguments unless operator # If there are any arguments and the operator we found from the list is @@ -542,7 +548,7 @@ def on_args_add_block(arguments, block) # :call-seq: # on_args_add_star: (Args arguments, untyped star) -> Args def on_args_add_star(arguments, argument) - beginning = consume_token_value(Op, "*") + beginning = consume_operator(:*) ending = argument || beginning location = @@ -564,7 +570,7 @@ def on_args_add_star(arguments, argument) # :call-seq: # on_args_forward: () -> ArgsForward def on_args_forward - op = consume_token_value(Op, "...") + op = consume_operator(:"...") ArgsForward.new(value: op.value, location: op.location) end @@ -593,8 +599,7 @@ def on_array(contents) location: lbracket.location.to(rbracket.location) ) else - tstring_end = - consume_token(TStringEnd, location: contents.beginning.location) + tstring_end = consume_tstring_end(contents.beginning.location) contents.class.new( beginning: contents.beginning, @@ -731,7 +736,7 @@ def on_assoc_new(key, value) # :call-seq: # on_assoc_splat: (untyped value) -> AssocSplat def on_assoc_splat(value) - operator = consume_token_value(Op, "**") + operator = consume_operator(:**) AssocSplat.new( value: value, @@ -791,7 +796,7 @@ def on_bare_assoc_hash(assocs) # :call-seq: # on_begin: (untyped bodystmt) -> Begin | PinnedBegin def on_begin(bodystmt) - pin = find_token_value(Op, "^") + pin = find_operator(:^) if pin && pin.location.start_char < bodystmt.location.start_char tokens.delete(pin) @@ -802,12 +807,12 @@ def on_begin(bodystmt) PinnedBegin.new(statement: bodystmt, location: location) else - keyword = consume_token_value(Kw, "begin") + keyword = consume_keyword(:begin) end_location = if bodystmt.else_clause bodystmt.location else - consume_token_value(Kw, "end").location + consume_keyword(:end).location end bodystmt.bind( @@ -833,13 +838,11 @@ def on_binary(left, operator, right) # Here, we're going to search backward for the token that's between the # two operands that matches the operator so we can delete it from the # list. + range = (left.location.end_char + 1)...right.location.start_char index = tokens.rindex do |token| - location = token.location - - token.is_a?(Op) && token.value == operator.to_s && - location.start_char > left.location.end_char && - location.end_char < right.location.start_char + token.is_a?(Op) && token.name == operator && + range.cover?(token.location.start_char) end tokens.delete_at(index) if index @@ -882,7 +885,7 @@ def on_block_var(params, locals) # :call-seq: # on_blockarg: (Ident name) -> BlockArg def on_blockarg(name) - operator = consume_token_value(Op, "&") + operator = consume_operator(:&) location = operator.location location = location.to(name.location) if name @@ -901,7 +904,7 @@ def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) BodyStmt.new( statements: statements, rescue_clause: rescue_clause, - else_keyword: else_clause && consume_token_value(Kw, "else"), + else_keyword: else_clause && consume_keyword(:else), else_clause: else_clause, ensure_clause: ensure_clause, location: @@ -951,7 +954,7 @@ def on_brace_block(block_var, statements) # :call-seq: # on_break: (Args arguments) -> Break def on_break(arguments) - keyword = consume_token_value(Kw, "break") + keyword = consume_keyword(:break) location = keyword.location location = location.to(arguments.location) if arguments.parts.any? @@ -987,7 +990,7 @@ def on_call(receiver, operator, message) # :call-seq: # on_case: (untyped value, untyped consequent) -> Case | RAssign def on_case(value, consequent) - if (keyword = find_token_value(Kw, "case")) + if (keyword = find_keyword(:case)) tokens.delete(keyword) Case.new( @@ -998,10 +1001,10 @@ def on_case(value, consequent) ) else operator = - if (keyword = find_token_value(Kw, "in")) + if (keyword = find_keyword(:in)) tokens.delete(keyword) else - consume_token_value(Op, "=>") + consume_operator(:"=>") end node = RAssign.new( @@ -1023,8 +1026,8 @@ def on_case(value, consequent) # BodyStmt bodystmt # ) -> ClassDeclaration def on_class(constant, superclass, bodystmt) - beginning = consume_token_value(Kw, "class") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:class) + ending = consume_keyword(:end) location = (superclass || constant).location start_char = find_next_statement_start(location.end_char) @@ -1182,7 +1185,7 @@ def on_def(name, params, bodystmt) # Find the beginning of the method definition, which works for single-line # and normal method definitions. - beginning = consume_token_value(Kw, "def") + beginning = consume_keyword(:def) # If there aren't any params then we need to correct the params node # location information @@ -1202,7 +1205,7 @@ def on_def(name, params, bodystmt) params = Params.new(location: location) end - ending = find_token_value(Kw, "end") + ending = find_keyword(:end) if ending tokens.delete(ending) @@ -1240,7 +1243,7 @@ def on_def(name, params, bodystmt) # :call-seq: # on_defined: (untyped value) -> Defined def on_defined(value) - beginning = consume_token_value(Kw, "defined?") + beginning = consume_keyword(:defined?) ending = value range = beginning.location.end_char...value.location.start_char @@ -1287,8 +1290,8 @@ def on_defs(target, operator, name, params, bodystmt) params = Params.new(location: location) end - beginning = consume_token_value(Kw, "def") - ending = find_token_value(Kw, "end") + beginning = consume_keyword(:def) + ending = find_keyword(:end) if ending tokens.delete(ending) @@ -1328,8 +1331,8 @@ def on_defs(target, operator, name, params, bodystmt) # :call-seq: # on_do_block: (BlockVar block_var, BodyStmt bodystmt) -> DoBlock def on_do_block(block_var, bodystmt) - beginning = consume_token_value(Kw, "do") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:do) + ending = consume_keyword(:end) location = (block_var || beginning).location start_char = find_next_statement_start(location.end_char) @@ -1351,7 +1354,7 @@ def on_do_block(block_var, bodystmt) # :call-seq: # on_dot2: ((nil | untyped) left, (nil | untyped) right) -> Dot2 def on_dot2(left, right) - operator = consume_token_value(Op, "..") + operator = consume_operator(:"..") beginning = left || operator ending = right || operator @@ -1366,7 +1369,7 @@ def on_dot2(left, right) # :call-seq: # on_dot3: ((nil | untyped) left, (nil | untyped) right) -> Dot3 def on_dot3(left, right) - operator = consume_token_value(Op, "...") + operator = consume_operator(:"...") beginning = left || operator ending = right || operator @@ -1384,7 +1387,7 @@ def on_dyna_symbol(string_content) if symbeg = find_token(SymBeg) # A normal dynamic symbol tokens.delete(symbeg) - tstring_end = consume_token(TStringEnd, location: symbeg.location) + tstring_end = consume_tstring_end(symbeg.location) DynaSymbol.new( quote: symbeg.value, @@ -1407,7 +1410,7 @@ def on_dyna_symbol(string_content) # :call-seq: # on_else: (Statements statements) -> Else def on_else(statements) - keyword = consume_token_value(Kw, "else") + keyword = consume_keyword(:else) # else can either end with an end keyword (in which case we'll want to # consume that event) or it can end with an ensure keyword (in which case @@ -1447,8 +1450,8 @@ def on_else(statements) # (nil | Elsif | Else) consequent # ) -> Elsif def on_elsif(predicate, statements, consequent) - beginning = consume_token_value(Kw, "elsif") - ending = consequent || consume_token_value(Kw, "end") + beginning = consume_keyword(:elsif) + ending = consequent || consume_keyword(:end) start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -1568,11 +1571,11 @@ def on_embvar(value) # :call-seq: # on_ensure: (Statements statements) -> Ensure def on_ensure(statements) - keyword = consume_token_value(Kw, "ensure") + keyword = consume_keyword(:ensure) # We don't want to consume the :@kw event, because that would break # def..ensure..end chains. - ending = find_token_value(Kw, "end") + ending = find_keyword(:end) start_char = find_next_statement_start(keyword.location.end_char) statements.bind( start_char, @@ -1679,13 +1682,13 @@ def on_fndptn(constant, left, values, right) # Statements statements # ) -> For def on_for(index, collection, statements) - beginning = consume_token_value(Kw, "for") - in_keyword = consume_token_value(Kw, "in") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:for) + in_keyword = consume_keyword(:in) + ending = consume_keyword(:end) # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token_value(Kw, "do") + keyword = find_keyword(:do) if keyword && keyword.location.start_char > collection.location.end_char && keyword.location.end_char < ending.location.start_char @@ -1818,8 +1821,8 @@ def on_hshptn(constant, keywords, keyword_rest) if keyword_rest # We're doing this to delete the token from the list so that it doesn't # confuse future patterns by thinking they have an extra ** on the end. - consume_token_value(Op, "**") - elsif (token = find_token_value(Op, "**")) + consume_operator(:**) + elsif (token = find_operator(:**)) tokens.delete(token) # Create an artificial VarField if we find an extra ** on the end. This @@ -1872,8 +1875,8 @@ def on_ident(value) # (nil | Elsif | Else) consequent # ) -> If def on_if(predicate, statements, consequent) - beginning = consume_token_value(Kw, "if") - ending = consequent || consume_token_value(Kw, "end") + beginning = consume_keyword(:if) + ending = consequent || consume_keyword(:end) start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -1905,7 +1908,7 @@ def on_ifop(predicate, truthy, falsy) # :call-seq: # on_if_mod: (untyped predicate, untyped statement) -> IfMod def on_if_mod(predicate, statement) - consume_token_value(Kw, "if") + consume_keyword(:if) IfMod.new( statement: statement, @@ -1948,11 +1951,11 @@ def on_in(pattern, statements, consequent) # Here we have a rightward assignment return pattern unless statements - beginning = consume_token_value(Kw, "in") - ending = consequent || consume_token_value(Kw, "end") + beginning = consume_keyword(:in) + ending = consequent || consume_keyword(:end) statements_start = pattern - if (token = find_token_value(Kw, "then")) + if (token = find_keyword(:then)) tokens.delete(token) statements_start = token end @@ -2029,7 +2032,7 @@ def on_kw(value) # :call-seq: # on_kwrest_param: ((nil | Ident) name) -> KwRestParam def on_kwrest_param(name) - location = consume_token_value(Op, "**").location + location = consume_operator(:**).location location = location.to(name.location) if name KwRestParam.new(name: name, location: location) @@ -2121,8 +2124,8 @@ def on_lambda(params, statements) opening = consume_token(TLamBeg) closing = consume_token(RBrace) else - opening = consume_token_value(Kw, "do") - closing = consume_token_value(Kw, "end") + opening = consume_keyword(:do) + closing = consume_keyword(:end) end start_char = find_next_statement_start(opening.location.end_char) @@ -2353,7 +2356,7 @@ def on_mlhs_add_post(left, right) # (nil | ARefField | Field | Ident | VarField) part # ) -> MLHS def on_mlhs_add_star(mlhs, part) - beginning = consume_token_value(Op, "*") + beginning = consume_operator(:*) ending = part || beginning location = beginning.location.to(ending.location) @@ -2394,8 +2397,8 @@ def on_mlhs_paren(contents) # BodyStmt bodystmt # ) -> ModuleDeclaration def on_module(constant, bodystmt) - beginning = consume_token_value(Kw, "module") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:module) + ending = consume_keyword(:end) start_char = find_next_statement_start(constant.location.end_char) bodystmt.bind( @@ -2434,7 +2437,7 @@ def on_mrhs_add(mrhs, part) # :call-seq: # on_mrhs_add_star: (MRHS mrhs, untyped value) -> MRHS def on_mrhs_add_star(mrhs, value) - beginning = consume_token_value(Op, "*") + beginning = consume_operator(:*) ending = value || beginning arg_star = @@ -2462,7 +2465,7 @@ def on_mrhs_new_from_args(arguments) # :call-seq: # on_next: (Args arguments) -> Next def on_next(arguments) - keyword = consume_token_value(Kw, "next") + keyword = consume_keyword(:next) location = keyword.location location = location.to(arguments.location) if arguments.parts.any? @@ -2889,7 +2892,7 @@ def on_rbracket(value) # :call-seq: # on_redo: () -> Redo def on_redo - keyword = consume_token_value(Kw, "redo") + keyword = consume_keyword(:redo) Redo.new(value: keyword.value, location: keyword.location) end @@ -2982,7 +2985,7 @@ def on_regexp_new # (nil | Rescue) consequent # ) -> Rescue def on_rescue(exceptions, variable, statements, consequent) - keyword = consume_token_value(Kw, "rescue") + keyword = consume_keyword(:rescue) exceptions = exceptions[0] if exceptions.is_a?(Array) last_node = variable || exceptions || keyword @@ -3034,7 +3037,7 @@ def on_rescue(exceptions, variable, statements, consequent) # :call-seq: # on_rescue_mod: (untyped statement, untyped value) -> RescueMod def on_rescue_mod(statement, value) - consume_token_value(Kw, "rescue") + consume_keyword(:rescue) RescueMod.new( statement: statement, @@ -3046,7 +3049,7 @@ def on_rescue_mod(statement, value) # :call-seq: # on_rest_param: ((nil | Ident) name) -> RestParam def on_rest_param(name) - location = consume_token_value(Op, "*").location + location = consume_operator(:*).location location = location.to(name.location) if name RestParam.new(name: name, location: location) @@ -3055,7 +3058,7 @@ def on_rest_param(name) # :call-seq: # on_retry: () -> Retry def on_retry - keyword = consume_token_value(Kw, "retry") + keyword = consume_keyword(:retry) Retry.new(value: keyword.value, location: keyword.location) end @@ -3063,7 +3066,7 @@ def on_retry # :call-seq: # on_return: (Args arguments) -> Return def on_return(arguments) - keyword = consume_token_value(Kw, "return") + keyword = consume_keyword(:return) Return.new( arguments: arguments, @@ -3074,7 +3077,7 @@ def on_return(arguments) # :call-seq: # on_return0: () -> Return0 def on_return0 - keyword = consume_token_value(Kw, "return") + keyword = consume_keyword(:return) Return0.new(value: keyword.value, location: keyword.location) end @@ -3101,8 +3104,8 @@ def on_rparen(value) # :call-seq: # on_sclass: (untyped target, BodyStmt bodystmt) -> SClass def on_sclass(target, bodystmt) - beginning = consume_token_value(Kw, "class") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:class) + ending = consume_keyword(:end) start_char = find_next_statement_start(target.location.end_char) bodystmt.bind( @@ -3254,7 +3257,7 @@ def on_string_literal(string) ) else tstring_beg = consume_token(TStringBeg) - tstring_end = consume_token(TStringEnd, location: tstring_beg.location) + tstring_end = consume_tstring_end(tstring_beg.location) location = Location.new( @@ -3280,7 +3283,7 @@ def on_string_literal(string) # :call-seq: # on_super: ((ArgParen | Args) arguments) -> Super def on_super(arguments) - keyword = consume_token_value(Kw, "super") + keyword = consume_keyword(:super) Super.new( arguments: arguments, @@ -3501,7 +3504,7 @@ def on_unary(operator, statement) # We have somewhat special handling of the not operator since if it has # parentheses they don't get reported as a paren node for some reason. - beginning = consume_token_value(Kw, "not") + beginning = consume_keyword(:not) ending = statement || beginning parentheses = source[beginning.location.end_char] == "(" @@ -3540,7 +3543,7 @@ def on_unary(operator, statement) # :call-seq: # on_undef: (Array[DynaSymbol | SymbolLiteral] symbols) -> Undef def on_undef(symbols) - keyword = consume_token_value(Kw, "undef") + keyword = consume_keyword(:undef) Undef.new( symbols: symbols, @@ -3555,8 +3558,8 @@ def on_undef(symbols) # ((nil | Elsif | Else) consequent) # ) -> Unless def on_unless(predicate, statements, consequent) - beginning = consume_token_value(Kw, "unless") - ending = consequent || consume_token_value(Kw, "end") + beginning = consume_keyword(:unless) + ending = consequent || consume_keyword(:end) start_char = find_next_statement_start(predicate.location.end_char) statements.bind( @@ -3577,7 +3580,7 @@ def on_unless(predicate, statements, consequent) # :call-seq: # on_unless_mod: (untyped predicate, untyped statement) -> UnlessMod def on_unless_mod(predicate, statement) - consume_token_value(Kw, "unless") + consume_keyword(:unless) UnlessMod.new( statement: statement, @@ -3589,12 +3592,12 @@ def on_unless_mod(predicate, statement) # :call-seq: # on_until: (untyped predicate, Statements statements) -> Until def on_until(predicate, statements) - beginning = consume_token_value(Kw, "until") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:until) + ending = consume_keyword(:end) # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token_value(Kw, "do") + keyword = find_keyword(:do) if keyword && keyword.location.start_char > predicate.location.end_char && keyword.location.end_char < ending.location.start_char tokens.delete(keyword) @@ -3619,7 +3622,7 @@ def on_until(predicate, statements) # :call-seq: # on_until_mod: (untyped predicate, untyped statement) -> UntilMod def on_until_mod(predicate, statement) - consume_token_value(Kw, "until") + consume_keyword(:until) UntilMod.new( statement: statement, @@ -3631,7 +3634,7 @@ def on_until_mod(predicate, statement) # :call-seq: # on_var_alias: (GVar left, (Backref | GVar) right) -> VarAlias def on_var_alias(left, right) - keyword = consume_token_value(Kw, "alias") + keyword = consume_keyword(:alias) VarAlias.new( left: left, @@ -3685,11 +3688,11 @@ def on_void_stmt # (nil | Else | When) consequent # ) -> When def on_when(arguments, statements, consequent) - beginning = consume_token_value(Kw, "when") - ending = consequent || consume_token_value(Kw, "end") + beginning = consume_keyword(:when) + ending = consequent || consume_keyword(:end) statements_start = arguments - if (token = find_token_value(Kw, "then")) + if (token = find_keyword(:then)) tokens.delete(token) statements_start = token end @@ -3715,12 +3718,12 @@ def on_when(arguments, statements, consequent) # :call-seq: # on_while: (untyped predicate, Statements statements) -> While def on_while(predicate, statements) - beginning = consume_token_value(Kw, "while") - ending = consume_token_value(Kw, "end") + beginning = consume_keyword(:while) + ending = consume_keyword(:end) # Consume the do keyword if it exists so that it doesn't get confused for # some other block - keyword = find_token_value(Kw, "do") + keyword = find_keyword(:do) if keyword && keyword.location.start_char > predicate.location.end_char && keyword.location.end_char < ending.location.start_char tokens.delete(keyword) @@ -3745,7 +3748,7 @@ def on_while(predicate, statements) # :call-seq: # on_while_mod: (untyped predicate, untyped statement) -> WhileMod def on_while_mod(predicate, statement) - consume_token_value(Kw, "while") + consume_keyword(:while) WhileMod.new( statement: statement, @@ -3862,7 +3865,7 @@ def on_xstring_literal(xstring) location: heredoc.location ) else - ending = consume_token(TStringEnd, location: xstring.location) + ending = consume_tstring_end(xstring.location) XStringLiteral.new( parts: xstring.parts, @@ -3874,7 +3877,7 @@ def on_xstring_literal(xstring) # :call-seq: # on_yield: ((Args | Paren) arguments) -> Yield def on_yield(arguments) - keyword = consume_token_value(Kw, "yield") + keyword = consume_keyword(:yield) Yield.new( arguments: arguments, @@ -3885,7 +3888,7 @@ def on_yield(arguments) # :call-seq: # on_yield0: () -> Yield0 def on_yield0 - keyword = consume_token_value(Kw, "yield") + keyword = consume_keyword(:yield) Yield0.new(value: keyword.value, location: keyword.location) end @@ -3893,7 +3896,7 @@ def on_yield0 # :call-seq: # on_zsuper: () -> ZSuper def on_zsuper - keyword = consume_token_value(Kw, "super") + keyword = consume_keyword(:super) ZSuper.new(value: keyword.value, location: keyword.location) end diff --git a/test/interface_test.rb b/test/interface_test.rb index 49a74e92..5086680e 100644 --- a/test/interface_test.rb +++ b/test/interface_test.rb @@ -54,8 +54,12 @@ def instantiate(klass) case klass.name when "SyntaxTree::Binary" klass.new(**params, operator: :+) + when "SyntaxTree::Kw" + klass.new(**params, value: "kw") when "SyntaxTree::Label" klass.new(**params, value: "label:") + when "SyntaxTree::Op" + klass.new(**params, value: "+") when "SyntaxTree::RegexpLiteral" klass.new(**params, ending: "/") when "SyntaxTree::Statements" From b5d226b4f24e225e72d62b7d4fb3152e9b7aa0b7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 09:54:32 -0400 Subject: [PATCH 04/15] Move lambdas into their own methods, smarter token finding in on_args_add_block --- lib/syntax_tree/node.rb | 102 ++++++++++++++++++++++---------------- lib/syntax_tree/parser.rb | 25 ++++++---- 2 files changed, 72 insertions(+), 55 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 24d35985..eccf0638 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2885,27 +2885,15 @@ def deconstruct_keys(_keys) end def format(q) - declaration = -> do - q.group do - q.text("class ") - q.format(constant) - - if superclass - q.text(" < ") - q.format(superclass) - end - end - end - if bodystmt.empty? q.group do - declaration.call + format_declaration(q) q.breakable_force q.text("end") end else q.group do - declaration.call + format_declaration(q) q.indent do q.breakable_force @@ -2917,6 +2905,20 @@ def format(q) end end end + + private + + def format_declaration(q) + q.group do + q.text("class ") + q.format(constant) + + if superclass + q.text(" < ") + q.format(superclass) + end + end + end end # Comma represents the use of the , operator. @@ -5122,18 +5124,7 @@ def deconstruct_keys(_keys) def format(q) parts = keywords.map { |(key, value)| KeywordFormatter.new(key, value) } parts << KeywordRestFormatter.new(keyword_rest) if keyword_rest - nested = PATTERNS.include?(q.parent.class) - contents = -> do - q.group { q.seplist(parts) { |part| q.format(part, stackable: false) } } - - # If there isn't a constant, and there's a blank keyword_rest, then we - # have an plain ** that needs to have a `then` after it in order to - # parse correctly on the next parse. - if !constant && keyword_rest && keyword_rest.value.nil? && !nested - q.text(" then") - end - end # If there is a constant, we're going to format to have the constant name # first and then use brackets. @@ -5143,7 +5134,7 @@ def format(q) q.text("[") q.indent do q.breakable_empty - contents.call + format_contents(q, parts, nested) end q.breakable_empty q.text("]") @@ -5160,7 +5151,7 @@ def format(q) # If there's only one pair, then we'll just print the contents provided # we're not inside another pattern. if !nested && parts.size == 1 - contents.call + format_contents(q, parts, nested) return end @@ -5170,7 +5161,7 @@ def format(q) q.text("{") q.indent do q.breakable_space - contents.call + format_contents(q, parts, nested) end if q.target_ruby_version < Gem::Version.new("2.7.3") @@ -5181,6 +5172,19 @@ def format(q) end end end + + private + + def format_contents(q, parts, nested) + q.group { q.seplist(parts) { |part| q.format(part, stackable: false) } } + + # If there isn't a constant, and there's a blank keyword_rest, then we + # have an plain ** that needs to have a `then` after it in order to + # parse correctly on the next parse. + if !constant && keyword_rest && keyword_rest.value.nil? && !nested + q.text(" then") + end + end end # The list of nodes that represent patterns inside of pattern matching so that @@ -6543,22 +6547,15 @@ def deconstruct_keys(_keys) end def format(q) - declaration = -> do - q.group do - q.text("module ") - q.format(constant) - end - end - if bodystmt.empty? q.group do - declaration.call + format_declaration(q) q.breakable_force q.text("end") end else q.group do - declaration.call + format_declaration(q) q.indent do q.breakable_force @@ -6570,6 +6567,15 @@ def format(q) end end end + + private + + def format_declaration(q) + q.group do + q.text("module ") + q.format(constant) + end + end end # MRHS represents the values that are being assigned on the right-hand side of @@ -7023,27 +7029,35 @@ def format(q) parts << KeywordRestFormatter.new(keyword_rest) if keyword_rest parts << block if block - contents = -> do - q.seplist(parts) { |part| q.format(part) } - q.format(rest) if rest.is_a?(ExcessedComma) + if parts.empty? + q.nest(0) { format_contents(q, parts) } + return end - if ![Def, Defs, DefEndless].include?(q.parent.class) || parts.empty? - q.nest(0, &contents) - else + case q.parent + when Def, Defs, DefEndless q.nest(0) do q.text("(") q.group do q.indent do q.breakable_empty - contents.call + format_contents(q, parts) end q.breakable_empty end q.text(")") end + else + q.nest(0) { format_contents(q, parts) } end end + + private + + def format_contents(q, parts) + q.seplist(parts) { |part| q.format(part) } + q.format(rest) if rest.is_a?(ExcessedComma) + end end # Paren represents using balanced parentheses in a couple places in a Ruby diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 3245efa1..78a6f84b 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -513,23 +513,26 @@ def on_args_add(arguments, argument) # (false | untyped) block # ) -> Args def on_args_add_block(arguments, block) + end_char = arguments.parts.any? && arguments.location.end_char + # First, see if there is an & operator that could potentially be # associated with the block part of this args_add_block. If there is not, # then just return the arguments. - operator = find_operator(:&) - return arguments unless operator - - # If there are any arguments and the operator we found from the list is - # not after them, then we're going to return the arguments as-is because - # we're looking at an & that occurs before the arguments are done. - if arguments.parts.any? && - operator.location.start_char < arguments.location.end_char - return arguments - end + index = + tokens.rindex do |token| + # If there are any arguments and the operator we found from the list + # is not after them, then we're going to return the arguments as-is + # because we're looking at an & that occurs before the arguments are + # done. + return arguments if end_char && token.location.start_char < end_char + token.is_a?(Op) && (token.name == :&) + end + + return arguments unless index # Now we know we have an & operator, so we're going to delete it from the # list of tokens to make sure it doesn't get confused with anything else. - tokens.delete(operator) + operator = tokens.delete_at(index) # Construct the location that represents the block argument. location = operator.location From 7b6dbeb3edea077f38369240f5a0dc4d1e0390ae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 11:16:46 -0400 Subject: [PATCH 05/15] More micro-optimizations --- bin/ybench | 17 +++ lib/syntax_tree/node.rb | 239 ++++++++++++++++++++++------------------ 2 files changed, 151 insertions(+), 105 deletions(-) create mode 100755 bin/ybench diff --git a/bin/ybench b/bin/ybench new file mode 100755 index 00000000..9dcc45aa --- /dev/null +++ b/bin/ybench @@ -0,0 +1,17 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/inline" + +gemfile do + source "https://rubygems.org" + gem "benchmark-ips" +end + +string = "a" * 1000 + "\n" + +Benchmark.ips do |x| + x.report("chomp") { string.chomp } + x.report("chop") { string.chop } + x.compare! +end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index eccf0638..b3babfb5 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1413,17 +1413,21 @@ class Labels def format_key(q, key) case key - in Label + when Label q.format(key) - in SymbolLiteral + when SymbolLiteral q.format(key.value) q.text(":") - in DynaSymbol[parts: [TStringContent[value: LABEL] => part]] - q.format(part) - q.text(":") - in DynaSymbol - q.format(key) - q.text(":") + when DynaSymbol + parts = key.parts + + if parts.length == 1 && (part = parts.first) && part.is_a?(TStringContent) && part.value.match?(LABEL) + q.format(part) + q.text(":") + else + q.format(key) + q.text(":") + end end end end @@ -1433,8 +1437,7 @@ class Rockets def format_key(q, key) case key when Label - q.text(":") - q.text(key.value.chomp(":")) + q.text(":#{key.value.chomp(":")}") when DynaSymbol q.text(":") q.format(key) @@ -2145,105 +2148,128 @@ def format(q) q.group do q.text(keyword) - case node.arguments.parts - in [] + parts = node.arguments.parts + length = parts.length + + if length == 0 # Here there are no arguments at all, so we're not going to print # anything. This would be like if we had: # # break # - in [ - Paren[ - contents: { - body: [ArrayLiteral[contents: { parts: [_, _, *] }] => array] } - ] - ] - # Here we have a single argument that is a set of parentheses wrapping - # an array literal that has at least 2 elements. We're going to print - # the contents of the array directly. This would be like if we had: - # - # break([1, 2, 3]) - # - # which we will print as: - # - # break 1, 2, 3 - # - q.text(" ") - format_array_contents(q, array) - in [Paren[contents: { body: [ArrayLiteral => statement] }]] - # Here we have a single argument that is a set of parentheses wrapping - # an array literal that has 0 or 1 elements. We're going to skip the - # parentheses but print the array itself. This would be like if we - # had: - # - # break([1]) - # - # which we will print as: - # - # break [1] - # - q.text(" ") - q.format(statement) - in [Paren[contents: { body: [statement] }]] if skip_parens?(statement) - # Here we have a single argument that is a set of parentheses that - # themselves contain a single statement. That statement is a simple - # value that we can skip the parentheses for. This would be like if we - # had: - # - # break(1) - # - # which we will print as: - # - # break 1 - # - q.text(" ") - q.format(statement) - in [Paren => part] - # Here we have a single argument that is a set of parentheses. We're - # going to print the parentheses themselves as if they were the set of - # arguments. This would be like if we had: - # - # break(foo.bar) - # - q.format(part) - in [ArrayLiteral[contents: { parts: [_, _, *] }] => array] - # Here there is a single argument that is an array literal with at - # least two elements. We skip directly into the array literal's - # elements in order to print the contents. This would be like if we - # had: - # - # break [1, 2, 3] - # - # which we will print as: - # - # break 1, 2, 3 - # - q.text(" ") - format_array_contents(q, array) - in [ArrayLiteral => part] - # Here there is a single argument that is an array literal with 0 or 1 - # elements. In this case we're going to print the array as it is - # because skipping the brackets would change the remaining. This would - # be like if we had: - # - # break [] - # break [1] - # - q.text(" ") - q.format(part) - in [_] - # Here there is a single argument that hasn't matched one of our - # previous cases. We're going to print the argument as it is. This - # would be like if we had: - # - # break foo - # - format_arguments(q, "(", ")") - else + elsif length >= 2 # If there are multiple arguments, format them all. If the line is # going to break into multiple, then use brackets to start and end the # expression. format_arguments(q, " [", "]") + else + # If we get here, then we're formatting a single argument to the flow + # control keyword. + part = parts.first + + case part + when Paren + statements = part.contents.body + + if statements.length == 1 + statement = statements.first + + if statement.is_a?(ArrayLiteral) + contents = statement.contents + + if contents && contents.parts.length >= 2 + # Here we have a single argument that is a set of parentheses + # wrapping an array literal that has at least 2 elements. + # We're going to print the contents of the array directly. + # This would be like if we had: + # + # break([1, 2, 3]) + # + # which we will print as: + # + # break 1, 2, 3 + # + q.text(" ") + format_array_contents(q, statement) + else + # Here we have a single argument that is a set of parentheses + # wrapping an array literal that has 0 or 1 elements. We're + # going to skip the parentheses but print the array itself. + # This would be like if we had: + # + # break([1]) + # + # which we will print as: + # + # break [1] + # + q.text(" ") + q.format(statement) + end + elsif skip_parens?(statement) + # Here we have a single argument that is a set of parentheses + # that themselves contain a single statement. That statement is + # a simple value that we can skip the parentheses for. This + # would be like if we had: + # + # break(1) + # + # which we will print as: + # + # break 1 + # + q.text(" ") + q.format(statement) + else + # Here we have a single argument that is a set of parentheses. + # We're going to print the parentheses themselves as if they + # were the set of arguments. This would be like if we had: + # + # break(foo.bar) + # + q.format(part) + end + else + q.format(part) + end + when ArrayLiteral + contents = part.contents + + if contents && contents.parts.length >= 2 + # Here there is a single argument that is an array literal with at + # least two elements. We skip directly into the array literal's + # elements in order to print the contents. This would be like if + # we had: + # + # break [1, 2, 3] + # + # which we will print as: + # + # break 1, 2, 3 + # + q.text(" ") + format_array_contents(q, part) + else + # Here there is a single argument that is an array literal with 0 + # or 1 elements. In this case we're going to print the array as it + # is because skipping the brackets would change the remaining. + # This would be like if we had: + # + # break [] + # break [1] + # + q.text(" ") + q.format(part) + end + else + # Here there is a single argument that hasn't matched one of our + # previous cases. We're going to print the argument as it is. This + # would be like if we had: + # + # break foo + # + format_arguments(q, "(", ")") + end end end end @@ -3791,15 +3817,18 @@ def initialize(operator, node) end def format(q) - space = [If, IfMod, Unless, UnlessMod].include?(q.parent.class) - left = node.left right = node.right q.format(left) if left - q.text(" ") if space - q.text(operator) - q.text(" ") if space + + case q.parent + when If, IfMod, Unless, UnlessMod + q.text(" #{operator} ") + else + q.text(operator) + end + q.format(right) if right end end From 929726bda1371541101ff40e6c91765a0e3aad25 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 12:21:36 -0400 Subject: [PATCH 06/15] Remove remaining pattern matching --- bin/ybench | 17 --- lib/syntax_tree/formatter.rb | 4 + lib/syntax_tree/node.rb | 204 +++++++++++++++++++---------------- lib/syntax_tree/parser.rb | 21 ++-- 4 files changed, 129 insertions(+), 117 deletions(-) delete mode 100755 bin/ybench diff --git a/bin/ybench b/bin/ybench deleted file mode 100755 index 9dcc45aa..00000000 --- a/bin/ybench +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -require "bundler/inline" - -gemfile do - source "https://rubygems.org" - gem "benchmark-ips" -end - -string = "a" * 1000 + "\n" - -Benchmark.ips do |x| - x.report("chomp") { string.chomp } - x.report("chop") { string.chop } - x.compare! -end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index dc124fbc..5fe5e260 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -120,6 +120,10 @@ def format_each(nodes) nodes.each { |node| format(node) } end + def grandparent + stack[-3] + end + def parent stack[-2] end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index b3babfb5..cb4fadef 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2298,10 +2298,15 @@ def format_arguments(q, opening, closing) def skip_parens?(node) case node - in FloatLiteral | Imaginary | Int | RationalLiteral - true - in VarRef[value: Const | CVar | GVar | IVar | Kw] + when FloatLiteral, Imaginary, Int, RationalLiteral true + when VarRef + case node.value + when Const, CVar, GVar, IVar, Kw + true + else + false + end else false end @@ -2364,8 +2369,14 @@ def comments def format(q) case operator - in :"::" | Op[value: "::"] + when :"::" q.text(".") + when Op + if operator.value == "::" + q.text(".") + else + operator.format(q) + end else operator.format(q) end @@ -2401,13 +2412,18 @@ def format(q) # First, walk down the chain until we get to the point where we're not # longer at a chainable node. loop do - case children.last - in Call[receiver: Call] - children << children.last.receiver - in Call[receiver: MethodAddBlock[call: Call]] - children << children.last.receiver - in MethodAddBlock[call: Call] - children << children.last.call + case (child = children.last) + when Call + case (receiver = child.receiver) + when Call + children << receiver + when MethodAddBlock + receiver.call.is_a?(Call) ? children << receiver : break + else + break + end + when MethodAddBlock + child.call.is_a?(Call) ? children << child.call : break else break end @@ -2426,10 +2442,8 @@ def format(q) # nodes. parent = parents[3] if parent.is_a?(DoBlock) - case parent - in MethodAddBlock[call: FCall[value: { value: "sig" }]] + if parent.is_a?(MethodAddBlock) && parent.call.is_a?(FCall) && parent.call.value.value == "sig" threshold = 2 - else end end @@ -2472,20 +2486,17 @@ def format_chain(q, children) skip_operator = false while (child = children.pop) - case child - in Call[ - receiver: Call[message: { value: "where" }], - message: { value: "not" } - ] - # This is very specialized behavior wherein we group - # .where.not calls together because it looks better. For more - # information, see - # https://github.com/prettier/plugin-ruby/issues/862. - in Call - # If we're at a Call node and not a MethodAddBlock node in the - # chain then we're going to add a newline so it indents properly. - q.breakable_empty - else + if child.is_a?(Call) + if child.receiver.is_a?(Call) && child.receiver.message.value == "where" && child.message.value == "not" + # This is very specialized behavior wherein we group + # .where.not calls together because it looks better. For more + # information, see + # https://github.com/prettier/plugin-ruby/issues/862. + else + # If we're at a Call node and not a MethodAddBlock node in the + # chain then we're going to add a newline so it indents properly. + q.breakable_empty + end end format_child( @@ -2498,9 +2509,9 @@ def format_chain(q, children) # If the parent call node has a comment on the message then we need # to print the operator trailing in order to keep it working. - case children.last - in Call[message: { comments: [_, *] }, operator:] - q.format(CallOperatorFormatter.new(operator)) + last_child = children.last + if last_child.is_a?(Call) && last_child.message.comments.any? + q.format(CallOperatorFormatter.new(last_child.operator)) skip_operator = true else skip_operator = false @@ -2515,18 +2526,22 @@ def format_chain(q, children) if empty_except_last case node - in Call + when Call node.format_arguments(q) - in MethodAddBlock[block:] - q.format(block) + when MethodAddBlock + q.format(node.block) end end end def self.chained?(node) + return false if ENV["STREE_FAST_FORMAT"] + case node - in Call | MethodAddBlock[call: Call] + when Call true + when MethodAddBlock + node.call.is_a?(Call) else false end @@ -2538,9 +2553,12 @@ def self.chained?(node) # want to indent the first call. So we'll pop off the first children and # format it separately here. def attach_directly?(node) - [ArrayLiteral, HashLiteral, Heredoc, If, Unless, XStringLiteral].include?( - node.receiver.class - ) + case node.receiver + when ArrayLiteral, HashLiteral, Heredoc, If, Unless, XStringLiteral + true + else + false + end end def format_child( @@ -2552,7 +2570,7 @@ def format_child( ) # First, format the actual contents of the child. case child - in Call + when Call q.group do unless skip_operator q.format(CallOperatorFormatter.new(child.operator)) @@ -2560,7 +2578,7 @@ def format_child( q.format(child.message) if child.message != :call child.format_arguments(q) unless skip_attached end - in MethodAddBlock + when MethodAddBlock q.format(child.block) unless skip_attached end @@ -2643,9 +2661,7 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if !ENV["STREE_SKIP_CALL_CHAIN"] && - !CallChainFormatter.chained?(q.parent) && - CallChainFormatter.chained?(receiver) + if CallChainFormatter.chained?(receiver) && !CallChainFormatter.chained?(q.parent) q.group do q .if_break { CallChainFormatter.new(self).format(q) } @@ -2658,9 +2674,9 @@ def format(q) def format_arguments(q) case arguments - in ArgParen + when ArgParen q.format(arguments) - in Args + when Args q.text(" ") q.format(arguments) else @@ -2821,7 +2837,7 @@ def format(q) q.format(operator) case pattern - in AryPtn | FndPtn | HshPtn + when AryPtn, FndPtn, HshPtn q.text(" ") q.format(pattern) else @@ -5286,28 +5302,35 @@ def self.call(parent) module Ternaryable class << self def call(q, node) - case q.parents.take(2)[1] - in Paren[contents: Statements[body: [node]]] - # If this is a conditional inside of a parentheses as the only - # content, then we don't want to transform it into a ternary. - # Presumably the user wanted it to be an explicit conditional because - # there are parentheses around it. So we'll just leave it in place. - false - else - # Otherwise, we're going to check the conditional for certain cases. - case node - in predicate: Assign | Command | CommandCall | MAssign | OpAssign - false - in predicate: Not[parentheses: false] - false - in { - statements: { body: [truthy] }, - consequent: Else[statements: { body: [falsy] }] } - ternaryable?(truthy) && ternaryable?(falsy) - else - false - end + return false if ENV["STREE_FAST_FORMAT"] + + # If this is a conditional inside of a parentheses as the only content, + # then we don't want to transform it into a ternary. Presumably the user + # wanted it to be an explicit conditional because there are parentheses + # around it. So we'll just leave it in place. + grandparent = q.grandparent + if grandparent.is_a?(Paren) && (body = grandparent.contents.body) && body.length == 1 && body.first == node + return false + end + + # Otherwise, we'll check the type of predicate. For certain nodes we + # want to force it to not be a ternary, like if the predicate is an + # assignment because it's hard to read. + case node.predicate + when Assign, Command, CommandCall, MAssign, OpAssign + return false + when Not + return false unless node.predicate.parentheses? end + + # If there's no Else, then this can't be represented as a ternary. + return false unless node.consequent.is_a?(Else) + + truthy_body = node.statements.body + falsy_body = node.consequent.statements.body + + (truthy_body.length == 1) && ternaryable?(truthy_body.first) && + (falsy_body.length == 1) && ternaryable?(falsy_body.first) end private @@ -5316,24 +5339,23 @@ def call(q, node) # parentheses around them. In this case we say they cannot be ternaried # and default instead to breaking them into multiple lines. def ternaryable?(statement) - # This is a list of nodes that should not be allowed to be a part of a - # ternary clause. - no_ternary = [ - Alias, Assign, Break, Command, CommandCall, Heredoc, If, IfMod, IfOp, + case statement + when Alias, Assign, Break, Command, CommandCall, Heredoc, If, IfMod, IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, Return, Return0, Super, Undef, Unless, UnlessMod, Until, UntilMod, VarAlias, VoidStmt, While, WhileMod, Yield, Yield0, ZSuper - ] - - # Here we're going to check that the only statement inside the - # statements node is no a part of our denied list of nodes that can be - # ternaries. - # - # If the user is using one of the lower precedence "and" or "or" - # operators, then we can't use a ternary expression as it would break - # the flow control. - !no_ternary.include?(statement.class) && - !(statement.is_a?(Binary) && %i[and or].include?(statement.operator)) + # This is a list of nodes that should not be allowed to be a part of a + # ternary clause. + false + when Binary + # If the user is using one of the lower precedence "and" or "or" + # operators, then we can't use a ternary expression as it would break + # the flow control. + operator = statement.operator + operator != "and" && operator != "or" + else + true + end end end end @@ -5453,8 +5475,11 @@ def format_ternary(q) end def contains_conditional? - case node - in statements: { body: [If | IfMod | IfOp | Unless | UnlessMod] } + statements = node.statements.body + return false if statements.length != 1 + + case statements.first + when If, IfMod, IfOp, Unless, UnlessMod true else false @@ -6410,9 +6435,7 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if !ENV["STREE_SKIP_CALL_CHAIN"] && - !CallChainFormatter.chained?(q.parent) && - CallChainFormatter.chained?(call) + if CallChainFormatter.chained?(call) && !CallChainFormatter.chained?(q.parent) q.group do q .if_break { CallChainFormatter.new(self).format(q) } @@ -9122,6 +9145,7 @@ class Not < Node # [boolean] whether or not parentheses were used attr_reader :parentheses + alias parentheses? parentheses # [Array[ Comment | EmbDoc ]] the comments attached to this node attr_reader :comments @@ -9160,10 +9184,10 @@ def format(q) q.format(statement) if statement q.text(")") else - parent = q.parents.take(2)[1] + grandparent = q.grandparent ternary = - (parent.is_a?(If) || parent.is_a?(Unless)) && - Ternaryable.call(q, parent) + (grandparent.is_a?(If) || grandparent.is_a?(Unless)) && + Ternaryable.call(q, grandparent) if ternary q.if_break { q.text(" ") }.if_flat { q.text("(") } diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 78a6f84b..15f8522b 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -689,12 +689,13 @@ def on_aryptn(constant, requireds, rest, posts) if rest.is_a?(VarField) && rest.value.nil? tokens.rindex do |rtoken| case rtoken - in Op[value: "*"] - rest = VarField.new(value: nil, location: rtoken.location) + when Comma break - in Comma - break - else + when Op + if rtoken.value == "*" + rest = VarField.new(value: nil, location: rtoken.location) + break + end end end end @@ -1659,10 +1660,10 @@ def on_fndptn(constant, left, values, right) # punctuation or the right splat. closing = case opening - in LBracket + when LBracket tokens.delete(opening) consume_token(RBracket) - in LParen + when LParen tokens.delete(opening) consume_token(RParen) else @@ -2092,7 +2093,7 @@ def on_lambda(params, statements) # capturing lambda var until 3.2, we need to normalize all of that here. params = case params - in Paren[contents: Params] + when Paren # In this case we've gotten to the <3.2 parentheses wrapping a set of # parameters case. Here we need to manually scan for lambda locals. range = (params.location.start_char + 1)...params.location.end_char @@ -2112,12 +2113,12 @@ def on_lambda(params, statements) location: params.location, comments: params.comments ) - in Params + when Params # In this case we've gotten to the <3.2 plain set of parameters. In # this case there cannot be lambda locals, so we will wrap the # parameters into a lambda var that has no locals. LambdaVar.new(params: params, locals: [], location: params.location) - in LambdaVar + when LambdaVar # In this case we've gotten to 3.2+ lambda var. In this case we don't # need to do anything and can just the value as given. params From b5154ac7efdc625daf71a761a41c3a3009d87328 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 12:40:06 -0400 Subject: [PATCH 07/15] each_line instead of split --- lib/syntax_tree/formatter.rb | 53 ++++++++++---------- lib/syntax_tree/node.rb | 95 ++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 52 deletions(-) diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index 5fe5e260..39ed1583 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -4,26 +4,6 @@ module SyntaxTree # A slightly enhanced PP that knows how to format recursively including # comments. class Formatter < PrettierPrint - # It's very common to use seplist with ->(q) { q.breakable_return }. We wrap - # that pattern into an object to cut down on having to create a bunch of - # lambdas all over the place. - class BreakableReturnSeparator - def call(q) - q.breakable_return - end - end - - # Similar to the previous, it's common to ->(q) { q.breakable_space }. We - # also wrap that pattern into an object to cut down on lambdas. - class BreakableSpaceSeparator - def call(q) - q.breakable_space - end - end - - BREAKABLE_RETURN_SEPARATOR = BreakableReturnSeparator.new - BREAKABLE_SPACE_SEPARATOR = BreakableSpaceSeparator.new - # We want to minimize as much as possible the number of options that are # available in syntax tree. For the most part, if users want non-default # formatting, they should override the format methods on the specific nodes @@ -82,20 +62,39 @@ def format(node, stackable: true) # If there are comments, then we're going to format them around the node # so that they get printed properly. if node.comments.any? - leading, trailing = node.comments.partition(&:leading?) + trailing = [] + last_leading = nil - # Print all comments that were found before the node. - leading.each do |comment| - comment.format(self) - breakable(force: true) + # First, we're going to print all of the comments that were found before + # the node. We'll also gather up any trailing comments that we find. + node.comments.each do |comment| + if comment.leading? + comment.format(self) + breakable(force: true) + last_leading = comment + else + trailing << comment + end end # If the node has a stree-ignore comment right before it, then we're # going to just print out the node as it was seen in the source. doc = - if leading.last&.ignore? + if last_leading&.ignore? range = source[node.location.start_char...node.location.end_char] - seplist(range.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) { |line| text(line) } + first = true + + range.each_line(chomp: true) do |line| + if first + first = false + else + breakable_return + end + + text(line) + end + + breakable_return if range.end_with?("\n") else node.format(self) end diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index cb4fadef..ce67a135 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -328,7 +328,19 @@ def deconstruct_keys(_keys) def format(q) q.text("__END__") q.breakable_force - q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) { |line| q.text(line) } + + first = true + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) + end + + q.breakable_return if value.end_with?("\n") end end @@ -792,6 +804,17 @@ def format(q) # [one, two, three] # class ArrayLiteral < Node + # It's very common to use seplist with ->(q) { q.breakable_space }. We wrap + # that pattern into an object to cut down on having to create a bunch of + # lambdas all over the place. + class BreakableSpaceSeparator + def call(q) + q.breakable_space + end + end + + BREAKABLE_SPACE_SEPARATOR = BreakableSpaceSeparator.new + # Formats an array of multiple simple string literals into the %w syntax. class QWordsFormatter # [Args] the contents of the array @@ -806,7 +829,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(contents.parts, Formatter::BREAKABLE_SPACE_SEPARATOR) do |part| + q.seplist(contents.parts, BREAKABLE_SPACE_SEPARATOR) do |part| if part.is_a?(StringLiteral) q.format(part.parts.first) else @@ -834,7 +857,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(contents.parts, Formatter::BREAKABLE_SPACE_SEPARATOR) do |part| + q.seplist(contents.parts, BREAKABLE_SPACE_SEPARATOR) do |part| q.format(part.value) end end @@ -4034,9 +4057,19 @@ def format(q) parts.each do |part| if part.is_a?(TStringContent) value = Quotes.normalize(part.value, closing_quote) - q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) do |text| - q.text(text) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) end + + q.breakable_return if value.end_with?("\n") else q.format(part) end @@ -4957,17 +4990,7 @@ def deconstruct_keys(_keys) # This is a very specific behavior where you want to force a newline, but # don't want to force the break parent. - class Separator - DOC = PrettierPrint::Breakable.new(" ", 1, indent: false, force: true) - - def call(q) - q.target << DOC - end - end - - # We're going to keep an instance around so we don't have to allocate a new - # one every time we format a heredoc. - SEPARATOR = Separator.new + SEPARATOR = PrettierPrint::Breakable.new(" ", 1, indent: false, force: true) def format(q) q.group do @@ -4975,12 +4998,24 @@ def format(q) q.line_suffix(priority: Formatter::HEREDOC_PRIORITY) do q.group do - SEPARATOR.call(q) + q.target << SEPARATOR parts.each do |part| if part.is_a?(TStringContent) - texts = part.value.split(/\r?\n/, -1) - q.seplist(texts, SEPARATOR) { |text| q.text(text) } + value = part.value + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.target << SEPARATOR + end + + q.text(line) + end + + q.target << SEPARATOR if value.end_with?("\n") else q.format(part) end @@ -7295,7 +7330,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| + q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end @@ -7388,7 +7423,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| + q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end @@ -8626,9 +8661,19 @@ def format(q) parts.each do |part| if part.is_a?(TStringContent) value = Quotes.normalize(part.value, closing_quote) - q.seplist(value.split(/\r?\n/, -1), Formatter::BREAKABLE_RETURN_SEPARATOR) do |text| - q.text(text) + first = true + + value.each_line(chomp: true) do |line| + if first + first = false + else + q.breakable_return + end + + q.text(line) end + + q.breakable_return if value.end_with?("\n") else q.format(part) end @@ -8845,7 +8890,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| + q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end @@ -10184,7 +10229,7 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, Formatter::BREAKABLE_SPACE_SEPARATOR) do |element| + q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| q.format(element) end end From e31ddedb55f68b66baef1ef494d434c395ba1930 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:06:35 -0400 Subject: [PATCH 08/15] Update the prettier_print version --- Gemfile.lock | 8 ++++---- syntax_tree.gemspec | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 6415fcb0..76bda432 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . specs: syntax_tree (3.6.3) - prettier_print + prettier_print (>= 1.0.0) GEM remote: https://rubygems.org/ @@ -14,10 +14,10 @@ GEM parallel (1.22.1) parser (3.1.2.1) ast (~> 2.4.1) - prettier_print (0.1.0) + prettier_print (1.0.0) rainbow (3.1.1) rake (13.0.6) - regexp_parser (2.5.0) + regexp_parser (2.6.0) rexml (3.2.5) rubocop (1.36.0) json (~> 2.3) @@ -38,7 +38,7 @@ GEM simplecov_json_formatter (~> 0.1) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - unicode-display_width (2.2.0) + unicode-display_width (2.3.0) PLATFORMS arm64-darwin-21 diff --git a/syntax_tree.gemspec b/syntax_tree.gemspec index 2b461dfd..ec7d57ef 100644 --- a/syntax_tree.gemspec +++ b/syntax_tree.gemspec @@ -25,7 +25,7 @@ Gem::Specification.new do |spec| spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } spec.require_paths = %w[lib] - spec.add_dependency "prettier_print" + spec.add_dependency "prettier_print", ">= 1.0.0" spec.add_development_dependency "bundler" spec.add_development_dependency "minitest" From b117c9b1ae3bef29698598b4f7dcf51340739a6a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:15:21 -0400 Subject: [PATCH 09/15] Fix message checking accidentally introduced when removing pattern matching --- lib/syntax_tree/node.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index ce67a135..cbeca9ae 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2510,7 +2510,7 @@ def format_chain(q, children) while (child = children.pop) if child.is_a?(Call) - if child.receiver.is_a?(Call) && child.receiver.message.value == "where" && child.message.value == "not" + if child.receiver.is_a?(Call) && (child.receiver.message != :call) && (child.receiver.message.value == "where") && (child.message.value == "not") # This is very specialized behavior wherein we group # .where.not calls together because it looks better. For more # information, see From f5ac5fef12c8736b60560de4d3b92f764b9eafa7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:37:37 -0400 Subject: [PATCH 10/15] Reformat --- lib/syntax_tree/node.rb | 88 ++++++++++++++++++++++----------------- lib/syntax_tree/parser.rb | 26 ++++++------ 2 files changed, 64 insertions(+), 50 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index cbeca9ae..82d378c6 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -625,7 +625,8 @@ def trailing_comma? # If the last argument is a block, then we can't put a trailing comma # after it without resulting in a syntax error. false - elsif parts.length == 1 && (part = parts.first) && (part.is_a?(Command) || part.is_a?(CommandCall)) + elsif parts.length == 1 && (part = parts.first) && + (part.is_a?(Command) || part.is_a?(CommandCall)) # If the only argument is a command or command call, then a trailing # comma would be parsed as part of that expression instead of on this # one, so we don't want to add a trailing comma. @@ -1444,7 +1445,8 @@ def format_key(q, key) when DynaSymbol parts = key.parts - if parts.length == 1 && (part = parts.first) && part.is_a?(TStringContent) && part.value.match?(LABEL) + if parts.length == 1 && (part = parts.first) && + part.is_a?(TStringContent) && part.value.match?(LABEL) q.format(part) q.text(":") else @@ -2054,7 +2056,8 @@ def forced_brace_bounds?(q) when Paren, Statements # If we hit certain breakpoints then we know we're safe. return false - when If, IfMod, IfOp, Unless, UnlessMod, While, WhileMod, Until, UntilMod + when If, IfMod, IfOp, Unless, UnlessMod, While, WhileMod, Until, + UntilMod return true if parent.predicate == previous previous = parent end @@ -2395,11 +2398,7 @@ def format(q) when :"::" q.text(".") when Op - if operator.value == "::" - q.text(".") - else - operator.format(q) - end + operator.value == "::" ? q.text(".") : operator.format(q) else operator.format(q) end @@ -2465,7 +2464,8 @@ def format(q) # nodes. parent = parents[3] if parent.is_a?(DoBlock) - if parent.is_a?(MethodAddBlock) && parent.call.is_a?(FCall) && parent.call.value.value == "sig" + if parent.is_a?(MethodAddBlock) && parent.call.is_a?(FCall) && + parent.call.value.value == "sig" threshold = 2 end end @@ -2510,7 +2510,10 @@ def format_chain(q, children) while (child = children.pop) if child.is_a?(Call) - if child.receiver.is_a?(Call) && (child.receiver.message != :call) && (child.receiver.message.value == "where") && (child.message.value == "not") + if child.receiver.is_a?(Call) && + (child.receiver.message != :call) && + (child.receiver.message.value == "where") && + (child.message.value == "not") # This is very specialized behavior wherein we group # .where.not calls together because it looks better. For more # information, see @@ -2684,7 +2687,8 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if CallChainFormatter.chained?(receiver) && !CallChainFormatter.chained?(q.parent) + if CallChainFormatter.chained?(receiver) && + !CallChainFormatter.chained?(q.parent) q.group do q .if_break { CallChainFormatter.new(self).format(q) } @@ -5344,7 +5348,8 @@ def call(q, node) # wanted it to be an explicit conditional because there are parentheses # around it. So we'll just leave it in place. grandparent = q.grandparent - if grandparent.is_a?(Paren) && (body = grandparent.contents.body) && body.length == 1 && body.first == node + if grandparent.is_a?(Paren) && (body = grandparent.contents.body) && + body.length == 1 && body.first == node return false end @@ -5375,10 +5380,10 @@ def call(q, node) # and default instead to breaking them into multiple lines. def ternaryable?(statement) case statement - when Alias, Assign, Break, Command, CommandCall, Heredoc, If, IfMod, IfOp, - Lambda, MAssign, Next, OpAssign, RescueMod, Return, Return0, Super, - Undef, Unless, UnlessMod, Until, UntilMod, VarAlias, VoidStmt, While, - WhileMod, Yield, Yield0, ZSuper + when Alias, Assign, Break, Command, CommandCall, Heredoc, If, IfMod, + IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, Return, Return0, + Super, Undef, Unless, UnlessMod, Until, UntilMod, VarAlias, + VoidStmt, While, WhileMod, Yield, Yield0, ZSuper # This is a list of nodes that should not be allowed to be a part of a # ternary clause. false @@ -6470,7 +6475,8 @@ def format(q) # If we're at the top of a call chain, then we're going to do some # specialized printing in case we can print it nicely. We _only_ do this # at the top of the chain to avoid weird recursion issues. - if CallChainFormatter.chained?(call) && !CallChainFormatter.chained?(q.parent) + if CallChainFormatter.chained?(call) && + !CallChainFormatter.chained?(q.parent) q.group do q .if_break { CallChainFormatter.new(self).format(q) } @@ -7330,9 +7336,10 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| - q.format(element) - end + q.seplist( + elements, + ArrayLiteral::BREAKABLE_SPACE_SEPARATOR + ) { |element| q.format(element) } end q.breakable_empty end @@ -7423,9 +7430,10 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| - q.format(element) - end + q.seplist( + elements, + ArrayLiteral::BREAKABLE_SPACE_SEPARATOR + ) { |element| q.format(element) } end q.breakable_empty end @@ -8378,7 +8386,7 @@ def format(q) q.text("; ") q.format(statement) end - + line = statement.location.end_line previous = statement end @@ -8890,9 +8898,10 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| - q.format(element) - end + q.seplist( + elements, + ArrayLiteral::BREAKABLE_SPACE_SEPARATOR + ) { |element| q.format(element) } end q.breakable_empty end @@ -9773,15 +9782,17 @@ def format(q) def pin(parent) replace = PinnedVarRef.new(value: value, location: location) - parent.deconstruct_keys([]).each do |key, value| - if value == self - parent.instance_variable_set(:"@#{key}", replace) - break - elsif value.is_a?(Array) && (index = value.index(self)) - parent.public_send(key)[index] = replace - break + parent + .deconstruct_keys([]) + .each do |key, value| + if value == self + parent.instance_variable_set(:"@#{key}", replace) + break + elsif value.is_a?(Array) && (index = value.index(self)) + parent.public_send(key)[index] = replace + break + end end - end end end @@ -10229,9 +10240,10 @@ def format(q) q.group do q.indent do q.breakable_empty - q.seplist(elements, ArrayLiteral::BREAKABLE_SPACE_SEPARATOR) do |element| - q.format(element) - end + q.seplist( + elements, + ArrayLiteral::BREAKABLE_SPACE_SEPARATOR + ) { |element| q.format(element) } end q.breakable_empty end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 15f8522b..70f1e2a3 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -1011,12 +1011,13 @@ def on_case(value, consequent) consume_operator(:"=>") end - node = RAssign.new( - value: value, - operator: operator, - pattern: consequent, - location: value.location.to(consequent.location) - ) + node = + RAssign.new( + value: value, + operator: operator, + pattern: consequent, + location: value.location.to(consequent.location) + ) PinVisitor.visit(node, tokens) node @@ -1973,12 +1974,13 @@ def on_in(pattern, statements, consequent) ending.location.start_column ) - node = In.new( - pattern: pattern, - statements: statements, - consequent: consequent, - location: beginning.location.to(ending.location) - ) + node = + In.new( + pattern: pattern, + statements: statements, + consequent: consequent, + location: beginning.location.to(ending.location) + ) PinVisitor.visit(node, tokens) node From 95b25842b1e5f2cb6aeb1fd53d1795186352ec97 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:47:10 -0400 Subject: [PATCH 11/15] Fix incorrect logic in forced_brace_bounds? translation --- lib/syntax_tree/node.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 82d378c6..8b4e2e1d 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -2059,8 +2059,10 @@ def forced_brace_bounds?(q) when If, IfMod, IfOp, Unless, UnlessMod, While, WhileMod, Until, UntilMod return true if parent.predicate == previous - previous = parent end + + previous = parent + false end end From 89081dfb05266530c83b6b8b17be52f8017bfd24 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:53:26 -0400 Subject: [PATCH 12/15] Fix up rubocop violations --- .rubocop.yml | 3 +++ lib/syntax_tree.rb | 2 +- lib/syntax_tree/formatter.rb | 2 +- lib/syntax_tree/node.rb | 16 +++++++++------- lib/syntax_tree/parser.rb | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index f6ffbcd0..3323c741 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -55,6 +55,9 @@ Style/IdenticalConditionalBranches: Style/IfInsideElse: Enabled: false +Style/IfWithBooleanLiteralBranches: + Enabled: false + Style/KeywordParametersOrder: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index da84273c..ed783e47 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -23,7 +23,7 @@ # We rely on Symbol#name being available, which is only available in Ruby 3.0+. # In case we're running on an older Ruby version, we polyfill it here. unless :+.respond_to?(:name) - class Symbol + class Symbol # rubocop:disable Style/Documentation def name to_s.freeze end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index 39ed1583..f878490c 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -148,7 +148,7 @@ def group # A similar version to the super, except that it calls back into the # separator proc with the instance of `self`. - def seplist(list, sep = nil, iter_method = :each) # :yield: element + def seplist(list, sep = nil, iter_method = :each) first = true list.__send__(iter_method) do |*v| if first diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 8b4e2e1d..d183a9f8 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -621,11 +621,11 @@ def trailing_comma? return false unless arguments.is_a?(Args) parts = arguments.parts - if parts.last&.is_a?(ArgBlock) + if parts.last.is_a?(ArgBlock) # If the last argument is a block, then we can't put a trailing comma # after it without resulting in a syntax error. false - elsif parts.length == 1 && (part = parts.first) && + elsif (parts.length == 1) && (part = parts.first) && (part.is_a?(Command) || part.is_a?(CommandCall)) # If the only argument is a command or command call, then a trailing # comma would be parsed as part of that expression instead of on this @@ -891,6 +891,7 @@ def format(q) # # provided the line length was hit between `bar` and `baz`. class VarRefsFormatter + # The separator for the fill algorithm. class Separator def call(q) q.text(",") @@ -2522,7 +2523,8 @@ def format_chain(q, children) # https://github.com/prettier/plugin-ruby/issues/862. else # If we're at a Call node and not a MethodAddBlock node in the - # chain then we're going to add a newline so it indents properly. + # chain then we're going to add a newline so it indents + # properly. q.breakable_empty end end @@ -2701,6 +2703,8 @@ def format(q) end end + # Print out the arguments to this call. If there are no arguments, then do + #nothing. def format_arguments(q) case arguments when ArgParen @@ -2708,8 +2712,6 @@ def format_arguments(q) when Args q.text(" ") q.format(arguments) - else - # Do nothing if there are no arguments. end end @@ -3180,6 +3182,8 @@ def format(q) end end + # Format the arguments for this command call here. If there are no + # arguments, then print nothing. if arguments parts = arguments.parts @@ -3190,8 +3194,6 @@ def format(q) q.text(" ") q.nest(argument_alignment(q, doc)) { q.format(arguments) } end - else - # If there are no arguments, print nothing. end end end diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 70f1e2a3..61a7ca57 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -1389,7 +1389,7 @@ def on_dot3(left, right) # :call-seq: # on_dyna_symbol: (StringContent string_content) -> DynaSymbol def on_dyna_symbol(string_content) - if symbeg = find_token(SymBeg) + if (symbeg = find_token(SymBeg)) # A normal dynamic symbol tokens.delete(symbeg) tstring_end = consume_tstring_end(symbeg.location) From 8f15bbb771129cb43587579a3f10679e260fc808 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:55:42 -0400 Subject: [PATCH 13/15] Check if this is working on truffleruby --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d35471fa..afd7eb8e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,6 +12,7 @@ jobs: - '3.0' - '3.1' - head + - truffleruby name: CI runs-on: ubuntu-latest env: From aa0573ddc919c18d641b71757571805c780a3d3a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 13:58:58 -0400 Subject: [PATCH 14/15] Fix incorrect translation of Ternaryable.call --- lib/syntax_tree/node.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index d183a9f8..dcdd0275 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -5396,7 +5396,7 @@ def ternaryable?(statement) # operators, then we can't use a ternary expression as it would break # the flow control. operator = statement.operator - operator != "and" && operator != "or" + operator != :and && operator != :or else true end From 7b2bc9b6d46970b58bf7a3457d23d2ee87d28e6a Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 17 Oct 2022 14:01:12 -0400 Subject: [PATCH 15/15] Remove truffleruby from tests until we finish removing pattern matching --- .github/workflows/main.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index afd7eb8e..d35471fa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,6 @@ jobs: - '3.0' - '3.1' - head - - truffleruby name: CI runs-on: ubuntu-latest env: