Skip to content

Commit c1ddb79

Browse files
committed
Add a rubocop ast translator
1 parent da19f6a commit c1ddb79

File tree

4 files changed

+172
-110
lines changed

4 files changed

+172
-110
lines changed

lib/syntax_tree/parser.rb

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def find_keyword(name)
275275
end
276276

277277
def find_keyword_between(name, left, right)
278-
bounds = left.location.end_char...right.location.start_char
278+
bounds = left.end_char...right.start_char
279279
index =
280280
tokens.rindex do |token|
281281
char = token.location.start_char
@@ -1807,19 +1807,19 @@ def on_for(index, collection, statements)
18071807
in_keyword = consume_keyword(:in)
18081808
ending = consume_keyword(:end)
18091809

1810-
# Consume the do keyword if it exists so that it doesn't get confused for
1811-
# some other block
1812-
if (keyword = find_keyword_between(:do, collection, ending))
1813-
tokens.delete(keyword)
1814-
end
1810+
delimiter =
1811+
find_keyword_between(:do, collection, ending) ||
1812+
find_token_between(Semicolon, collection, ending)
1813+
1814+
tokens.delete(delimiter) if delimiter
18151815

18161816
start_char =
1817-
find_next_statement_start((keyword || collection).location.end_char)
1817+
find_next_statement_start((delimiter || collection).location.end_char)
18181818

18191819
statements.bind(
18201820
start_char,
18211821
start_char -
1822-
line_counts[(keyword || collection).location.end_line - 1].start,
1822+
line_counts[(delimiter || collection).location.end_line - 1].start,
18231823
ending.location.start_char,
18241824
ending.location.start_column
18251825
)
@@ -3328,10 +3328,13 @@ def on_sclass(target, bodystmt)
33283328
)
33293329
end
33303330

3331+
# Semicolons are tokens that get added to the token list but never get
3332+
# attached to the AST. Because of this they only need to track their
3333+
# associated location so they can be used for computing bounds.
33313334
class Semicolon
33323335
attr_reader :location
33333336

3334-
def initialize(location:)
3337+
def initialize(location)
33353338
@location = location
33363339
end
33373340
end
@@ -3340,13 +3343,12 @@ def initialize(location:)
33403343
# on_semicolon: (String value) -> Semicolon
33413344
def on_semicolon(value)
33423345
tokens << Semicolon.new(
3343-
location:
3344-
Location.token(
3345-
line: lineno,
3346-
char: char_pos,
3347-
column: current_column,
3348-
size: value.size
3349-
)
3346+
Location.token(
3347+
line: lineno,
3348+
char: char_pos,
3349+
column: current_column,
3350+
size: value.size
3351+
)
33503352
)
33513353
end
33523354

lib/syntax_tree/translation.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,16 @@ def self.to_parser(node, buffer)
1313

1414
node.accept(Parser.new(buffer))
1515
end
16+
17+
# This method translates the given node into the representation defined by
18+
# the rubocop/rubocop-ast gem. We don't explicitly list it as a dependency
19+
# because it's not required for the core functionality of Syntax Tree.
20+
def self.to_rubocop_ast(node, buffer)
21+
require "rubocop/ast"
22+
require_relative "translation/parser"
23+
require_relative "translation/rubocop_ast"
24+
25+
node.accept(RuboCopAST.new(buffer))
26+
end
1627
end
1728
end

lib/syntax_tree/translation/parser.rb

Lines changed: 122 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,76 @@ module Translation
55
# This visitor is responsible for converting the syntax tree produced by
66
# Syntax Tree into the syntax tree produced by the whitequark/parser gem.
77
class Parser < BasicVisitor
8+
# Heredocs are represented _very_ differently in the parser gem from how
9+
# they are represented in the Syntax Tree AST. This class is responsible
10+
# for handling the translation.
11+
class HeredocBuilder
12+
Line = Struct.new(:value, :segments)
13+
14+
attr_reader :node, :segments
15+
16+
def initialize(node)
17+
@node = node
18+
@segments = []
19+
end
20+
21+
def <<(segment)
22+
if segment.type == :str && segments.last &&
23+
segments.last.type == :str &&
24+
!segments.last.children.first.end_with?("\n")
25+
segments.last.children.first << segment.children.first
26+
else
27+
segments << segment
28+
end
29+
end
30+
31+
def trim!
32+
return unless node.beginning.value[2] == "~"
33+
lines = [Line.new(+"", [])]
34+
35+
segments.each do |segment|
36+
lines.last.segments << segment
37+
38+
if segment.type == :str
39+
lines.last.value << segment.children.first
40+
41+
if lines.last.value.end_with?("\n")
42+
lines << Line.new(+"", [])
43+
end
44+
end
45+
end
46+
47+
lines.pop if lines.last.value.empty?
48+
return if lines.empty?
49+
50+
segments.clear
51+
lines.each do |line|
52+
remaining = node.dedent
53+
54+
line.segments.each do |segment|
55+
if segment.type == :str
56+
if remaining > 0
57+
whitespace = segment.children.first[/^\s{0,#{remaining}}/]
58+
segment.children.first.sub!(/^#{whitespace}/, "")
59+
remaining -= whitespace.length
60+
end
61+
62+
if node.beginning.value[3] != "'" && segments.any? &&
63+
segments.last.type == :str &&
64+
segments.last.children.first.end_with?("\\\n")
65+
segments.last.children.first.gsub!(/\\\n\z/, "")
66+
segments.last.children.first.concat(segment.children.first)
67+
elsif !segment.children.first.empty?
68+
segments << segment
69+
end
70+
else
71+
segments << segment
72+
end
73+
end
74+
end
75+
end
76+
end
77+
878
attr_reader :buffer, :stack
979

1080
def initialize(buffer)
@@ -665,6 +735,25 @@ def visit_command_call(node)
665735
node.end_char
666736
end
667737

738+
expression =
739+
if node.arguments.is_a?(ArgParen)
740+
srange(node.start_char, node.arguments.end_char)
741+
elsif node.arguments.is_a?(Args) && node.arguments.parts.any?
742+
last_part = node.arguments.parts.last
743+
end_char =
744+
if last_part.is_a?(Heredoc)
745+
last_part.beginning.end_char
746+
else
747+
last_part.end_char
748+
end
749+
750+
srange(node.start_char, end_char)
751+
elsif node.block
752+
srange_node(node.message)
753+
else
754+
srange_node(node)
755+
end
756+
668757
call =
669758
s(
670759
if node.operator.is_a?(Op) && node.operator.value == "&."
@@ -690,14 +779,7 @@ def visit_command_call(node)
690779
node.message == :call ? nil : srange_node(node.message),
691780
begin_token,
692781
end_token,
693-
if node.arguments.is_a?(ArgParen) ||
694-
(node.arguments.is_a?(Args) && node.arguments.parts.any?)
695-
srange(node.start_char, node.arguments.end_char)
696-
elsif node.block
697-
srange_node(node.message)
698-
else
699-
srange_node(node)
700-
end
782+
expression
701783
)
702784
)
703785

@@ -1049,7 +1131,8 @@ def visit_for(node)
10491131
smap_for(
10501132
srange_length(node.start_char, 3),
10511133
srange_find_between(node.index, node.collection, "in"),
1052-
srange_search_between(node.collection, node.statements, "do"),
1134+
srange_search_between(node.collection, node.statements, "do") ||
1135+
srange_search_between(node.collection, node.statements, ";"),
10531136
srange_length(node.end_char, -3),
10541137
srange_node(node)
10551138
)
@@ -1078,98 +1161,43 @@ def visit_hash(node)
10781161
)
10791162
end
10801163

1081-
# Heredocs are represented _very_ differently in the parser gem from how
1082-
# they are represented in the Syntax Tree AST. This class is responsible
1083-
# for handling the translation.
1084-
class HeredocSegments
1085-
HeredocLine = Struct.new(:value, :segments)
1086-
1087-
attr_reader :node, :segments
1088-
1089-
def initialize(node)
1090-
@node = node
1091-
@segments = []
1092-
end
1093-
1094-
def <<(segment)
1095-
if segment.type == :str && segments.last &&
1096-
segments.last.type == :str &&
1097-
!segments.last.children.first.end_with?("\n")
1098-
segments.last.children.first << segment.children.first
1099-
else
1100-
segments << segment
1101-
end
1102-
end
1103-
1104-
def trim!
1105-
return unless node.beginning.value[2] == "~"
1106-
lines = [HeredocLine.new(+"", [])]
1107-
1108-
segments.each do |segment|
1109-
lines.last.segments << segment
1110-
1111-
if segment.type == :str
1112-
lines.last.value << segment.children.first
1113-
1114-
if lines.last.value.end_with?("\n")
1115-
lines << HeredocLine.new(+"", [])
1116-
end
1117-
end
1118-
end
1119-
1120-
lines.pop if lines.last.value.empty?
1121-
return if lines.empty?
1122-
1123-
segments.clear
1124-
lines.each do |line|
1125-
remaining = node.dedent
1126-
1127-
line.segments.each do |segment|
1128-
if segment.type == :str
1129-
if remaining > 0
1130-
whitespace = segment.children.first[/^\s{0,#{remaining}}/]
1131-
segment.children.first.sub!(/^#{whitespace}/, "")
1132-
remaining -= whitespace.length
1133-
end
1134-
1135-
if node.beginning.value[3] != "'" && segments.any? &&
1136-
segments.last.type == :str &&
1137-
segments.last.children.first.end_with?("\\\n")
1138-
segments.last.children.first.gsub!(/\\\n\z/, "")
1139-
segments.last.children.first.concat(segment.children.first)
1140-
elsif !segment.children.first.empty?
1141-
segments << segment
1142-
end
1143-
else
1144-
segments << segment
1145-
end
1146-
end
1147-
end
1148-
end
1149-
end
1150-
11511164
# Visit a Heredoc node.
11521165
def visit_heredoc(node)
1153-
heredoc_segments = HeredocSegments.new(node)
1166+
heredoc = HeredocBuilder.new(node)
11541167

1168+
# For each part of the heredoc, if it's a string content node, split it
1169+
# into multiple string content nodes, one for each line. Otherwise,
1170+
# visit the node as normal.
11551171
node.parts.each do |part|
11561172
if part.is_a?(TStringContent) && part.value.count("\n") > 1
1157-
part
1158-
.value
1159-
.split("\n")
1160-
.each { |line| heredoc_segments << s(:str, ["#{line}\n"], nil) }
1173+
index = part.start_char
1174+
lines = part.value.split("\n")
1175+
1176+
lines.each do |line|
1177+
length = line.length + 1
1178+
location = smap_collection_bare(srange_length(index, length))
1179+
1180+
heredoc << s(:str, ["#{line}\n"], location)
1181+
index += length
1182+
end
11611183
else
1162-
heredoc_segments << visit(part)
1184+
heredoc << visit(part)
11631185
end
11641186
end
11651187

1166-
heredoc_segments.trim!
1188+
# Now that we have all of the pieces on the heredoc, we can trim it if
1189+
# it is a heredoc that supports trimming (i.e., it has a ~ on the
1190+
# declaration).
1191+
heredoc.trim!
1192+
1193+
# Generate the location for the heredoc, which goes from the declaration
1194+
# to the ending delimiter.
11671195
location =
11681196
smap_heredoc(
11691197
srange_node(node.beginning),
11701198
srange(
11711199
if node.parts.empty?
1172-
node.beginning.end_char
1200+
node.beginning.end_char + 1
11731201
else
11741202
node.parts.first.start_char
11751203
end,
@@ -1178,15 +1206,15 @@ def visit_heredoc(node)
11781206
srange(node.ending.start_char, node.ending.end_char - 1)
11791207
)
11801208

1209+
# Finally, decide which kind of heredoc node to generate based on its
1210+
# declaration and contents.
11811211
if node.beginning.value.match?(/`\w+`\z/)
1182-
s(:xstr, heredoc_segments.segments, location)
1183-
elsif heredoc_segments.segments.length > 1
1184-
s(:dstr, heredoc_segments.segments, location)
1185-
elsif heredoc_segments.segments.empty?
1186-
s(:dstr, [], location)
1187-
else
1188-
segment = heredoc_segments.segments.first
1212+
s(:xstr, heredoc.segments, location)
1213+
elsif heredoc.segments.length == 1
1214+
segment = heredoc.segments.first
11891215
s(segment.type, segment.children, location)
1216+
else
1217+
s(:dstr, heredoc.segments, location)
11901218
end
11911219
end
11921220

lib/syntax_tree/translation/rubocop_ast.rb

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# frozen_string_literal: true
2+
3+
module SyntaxTree
4+
module Translation
5+
# This visitor is responsible for converting the syntax tree produced by
6+
# Syntax Tree into the syntax tree produced by the rubocop/rubocop-ast gem.
7+
class RuboCopAST < Parser
8+
private
9+
10+
# This method is effectively the same thing as the parser gem except that
11+
# it uses the rubocop-ast specializations of the nodes.
12+
def s(type, children, location)
13+
::RuboCop::AST::Builder::NODE_MAP.fetch(type, ::RuboCop::AST::Node).new(
14+
type,
15+
children,
16+
location: location
17+
)
18+
end
19+
end
20+
end
21+
end

0 commit comments

Comments
 (0)