Skip to content

Commit 5a666e4

Browse files
authored
Merge pull request #306 from ruby-syntax-tree/parser-location
Parser location
2 parents e0be579 + 52f4403 commit 5a666e4

File tree

8 files changed

+923
-1056
lines changed

8 files changed

+923
-1056
lines changed

lib/syntax_tree/formatter.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def format(node, stackable: true)
138138
# going to just print out the node as it was seen in the source.
139139
doc =
140140
if last_leading&.ignore?
141-
range = source[node.location.start_char...node.location.end_char]
141+
range = source[node.start_char...node.end_char]
142142
first = true
143143

144144
range.each_line(chomp: true) do |line|

lib/syntax_tree/node.rb

+8
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ def format(q)
126126
raise NotImplementedError
127127
end
128128

129+
# Convenience reader: returns this node's location.start_char so callers
# can write node.start_char instead of node.location.start_char.
def start_char
130+
location.start_char
131+
end
132+
133+
# Convenience reader: returns this node's location.end_char so callers can
# write node.end_char instead of node.location.end_char.
def end_char
134+
location.end_char
135+
end
136+
129137
# Pretty-prints this node by having it accept a new
# Visitor::PrettyPrintVisitor constructed with q.
def pretty_print(q)
130138
accept(Visitor::PrettyPrintVisitor.new(q))
131139
end

lib/syntax_tree/parser.rb

+149-62
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,37 @@ def find_token(type)
256256
tokens[index] if index
257257
end
258258

259+
# Searches the token list from the end for a token that is an instance of
# type and whose start_char lies between the end of left and the start of
# right. Returns that token, or nil when there is no match. The token is
# not removed from the list.
def find_token_between(type, left, right)
260+
# Half-open range (...): a token starting exactly at right's start_char is
# not considered to be in between.
bounds = left.location.end_char...right.location.start_char
261+
index =
262+
tokens.rindex do |token|
263+
char = token.location.start_char
264+
# Stop scanning as soon as a token starts before the lower bound; break
# makes rindex evaluate to nil. Presumably this relies on tokens being
# stored in source order - confirm.
break if char < bounds.begin
265+
266+
token.is_a?(type) && bounds.cover?(char)
267+
end
268+
269+
tokens[index] if index
270+
end
271+
259272
# Returns the last token in the token list that is a Kw with the given
# name, or nil when there is none. The token is not removed from the list.
def find_keyword(name)
260273
index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) }
261274
tokens[index] if index
262275
end
263276

277+
# Searches the token list from the end for a Kw token with the given name
# whose start_char lies between the end of left and the start of right.
# Returns that token, or nil when there is no match. The token is not
# removed from the list.
def find_keyword_between(name, left, right)
278+
# Half-open range (...): a keyword starting exactly at right's start_char
# is not considered to be in between.
bounds = left.end_char...right.start_char
279+
index =
280+
tokens.rindex do |token|
281+
char = token.location.start_char
282+
# Stop scanning as soon as a token starts before the lower bound; break
# makes rindex evaluate to nil. Presumably this relies on tokens being
# stored in source order - confirm.
break if char < bounds.begin
283+
284+
token.is_a?(Kw) && (token.name == name) && bounds.cover?(char)
285+
end
286+
287+
tokens[index] if index
288+
end
289+
264290
def find_operator(name)
265291
index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) }
266292
tokens[index] if index
@@ -645,7 +671,7 @@ def visit_var_ref(node)
645671
end
646672

647673
def self.visit(node, tokens)
648-
start_char = node.location.start_char
674+
start_char = node.start_char
649675
allocated = []
650676

651677
tokens.reverse_each do |token|
@@ -874,13 +900,34 @@ def on_binary(left, operator, right)
874900
# on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar
875901
def on_block_var(params, locals)
876902
index =
877-
tokens.rindex do |node|
878-
node.is_a?(Op) && %w[| ||].include?(node.value) &&
879-
node.location.start_char < params.location.start_char
880-
end
903+
tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) }
904+
905+
ending = tokens.delete_at(index)
906+
beginning = ending.value == "||" ? ending : consume_operator(:|)
907+
908+
# If there are no parameters, then we didn't have anything to base the
909+
# location information off of. Now that we have an opening of the
910+
# block, we can correct this.
911+
if params.empty?
912+
start_line = params.location.start_line
913+
start_char =
914+
(
915+
if beginning.value == "||"
916+
beginning.location.start_char
917+
else
918+
find_next_statement_start(beginning.location.end_char)
919+
end
920+
)
881921

882-
beginning = tokens[index]
883-
ending = tokens[-1]
922+
location =
923+
Location.fixed(
924+
line: start_line,
925+
char: start_char,
926+
column: start_char - line_counts[start_line - 1].start
927+
)
928+
929+
params = params.copy(location: location)
930+
end
884931

885932
BlockVar.new(
886933
params: params,
@@ -1760,21 +1807,19 @@ def on_for(index, collection, statements)
17601807
in_keyword = consume_keyword(:in)
17611808
ending = consume_keyword(:end)
17621809

1763-
# Consume the do keyword if it exists so that it doesn't get confused for
1764-
# some other block
1765-
keyword = find_keyword(:do)
1766-
if keyword &&
1767-
keyword.location.start_char > collection.location.end_char &&
1768-
keyword.location.end_char < ending.location.start_char
1769-
tokens.delete(keyword)
1770-
end
1810+
delimiter =
1811+
find_keyword_between(:do, collection, ending) ||
1812+
find_token_between(Semicolon, collection, ending)
1813+
1814+
tokens.delete(delimiter) if delimiter
17711815

17721816
start_char =
1773-
find_next_statement_start((keyword || collection).location.end_char)
1817+
find_next_statement_start((delimiter || collection).location.end_char)
1818+
17741819
statements.bind(
17751820
start_char,
17761821
start_char -
1777-
line_counts[(keyword || collection).location.end_line - 1].start,
1822+
line_counts[(delimiter || collection).location.end_line - 1].start,
17781823
ending.location.start_char,
17791824
ending.location.start_column
17801825
)
@@ -1984,7 +2029,12 @@ def on_if(predicate, statements, consequent)
19842029
beginning = consume_keyword(:if)
19852030
ending = consequent || consume_keyword(:end)
19862031

1987-
start_char = find_next_statement_start(predicate.location.end_char)
2032+
if (keyword = find_keyword_between(:then, predicate, ending))
2033+
tokens.delete(keyword)
2034+
end
2035+
2036+
start_char =
2037+
find_next_statement_start((keyword || predicate).location.end_char)
19882038
statements.bind(
19892039
start_char,
19902040
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -2068,7 +2118,8 @@ def on_in(pattern, statements, consequent)
20682118
statements_start = token
20692119
end
20702120

2071-
start_char = find_next_statement_start(statements_start.location.end_char)
2121+
start_char =
2122+
find_next_statement_start((token || statements_start).location.end_char)
20722123
statements.bind(
20732124
start_char,
20742125
start_char -
@@ -2194,12 +2245,19 @@ def on_lambda(params, statements)
21942245
token.location.start_char > beginning.location.start_char
21952246
end
21962247

2248+
if braces
2249+
opening = consume_token(TLamBeg)
2250+
closing = consume_token(RBrace)
2251+
else
2252+
opening = consume_keyword(:do)
2253+
closing = consume_keyword(:end)
2254+
end
2255+
21972256
# We need to do some special mapping here. Since ripper doesn't support
2198-
# capturing lambda var until 3.2, we need to normalize all of that here.
2257+
# capturing lambda vars, we need to normalize all of that here.
21992258
params =
2200-
case params
2201-
when Paren
2202-
# In this case we've gotten to the <3.2 parentheses wrapping a set of
2259+
if params.is_a?(Paren)
2260+
# In this case we've gotten to the parentheses wrapping a set of
22032261
# parameters case. Here we need to manually scan for lambda locals.
22042262
range = (params.location.start_char + 1)...params.location.end_char
22052263
locals = lambda_locals(source[range])
@@ -2221,25 +2279,28 @@ def on_lambda(params, statements)
22212279

22222280
node.comments.concat(params.comments)
22232281
node
2224-
when Params
2225-
# In this case we've gotten to the <3.2 plain set of parameters. In
2226-
# this case there cannot be lambda locals, so we will wrap the
2227-
# parameters into a lambda var that has no locals.
2282+
else
2283+
# If there are no parameters, then we didn't have anything to base the
2284+
# location information off of. Now that we have an opening of the
2285+
# block, we can correct this.
2286+
if params.empty?
2287+
opening_location = opening.location
2288+
location =
2289+
Location.fixed(
2290+
line: opening_location.start_line,
2291+
char: opening_location.start_char,
2292+
column: opening_location.start_column
2293+
)
2294+
2295+
params = params.copy(location: location)
2296+
end
2297+
2298+
# In this case we've gotten to the plain set of parameters. In this
2299+
# case there cannot be lambda locals, so we will wrap the parameters
2300+
# into a lambda var that has no locals.
22282301
LambdaVar.new(params: params, locals: [], location: params.location)
2229-
when LambdaVar
2230-
# In this case we've gotten to 3.2+ lambda var. In this case we don't
2231-
# need to do anything and can just the value as given.
2232-
params
22332302
end
22342303

2235-
if braces
2236-
opening = consume_token(TLamBeg)
2237-
closing = consume_token(RBrace)
2238-
else
2239-
opening = consume_keyword(:do)
2240-
closing = consume_keyword(:end)
2241-
end
2242-
22432304
start_char = find_next_statement_start(opening.location.end_char)
22442305
statements.bind(
22452306
start_char,
@@ -3134,7 +3195,7 @@ def on_rescue(exceptions, variable, statements, consequent)
31343195
exceptions = exceptions[0] if exceptions.is_a?(Array)
31353196

31363197
last_node = variable || exceptions || keyword
3137-
start_char = find_next_statement_start(last_node.location.end_char)
3198+
start_char = find_next_statement_start(last_node.end_char)
31383199
statements.bind(
31393200
start_char,
31403201
start_char - line_counts[last_node.location.start_line - 1].start,
@@ -3156,7 +3217,7 @@ def on_rescue(exceptions, variable, statements, consequent)
31563217
start_char: keyword.location.end_char + 1,
31573218
start_column: keyword.location.end_column + 1,
31583219
end_line: last_node.location.end_line,
3159-
end_char: last_node.location.end_char,
3220+
end_char: last_node.end_char,
31603221
end_column: last_node.location.end_column
31613222
)
31623223
)
@@ -3267,9 +3328,29 @@ def on_sclass(target, bodystmt)
32673328
)
32683329
end
32693330

3270-
# def on_semicolon(value)
3271-
# value
3272-
# end
3331+
# Semicolons are tokens that get added to the token list but never get
3332+
# attached to the AST. Because of this they only need to track their
3333+
# associated location so they can be used for computing bounds.
3334+
class Semicolon
3335+
# The location object given at construction time; the only state held.
attr_reader :location
3336+
3337+
# Stores the given location as-is.
def initialize(location)
3338+
@location = location
3339+
end
3340+
end
3341+
3342+
# :call-seq:
3343+
# on_semicolon: (String value) -> Semicolon
3344+
def on_semicolon(value)
3345+
# Wrap the semicolon in a Semicolon whose location is built from the
# current lineno, char_pos and current_column plus the size of value, and
# append it to the token list.
# NOTE(review): the last expression is `tokens << ...`, so this method
# evaluates to whatever `<<` returns rather than to the new Semicolon as
# the call-seq states - confirm that nothing relies on the return value.
tokens << Semicolon.new(
3346+
Location.token(
3347+
line: lineno,
3348+
char: char_pos,
3349+
column: current_column,
3350+
size: value.size
3351+
)
3352+
)
3353+
end
32733354

32743355
# def on_sp(value)
32753356
# value
@@ -3706,7 +3787,12 @@ def on_unless(predicate, statements, consequent)
37063787
beginning = consume_keyword(:unless)
37073788
ending = consequent || consume_keyword(:end)
37083789

3709-
start_char = find_next_statement_start(predicate.location.end_char)
3790+
if (keyword = find_keyword_between(:then, predicate, ending))
3791+
tokens.delete(keyword)
3792+
end
3793+
3794+
start_char =
3795+
find_next_statement_start((keyword || predicate).location.end_char)
37103796
statements.bind(
37113797
start_char,
37123798
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -3742,16 +3828,16 @@ def on_until(predicate, statements)
37423828
beginning = consume_keyword(:until)
37433829
ending = consume_keyword(:end)
37443830

3745-
# Consume the do keyword if it exists so that it doesn't get confused for
3746-
# some other block
3747-
keyword = find_keyword(:do)
3748-
if keyword && keyword.location.start_char > predicate.location.end_char &&
3749-
keyword.location.end_char < ending.location.start_char
3750-
tokens.delete(keyword)
3751-
end
3831+
delimiter =
3832+
find_keyword_between(:do, predicate, statements) ||
3833+
find_token_between(Semicolon, predicate, statements)
3834+
3835+
tokens.delete(delimiter) if delimiter
37523836

37533837
# Update the Statements location information
3754-
start_char = find_next_statement_start(predicate.location.end_char)
3838+
start_char =
3839+
find_next_statement_start((delimiter || predicate).location.end_char)
3840+
37553841
statements.bind(
37563842
start_char,
37573843
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -3845,7 +3931,8 @@ def on_when(arguments, statements, consequent)
38453931
statements_start = token
38463932
end
38473933

3848-
start_char = find_next_statement_start(statements_start.location.end_char)
3934+
start_char =
3935+
find_next_statement_start((token || statements_start).location.end_char)
38493936

38503937
statements.bind(
38513938
start_char,
@@ -3869,16 +3956,16 @@ def on_while(predicate, statements)
38693956
beginning = consume_keyword(:while)
38703957
ending = consume_keyword(:end)
38713958

3872-
# Consume the do keyword if it exists so that it doesn't get confused for
3873-
# some other block
3874-
keyword = find_keyword(:do)
3875-
if keyword && keyword.location.start_char > predicate.location.end_char &&
3876-
keyword.location.end_char < ending.location.start_char
3877-
tokens.delete(keyword)
3878-
end
3959+
delimiter =
3960+
find_keyword_between(:do, predicate, statements) ||
3961+
find_token_between(Semicolon, predicate, statements)
3962+
3963+
tokens.delete(delimiter) if delimiter
38793964

38803965
# Update the Statements location information
3881-
start_char = find_next_statement_start(predicate.location.end_char)
3966+
start_char =
3967+
find_next_statement_start((delimiter || predicate).location.end_char)
3968+
38823969
statements.bind(
38833970
start_char,
38843971
start_char - line_counts[predicate.location.end_line - 1].start,

lib/syntax_tree/translation.rb

+11
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,16 @@ def self.to_parser(node, buffer)
1313

1414
node.accept(Parser.new(buffer))
1515
end
16+
17+
# This method translates the given node into the representation defined by
18+
# the rubocop/rubocop-ast gem. We don't explicitly list it as a dependency
19+
# because it's not required for the core functionality of Syntax Tree.
20+
def self.to_rubocop_ast(node, buffer)
21+
# The requires live inside the method so that rubocop/ast and the
# translation files are only loaded when this translation is requested.
require "rubocop/ast"
22+
require_relative "translation/parser"
23+
require_relative "translation/rubocop_ast"
24+
25+
# Hand the node to a RuboCopAST instance built around buffer and return
# whatever accept returns.
node.accept(RuboCopAST.new(buffer))
26+
end
1627
end
1728
end

0 commit comments

Comments
 (0)