@@ -27,21 +27,56 @@ class CodeLine
# Returns an array of CodeLine objects built from the source string.
#
# The source is parsed and lexed once with Prism. Comment-only lines are
# blanked out (see clean_comments!), the AST is walked by a Visitor, and
# every lexed token is wrapped in a Token and grouped by the line it
# reports via Token#line.
#
# source - String of Ruby code. It is never mutated; all in-place edits
#          happen on a private copy.
#
# Returns one CodeLine per line of the (comment-cleaned) source, in order.
def self.from_source(source)
  # Work on a copy: `+source` would hand back the caller's own string when
  # it is not frozen, and clean_comments! edits the string in place.
  source = source.dup
  parse_result = Prism.parse_lex(source)
  ast, tokens = parse_result.value

  clean_comments!(source, parse_result.comments)

  visitor = Visitor.new
  visitor.visit(ast)

  # Order tokens by starting line. sort_by is not guaranteed to be stable,
  # so the original position is used as a tie-breaker to keep tokens that
  # share a line in the order the lexer produced them.
  tokens.sort_by!.with_index do |(token, _state), position|
    [token.location.start_line, position]
  end

  # Each Token is handed the previously built Token along with the visitor.
  prev_token = nil
  tokens.map! do |token, _state|
    prev_token = Token.new(token, prev_token, visitor)
  end

  # Lines without any token fall back to an empty array via the hash default.
  tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) do |token, hash|
    hash[token.line] << token
  end

  source.lines.map.with_index do |line, index|
    line_number = index + 1
    CodeLine.new(
      line: line,
      index: index,
      tokens: tokens_for_line[line_number],
      consecutive: visitor.consecutive_lines.include?(line_number)
    )
  end
end
3955
# Remove comments that appear on their own in source. They will never be the cause
# of syntax errors and are just visual noise. Example:
#
#   source = +<<~RUBY
#     # Comment-only line
#     foo # Inline comment
#   RUBY
#   CodeLine.clean_comments!(source, Prism.parse(source).comments)
#   source # => "\nfoo # Inline comment\n"
#
# Any newline inside a removed comment is kept, so the number of lines in
# source never changes and line numbers computed before the call stay valid.
# (This matters for comments whose location spans several lines; presumably
# =begin/=end blocks - confirm against Prism.)
#
# source   - mutable String, edited in place.
# comments - comment objects responding to #trailing? and #location, where
#            the location exposes #start_offset and #length in bytes.
#
# Returns the comments collection it was given.
def self.clean_comments!(source, comments)
  # Iterate backwards since we are modifying the source in place and must preserve
  # the offsets. Prism comments are sorted by their location in the source.
  comments.reverse_each do |comment|
    next if comment.trailing?

    location = comment.location
    offset = location.start_offset
    length = location.length

    # Replace the comment with only the line breaks it contained.
    newlines = "\n" * source.byteslice(offset, length).count("\n")
    source.bytesplice(offset, length, newlines)
  end
end
73+
4074 attr_reader :line , :index , :tokens , :line_number , :indent
41- def initialize ( line :, index :, tokens :)
75+ def initialize ( line :, index :, tokens :, consecutive : )
4276 @tokens = tokens
4377 @line = line
4478 @index = index
79+ @consecutive = consecutive
4580 @original = line
4681 @line_number = @index + 1
4782 strip_line = line . dup
@@ -150,91 +185,36 @@ def <=>(other)
150185 index <=> other . index
151186 end
152187
# Whether this line can be logically joined with the line that follows it.
#
# The flag is not computed here: it is the `consecutive:` value handed to
# the constructor, which the caller derives from walking the AST.
def consecutive? = @consecutive
174194
# Determines if the given line has a trailing slash.
# Simply check if the line contains TRAILING_SLASH at or after
# the end of the last token on the line.
#
#   lines = CodeLine.from_source(<<~EOM)
#     it "foo" \
#   EOM
#   expect(lines.first.trailing_slash?).to eq(true)
#
# Returns true or false. A line without any tokens is never considered to
# have a trailing slash (false, not nil).
def trailing_slash?
  last = @tokens.last
  return false unless last

  # end_column is used as a byte offset into the line, so the search only
  # covers what comes after the content of the last token.
  !@line.byteindex(TRAILING_SLASH, last.location.end_column).nil?
end
195208
# Tallies the tokens on this line that report themselves as a keyword
# (is_kw?) and those that report themselves as an end (is_end?), then
# records which kind is in surplus:
#
#   @is_kw  - true when keyword tokens outnumber end tokens
#   @is_end - true when end tokens outnumber keyword tokens
#
# Both are false when the two counts are equal.
private def set_kw_end
  keywords = @tokens.count(&:is_kw?)
  ends = @tokens.count(&:is_end?)
  surplus = keywords - ends

  @is_kw = surplus > 0
  @is_end = -surplus > 0
end
0 commit comments