
Commit 87ad865

Earlopain and schneems committed
Fully migrate to prism
It mostly continues to rely on tokens, but for a few things, like endless method definitions and multiline method continuations, it uses the AST. These are either very difficult or impossible to detect from tokens alone. Because multiline method calls are now detected via the AST, comments no longer need to be trimmed out of the source.

Co-authored-by: Schneems <richard.schneeman+foo@gmail.com>
1 parent e7eaea5 commit 87ad865

14 files changed

Lines changed: 322 additions & 304 deletions
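The migration hinges on Prism's combined parse-and-lex entry point, which yields the AST and the token stream in a single pass. A standalone sketch (example mine, not part of the commit) of the API the new code builds on:

```ruby
require "prism"

# Parse and lex in one pass: value is an [AST, tokens] pair,
# where each tokens entry is a [Prism::Token, lex_state] pair.
result = Prism.parse_lex("a = 1")
ast, tokens = result.value

puts ast.class
puts tokens.map { |token, _lex_state| token.type }.inspect
```

Having both representations from one call is what lets the gem keep its token-based heuristics while reaching for the AST only where tokens fall short.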

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 ## HEAD (unreleased)
 
-- Changed: Changed: Minimum supported Ruby version is now 3.3. (https://github.com/ruby/syntax_suggest/pull/246)
+- Changed: Minimum supported Ruby version is now 3.3. (https://github.com/ruby/syntax_suggest/pull/246)
+- Changed: Use native prism to analyse. (https://github.com/ruby/syntax_suggest/pull/251)
 
 ## 2.0.3
 

lib/syntax_suggest/api.rb

Lines changed: 2 additions & 4 deletions
@@ -9,9 +9,6 @@
 
 # Prism is the new parser, replacing Ripper
 require "prism"
-# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism
-# for lexing and parsing
-require "ripper"
 
 module SyntaxSuggest
   # Used to indicate a default value that cannot
@@ -188,7 +185,6 @@ def self.valid?(source)
 require_relative "clean_document"
 
 # Helpers
-require_relative "lex_all"
 require_relative "code_line"
 require_relative "code_block"
 require_relative "block_expand"
@@ -200,3 +196,5 @@ def self.valid?(source)
 require_relative "pathname_from_message"
 require_relative "display_invalid_blocks"
 require_relative "parse_blocks_from_indent_line"
+require_relative "visitor"
+require_relative "token"
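Dropping `require "ripper"` is safe because Prism now handles both lexing and parsing on its own. A quick standalone check of the plain Prism API (example mine, not from the commit):

```ruby
require "prism"

# Valid source parses cleanly...
result = Prism.parse("def foo = 1")
puts result.success?   # true

# ...while broken source reports errors instead of raising
bad = Prism.parse("def foo(")
puts bad.failure?      # true
puts bad.errors.any?   # true
```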

lib/syntax_suggest/clean_document.rb

Lines changed: 8 additions & 91 deletions
@@ -66,27 +66,9 @@ module SyntaxSuggest
   #
   # All of these problems are fixed by joining the whole heredoc into a single
   # line.
-  #
-  # ## Comments and whitespace
-  #
-  # Comments can throw off the way the lexer tells us that the line
-  # logically belongs with the next line. This is valid ruby but
-  # results in a different lex output than before:
-  #
-  #   1 User.
-  #   2   where(name: "schneems").
-  #   3   # Comment here
-  #   4   first
-  #
-  # To handle this we can replace comment lines with empty lines
-  # and then re-lex the source. This removal and re-lexing preserves
-  # line index and document size, but generates an easier to work with
-  # document.
-  #
   class CleanDocument
     def initialize(source:)
-      lines = clean_sweep(source: source)
-      @document = CodeLine.from_source(lines.join)
+      @document = CodeLine.from_source(source)
     end
 
     # Call all of the document "cleaners"
@@ -110,62 +92,6 @@ def to_s
       @document.join
     end
 
-    # Remove comments
-    #
-    # replace with empty newlines
-    #
-    #   source = <<~'EOM'
-    #     # Comment 1
-    #     puts "hello"
-    #     # Comment 2
-    #     puts "world"
-    #   EOM
-    #
-    #   lines = CleanDocument.new(source: source).lines
-    #   expect(lines[0].to_s).to eq("\n")
-    #   expect(lines[1].to_s).to eq("puts "hello")
-    #   expect(lines[2].to_s).to eq("\n")
-    #   expect(lines[3].to_s).to eq("puts "world")
-    #
-    # Important: This must be done before lexing.
-    #
-    # After this change is made, we lex the document because
-    # removing comments can change how the doc is parsed.
-    #
-    # For example:
-    #
-    #   values = LexAll.new(source: <<~EOM))
-    #     User.
-    #       # comment
-    #       where(name: 'schneems')
-    #   EOM
-    #   expect(
-    #     values.count {|v| v.type == :on_ignored_nl}
-    #   ).to eq(1)
-    #
-    # After the comment is removed:
-    #
-    #   values = LexAll.new(source: <<~EOM))
-    #     User.
-    #
-    #       where(name: 'schneems')
-    #   EOM
-    #   expect(
-    #     values.count {|v| v.type == :on_ignored_nl}
-    #   ).to eq(2)
-    #
-    def clean_sweep(source:)
-      # Match comments, but not HEREDOC strings with #{variable} interpolation
-      # https://rubular.com/r/HPwtW9OYxKUHXQ
-      source.lines.map do |line|
-        if line.match?(/^\s*#([^{].*|)$/)
-          $/
-        else
-          line
-        end
-      end
-    end
-
     # Smushes all heredoc lines into one line
     #
     #   source = <<~'EOM'
@@ -184,9 +110,9 @@ def join_heredoc!
       lines.each do |line|
         line.tokens.each do |token|
           case token.type
-          when :on_heredoc_beg
+          when :HEREDOC_START
             start_index_stack << line.index
-          when :on_heredoc_end
+          when :HEREDOC_END
             start_index = start_index_stack.pop
             end_index = line.index
             heredoc_beg_end_index << [start_index, end_index]
@@ -212,20 +138,10 @@ def join_heredoc!
     #   expect(lines[0].to_s).to eq(source)
     #   expect(lines[1].to_s).to eq("")
     #
-    # The one known case this doesn't handle is:
-    #
-    #   Ripper.lex <<~EOM
-    #     a &&
-    #      b ||
-    #      c
-    #   EOM
-    #
-    # For some reason this introduces `on_ignore_newline` but with BEG type
-    #
     def join_consecutive!
-      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+      consecutive_groups = @document.select(&:consecutive?).map do |code_line|
        take_while_including(code_line.index..) do |line|
-          line.ignore_newline_not_beg?
+          line.consecutive?
        end
      end
 
@@ -275,14 +191,15 @@ def join_groups(groups)
       @document[line.index] = CodeLine.new(
         tokens: lines.map(&:tokens).flatten,
         line: lines.join,
-        index: line.index
+        index: line.index,
+        consecutive: false
       )
 
       # Hide the rest of the lines
       lines[1..].each do |line|
         # The above lines already have newlines in them, if add more
         # then there will be double newline, use an empty line instead
-        @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [])
+        @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false)
       end
     end
     self
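With the switch from Ripper's `:on_heredoc_beg`/`:on_heredoc_end` to Prism's `:HEREDOC_START`/`:HEREDOC_END`, the heredoc-joining logic reads token types straight off `Prism.lex`. A standalone sketch (example mine, not from the commit) showing those token types appear:

```ruby
require "prism"

source = <<~'RUBY'
  text = <<~EOM
    hello
  EOM
RUBY

# Prism.lex yields [Prism::Token, lex_state] pairs
types = Prism.lex(source).value.map { |token, _state| token.type }

puts types.include?(:HEREDOC_START)  # true
puts types.include?(:HEREDOC_END)    # true
```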

lib/syntax_suggest/code_line.rb

Lines changed: 48 additions & 68 deletions
@@ -27,21 +27,56 @@ class CodeLine
     # Returns an array of CodeLine objects
     # from the source string
     def self.from_source(source)
-      tokens_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
+      source = +source
+      parse_result = Prism.parse_lex(source)
+      ast, tokens = parse_result.value
+
+      clean_comments!(source, parse_result.comments)
+
+      visitor = Visitor.new
+      visitor.visit(ast)
+      tokens.sort_by! { |token, _state| token.location.start_line }
+
+      prev_token = nil
+      tokens.map! do |token, _state|
+        prev_token = Token.new(token, prev_token, visitor)
+      end
+
+      tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
       source.lines.map.with_index do |line, index|
         CodeLine.new(
           line: line,
           index: index,
-          tokens: tokens_for_line[index + 1]
+          tokens: tokens_for_line[index + 1],
+          consecutive: visitor.consecutive_lines.include?(index + 1)
         )
       end
     end
 
+    # Remove comments that appear on their own in source. They will never be the cause
+    # of syntax errors and are just visual noise. Example:
+    #
+    #   source = +<<~RUBY
+    #     # Comment-only line
+    #     foo # Inline comment
+    #   RUBY
+    #   CodeLine.clean_comments!(source, Prism.parse(source).comments)
+    #   source # => "\nfoo # Inline comment\n"
+    def self.clean_comments!(source, comments)
+      # Iterate backwards since we are modifying the source in place and must preserve
+      # the offsets. Prism comments are sorted by their location in the source.
+      comments.reverse_each do |comment|
+        next if comment.trailing?
+        source.bytesplice(comment.location.start_offset, comment.location.length, "")
+      end
+    end
+
     attr_reader :line, :index, :tokens, :line_number, :indent
-    def initialize(line:, index:, tokens:)
+    def initialize(line:, index:, tokens:, consecutive:)
       @tokens = tokens
       @line = line
       @index = index
+      @consecutive = consecutive
       @original = line
       @line_number = @index + 1
       strip_line = line.dup
@@ -150,91 +185,36 @@ def <=>(other)
       index <=> other.index
     end
 
-    # [Not stable API]
-    #
-    # Lines that have a `on_ignored_nl` type token and NOT
-    # a `BEG` type seem to be a good proxy for the ability
-    # to join multiple lines into one.
-    #
-    # This predicate method is used to determine when those
-    # two criteria have been met.
-    #
-    # The one known case this doesn't handle is:
-    #
-    #   Ripper.lex <<~EOM
-    #     a &&
-    #      b ||
-    #      c
-    #   EOM
-    #
-    # For some reason this introduces `on_ignore_newline` but with BEG type
-    def ignore_newline_not_beg?
-      @ignore_newline_not_beg
+    # Can this line be logically joined together
+    # with the following line? Determined by walking
+    # the AST
+    def consecutive?
+      @consecutive
     end
 
-    # Determines if the given line has a trailing slash
+    # Determines if the given line has a trailing slash.
+    # Simply check if the line contains a backslash after
+    # the content of the last token.
     #
     #   lines = CodeLine.from_source(<<~EOM)
     #     it "foo" \
     #   EOM
     #   expect(lines.first.trailing_slash?).to eq(true)
     #
     def trailing_slash?
-      last = @tokens.last
-
-      # Older versions of prism diverged slightly from Ripper in compatibility mode
-      case last&.type
-      when :on_sp
-        last.value == TRAILING_SLASH
-      when :on_tstring_end
-        true
-      else
-        false
-      end
+      return unless (last = @tokens.last)
+      @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
     end
 
-    # Endless method detection
-    #
-    # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
-    # Detecting a "oneliner" seems to need a state machine.
-    # This can be done by looking mostly at the "state" (last value):
-    #
-    #   ENDFN -> BEG (token = '=' ) -> END
-    #
     private def set_kw_end
-      oneliner_count = 0
-      in_oneliner_def = nil
-
       kw_count = 0
       end_count = 0
 
-      @ignore_newline_not_beg = false
       @tokens.each do |token|
         kw_count += 1 if token.is_kw?
         end_count += 1 if token.is_end?
-
-        if token.type == :on_ignored_nl
-          @ignore_newline_not_beg = !token.expr_beg?
-        end
-
-        if in_oneliner_def.nil?
-          in_oneliner_def = :ENDFN if token.state.allbits?(Ripper::EXPR_ENDFN)
-        elsif token.state.allbits?(Ripper::EXPR_ENDFN)
-          # Continue
-        elsif token.state.allbits?(Ripper::EXPR_BEG)
-          in_oneliner_def = :BODY if token.value == "="
-        elsif token.state.allbits?(Ripper::EXPR_END)
-          # We found an endless method, count it
-          oneliner_count += 1 if in_oneliner_def == :BODY
-
-          in_oneliner_def = nil
-        else
-          in_oneliner_def = nil
-        end
       end
 
-      kw_count -= oneliner_count
-
       @is_kw = (kw_count - end_count) > 0
       @is_end = (end_count - kw_count) > 0
     end
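The regex-based `clean_sweep` is replaced by byte-exact comment removal driven by Prism's comment list. A standalone sketch of the same idea (needs Ruby 3.2+ for `String#bytesplice`; example mine, not from the commit):

```ruby
require "prism"

source = +<<~RUBY
  # standalone comment
  foo # inline comment
RUBY

# Walk the comments back-to-front so earlier byte offsets stay valid
# while we splice; skip trailing comments that follow real code.
Prism.parse(source).comments.reverse_each do |comment|
  next if comment.trailing?
  source.bytesplice(comment.location.start_offset, comment.location.length, "")
end

puts source.inspect
```

Splicing by byte offset keeps the line count and document size intact, which is exactly why the old comment-trimming-and-re-lexing dance is no longer needed.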

lib/syntax_suggest/left_right_token_count.rb

Lines changed: 9 additions & 15 deletions
@@ -49,21 +49,22 @@ def count_end
     #
     # Example:
     #
+    #   token = CodeLine.from_source("{").first.tokens.first
     #   left_right = LeftRightTokenCount.new
-    #   left_right.count_token(Token.new(1, :on_lbrace, "{", Ripper::EXPR_BEG))
+    #   left_right.count_token(token)
     #   left_right.count_for_char("{")
     #   # => 1
     #   left_right.count_for_char("}")
     #   # => 0
     def count_token(token)
       case token.type
-      when :on_tstring_content
+      when :STRING_CONTENT
         # ^^^
         # Means it's a string or a symbol `"{"` rather than being
         # part of a data structure (like a hash) `{ a: b }`
         # ignore it.
-      when :on_words_beg, :on_symbols_beg, :on_qwords_beg,
-        :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg
+      when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W,
+        :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN
         # ^^^
         # Handle shorthand syntaxes like `%Q{ i am a string }`
         #
@@ -72,25 +73,18 @@ def count_token(token)
         # can be used
         char = token.value[-1]
         @count_for_char[char] += 1 if @count_for_char.key?(char)
-      when :on_embexpr_beg
+      when :EMBEXPR_BEGIN
         # ^^^
         # Embedded string expressions like `"#{foo} <-embed"`
         # are parsed with chars:
         #
-        #   `#{` as :on_embexpr_beg
-        #   `}` as :on_embexpr_end
-        #
-        # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end
-        # because sometimes the lexer thinks something is an embed
-        # string end, when it is not like `lol = }` (no clue why).
+        #   `#{` as :EMBEXPR_BEGIN
+        #   `}` as :EMBEXPR_END
         #
         # When we see `#{` count it as a `{` or we will
         # have a mis-match count.
         #
-        case token.value
-        when "\#{"
-          @count_for_char["{"] += 1
-        end
+        @count_for_char["{"] += 1
       else
         @end_count += 1 if token.is_end?
         @kw_count += 1 if token.is_kw?
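Under Prism's token names the interpolation bookkeeping collapses to a single increment per `#{`. An illustrative standalone sketch (hash and variable names are mine, not from the commit):

```ruby
require "prism"

count_for_char = { "{" => 0, "}" => 0 }

Prism.lex('greeting = "#{name} <-embed"').value.each do |token, _state|
  case token.type
  when :EMBEXPR_BEGIN then count_for_char["{"] += 1 # `#{` opens a brace
  when :EMBEXPR_END   then count_for_char["}"] += 1 # `}` closes it
  end
end

puts count_for_char.inspect
```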
