@@ -66,13 +66,14 @@ class TokenKind(IntEnum):
6666 PossibleRealLiteral = 7 #: Last char was a ``.``
6767 PossibleCharacterLiteral = 8 #: Last char was a ``'``
6868 PossibleStringLiteralStart = 9 #: Last char was a ``"``
69- PossibleExtendedIdentifierStart = 10 #: Last char was a ``\``
70- SingleLineComment = 11 #: Found ``--`` before
71- MultiLineComment = 12 #: Found ``/*`` before
72- Linebreak = 13 #: Last char was a ``\n``
73- Directive = 14 #: Last char was a `` ` ``
74- FuseableCharacter = 15 #: Last char was a character that could be fused
75- OtherChars = 16 #: Anything else
69+ PossibleStringLiteralEnd = 10 #: Last char was a ``"`` while being in state ``PossibleStringLiteralStart``
70+ PossibleExtendedIdentifierStart = 11 #: Last char was a ``\``
71+ SingleLineComment = 12 #: Found ``--`` before
72+ MultiLineComment = 13 #: Found ``/*`` before
73+ Linebreak = 14 #: Last char was a ``\n``
74+ Directive = 15 #: Last char was a `` ` ``
75+ FuseableCharacter = 16 #: Last char was a character that could be fused
76+ OtherChars = 17 #: Anything else
7677
7778 @classmethod
7879 def GetVHDLTokenizer (cls , iterable : Iterator [str ]):
@@ -336,24 +337,23 @@ def GetVHDLTokenizer(cls, iterable: Iterator[str]):
336337 buffer += char
337338 if len (buffer ) == 2 :
338339 if buffer [1 ] == "(" and isinstance (previousToken , WordToken ):
340+ # An input of the form `<word>'(` must always be a qualified expression
341+ # in order to be valid VHDL. There is no case where `'('` would be a valid character literal
342+ # if preceded by a word token.
339343 previousToken = CharacterToken (previousToken , "'" , start )
340344 yield previousToken
341345 previousToken = CharacterToken (previousToken , "(" , SourceCodePosition (row , column , absolute ))
342346 yield previousToken
343347 tokenKind = cls .TokenKind .OtherChars
344- elif buffer [1 ] == "'" :
345- previousToken = CharacterToken (previousToken , "'" , start )
346- yield previousToken
347- previousToken = CharacterToken (previousToken , "'" , SourceCodePosition (row , column , absolute ))
348- yield previousToken
349- tokenKind = cls .TokenKind .OtherChars
350348 else :
351349 continue
352350 elif (len (buffer ) == 3 ) and (buffer [2 ] == "'" ):
351+ # Whatever is enclosed in single quotes is the content of a character literal
353352 previousToken = CharacterLiteralToken (previousToken , buffer , start , SourceCodePosition (row , column , absolute ))
354353 yield previousToken
355354 tokenKind = cls .TokenKind .OtherChars
356355 else :
356+ # If the third entry of the buffer is not a closing single quote, the single quote must belong to an attribute
357357 previousToken = CharacterToken (previousToken , "'" , start )
358358 yield previousToken
359359
@@ -371,9 +371,41 @@ def GetVHDLTokenizer(cls, iterable: Iterator[str]):
371371 elif tokenKind is cls .TokenKind .PossibleStringLiteralStart :
372372 buffer += char
373373 if char == "\" " :
374- previousToken = StringLiteralToken (previousToken , buffer , start , SourceCodePosition (row , column , absolute ))
374+ tokenKind = cls .TokenKind .PossibleStringLiteralEnd
375+
376+ # State: PossibleStringLiteralEnd
377+ elif tokenKind is cls .TokenKind .PossibleStringLiteralEnd :
378+ if char == "\" " :
379+ buffer += char
380+ tokenKind = cls .TokenKind .PossibleStringLiteralStart
381+ else :
382+ previousToken = StringLiteralToken (previousToken , buffer , start , SourceCodePosition (row , column - 1 , absolute - 1 ))
375383 yield previousToken
376- tokenKind = cls .TokenKind .OtherChars
384+
385+ start = SourceCodePosition (row , column , absolute )
386+ buffer = char
387+ if char in __WHITESPACE_CHARACTERS__ : tokenKind = cls .TokenKind .SpaceChars
388+ elif char in __NUMBER_CHARACTERS__ : tokenKind = cls .TokenKind .IntegerChars
389+ elif char in __ALPHA_CHARACTERS__ : tokenKind = cls .TokenKind .AlphaChars
390+ elif char == "'" : tokenKind = cls .TokenKind .PossibleCharacterLiteral
391+ elif char == "\" " : tokenKind = cls .TokenKind .PossibleStringLiteralStart
392+ elif char == "-" : tokenKind = cls .TokenKind .PossibleSingleLineCommentStart
393+ elif char == "\r " : tokenKind = cls .TokenKind .PossibleLinebreak
394+ elif char == "\n " :
395+ previousToken = LinebreakToken (previousToken , char , start , start )
396+ yield previousToken
397+ tokenKind = cls .TokenKind .OtherChars
398+ elif char in __FUSEABLE_CHARS__ :
399+ buffer = char
400+ tokenKind = cls .TokenKind .FuseableCharacter
401+ elif char == "." : tokenKind = cls .TokenKind .PossibleRealLiteral
402+ elif char == "\\ " : tokenKind = cls .TokenKind .PossibleExtendedIdentifierStart
403+ elif (char == "`" ) and isinstance (previousToken , (WhitespaceToken , LinebreakToken )):
404+ tokenKind = cls .TokenKind .Directive
405+ else :
406+ previousToken = CharacterToken (previousToken , char , start )
407+ yield previousToken
408+ tokenKind = cls .TokenKind .OtherChars
377409
378410 # State: PossibleExtendedIdentifierStart
379411 elif tokenKind is cls .TokenKind .PossibleExtendedIdentifierStart :
0 commit comments