Skip to content

Commit 8cd5ca2

Browse files
author
Sebastian Kaupper
committed
Add support for a single quote in a character literal and a double quote in a string literal
1 parent 0ef712e commit 8cd5ca2

2 files changed

Lines changed: 48 additions & 18 deletions

File tree

pyVHDLParser/Token/Parser.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,14 @@ class TokenKind(IntEnum):
6666
PossibleRealLiteral = 7 #: Last char was a ``.``
6767
PossibleCharacterLiteral = 8 #: Last char was a ``'``
6868
PossibleStringLiteralStart = 9 #: Last char was a ``"``
69-
PossibleExtendedIdentifierStart = 10 #: Last char was a ``\``
70-
SingleLineComment = 11 #: Found ``--`` before
71-
MultiLineComment = 12 #: Found ``/*`` before
72-
Linebreak = 13 #: Last char was a ``\n``
73-
Directive = 14 #: Last char was a `` ` ``
74-
FuseableCharacter = 15 #: Last char was a character that could be fused
75-
OtherChars = 16 #: Anything else
69+
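PossibleStringLiteralEnd = 10 #: Last char was a ``"`` inside a string literal; it either closes the literal or, if followed by another ``"``, is an escaped double quote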
70+
PossibleExtendedIdentifierStart = 11 #: Last char was a ``\``
71+
SingleLineComment = 12 #: Found ``--`` before
72+
MultiLineComment = 13 #: Found ``/*`` before
73+
Linebreak = 14 #: Last char was a ``\n``
74+
Directive = 15 #: Last char was a `` ` ``
75+
FuseableCharacter = 16 #: Last char was a character that could be fused
76+
OtherChars = 17 #: Anything else
7677

7778
@classmethod
7879
def GetVHDLTokenizer(cls, iterable: Iterator[str]):
@@ -335,14 +336,7 @@ def GetVHDLTokenizer(cls, iterable: Iterator[str]):
335336
elif tokenKind is cls.TokenKind.PossibleCharacterLiteral:
336337
buffer += char
337338
if len(buffer) == 2:
338-
if buffer[1] == "'":
339-
previousToken = CharacterToken(previousToken, "'", start)
340-
yield previousToken
341-
previousToken = CharacterToken(previousToken, "'", SourceCodePosition(row, column, absolute))
342-
yield previousToken
343-
tokenKind = cls.TokenKind.OtherChars
344-
else:
345-
continue
339+
continue # TODO: Merge with changes from #56!
346340
elif (len(buffer) == 3) and (buffer[2] == "'"):
347341
previousToken = CharacterLiteralToken(previousToken, buffer, start, SourceCodePosition(row, column, absolute))
348342
yield previousToken
@@ -365,9 +359,41 @@ def GetVHDLTokenizer(cls, iterable: Iterator[str]):
365359
elif tokenKind is cls.TokenKind.PossibleStringLiteralStart:
366360
buffer += char
367361
if char == "\"":
368-
previousToken = StringLiteralToken(previousToken, buffer, start, SourceCodePosition(row, column, absolute))
362+
tokenKind = cls.TokenKind.PossibleStringLiteralEnd
363+
364+
# State: PossibleStringLiteralEnd
365+
elif tokenKind is cls.TokenKind.PossibleStringLiteralEnd:
366+
if char == "\"":
367+
buffer += char
368+
tokenKind = cls.TokenKind.PossibleStringLiteralStart
369+
else:
370+
previousToken = StringLiteralToken(previousToken, buffer, start, SourceCodePosition(row, column-1, absolute-1))
369371
yield previousToken
370-
tokenKind = cls.TokenKind.OtherChars
372+
373+
start = SourceCodePosition(row, column, absolute)
374+
buffer = char
375+
if char in __WHITESPACE_CHARACTERS__: tokenKind = cls.TokenKind.SpaceChars
376+
elif char in __NUMBER_CHARACTERS__: tokenKind = cls.TokenKind.IntegerChars
377+
elif char in __ALPHA_CHARACTERS__: tokenKind = cls.TokenKind.AlphaChars
378+
elif char == "'": tokenKind = cls.TokenKind.PossibleCharacterLiteral
379+
elif char == "\"": tokenKind = cls.TokenKind.PossibleStringLiteralStart
380+
elif char == "-": tokenKind = cls.TokenKind.PossibleSingleLineCommentStart
381+
elif char == "\r": tokenKind = cls.TokenKind.PossibleLinebreak
382+
elif char == "\n":
383+
previousToken = LinebreakToken(previousToken, char, start, start)
384+
yield previousToken
385+
tokenKind = cls.TokenKind.OtherChars
386+
elif char in __FUSEABLE_CHARS__:
387+
buffer = char
388+
tokenKind = cls.TokenKind.FuseableCharacter
389+
elif char == ".": tokenKind = cls.TokenKind.PossibleRealLiteral
390+
elif char == "\\": tokenKind = cls.TokenKind.PossibleExtendedIdentifierStart
391+
elif (char == "`") and isinstance(previousToken, (WhitespaceToken, LinebreakToken)):
392+
tokenKind = cls.TokenKind.Directive
393+
else:
394+
previousToken = CharacterToken(previousToken, char, start)
395+
yield previousToken
396+
tokenKind = cls.TokenKind.OtherChars
371397

372398
# State: PossibleExtendedIdentifierStart
373399
elif tokenKind is cls.TokenKind.PossibleExtendedIdentifierStart:

tests/unit/Tokenizer/Tokens.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class Sequence_1(TestCase, ExpectedDataMixin, TokenSequence):
130130
)
131131

132132
class Sequence_2(TestCase, ExpectedDataMixin, TokenSequence):
133-
code = """abc \\def\\ \t 'a' "abc" /* help */ -- foo\n """
133+
code = """abc \\def\\ \t 'a' ''' "abc" \"\"\"\" /* help */ -- foo\n """
134134
tokenStream = ExpectedTokenStream(
135135
[(StartOfDocumentToken, None),
136136
(WordToken, "abc"),
@@ -139,8 +139,12 @@ class Sequence_2(TestCase, ExpectedDataMixin, TokenSequence):
139139
(WhitespaceToken, " \t "),
140140
(CharacterLiteralToken, "a"),
141141
(WhitespaceToken, " "),
142+
(CharacterLiteralToken, "'"),
143+
(WhitespaceToken, " "),
142144
(StringLiteralToken, "abc"),
143145
(WhitespaceToken, " "),
146+
(StringLiteralToken, "\"\""),
147+
(WhitespaceToken, " "),
144148
(MultiLineCommentToken, "/* help */"),
145149
(WhitespaceToken, " "),
146150
(SingleLineCommentToken, "-- foo\n"),

0 commit comments

Comments
 (0)