Skip to content

Commit 79d1cd5

Browse files
committed
Add support for extended identifiers. Related to #1120.
1 parent 11877d4 commit 79d1cd5

3 files changed

Lines changed: 94 additions & 51 deletions

File tree

docs/news.d/1120.feature.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Added support for extended identifiers.
2+
3+
A known limitation is that extended identifiers containing parentheses within a port or generic clause must
4+
have the same number of opening and closing parentheses. For example, a generic named ``\foo(bar)\`` is
5+
accepted but ``\foo(bar\`` is not.

tests/unit/test_vhdl_parser.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,39 @@ def test_two_adjacent_hyphens_in_a_literal(self):
662662
stimulus = 'signal a : std_logic_vector(3 downto 0) := "----";'
663663
self.assertEqual(remove_comments(stimulus), stimulus)
664664

665+
def test_external_identifier(self):
666+
# This test is focused on the special cases of extended identifier parsing.
667+
design_file = VHDLDesignFile.parse(
668+
"""
669+
entity standard_identifier is
670+
generic (
671+
-- Extended identifiers with parenthesis will be accepted if they are balanced.
672+
-- Otherwise they will interfere with finding the closing parenthesis to the
673+
-- generic clause. Same thing with port clause. This is an acceptable limitation
674+
-- for now.
675+
\\foo(bar)\\ : integer
676+
);
677+
end entity;
678+
679+
entity non-standard-identifier is -- This entity won't be found because of illegal identifier pattern.
680+
end package;
681+
682+
entity \\extended-identifier\\ is
683+
end entity \\extended-identifier\\;
684+
685+
package \\a.package\\ is
686+
end package \\a.package\\;
687+
"""
688+
)
689+
entities = design_file.entities
690+
self.assertEqual(len(entities), 2)
691+
self.assertEqual(entities[0].identifier, "standard_identifier")
692+
self.assertEqual(entities[1].identifier, "\\extended-identifier\\")
693+
694+
packages = design_file.packages
695+
self.assertEqual(len(packages), 1)
696+
self.assertEqual(packages[0].identifier, "\\a.package\\")
697+
665698
def parse_single_entity(self, code):
666699
"""
667700
Helper function to parse a single entity

vunit/vhdl_parser.py

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def parse(self, file_name):
4242
)
4343

4444

45+
_ID_PATTERN = r"[A-Za-z]\w*|\\[^\n\r\\]+\\"
46+
47+
4548
class VHDLDesignFile(object): # pylint: disable=too-many-instance-attributes
4649
"""
4750
Contains VHDL objects found within a file
@@ -86,7 +89,7 @@ def parse(cls, code):
8689
)
8790

8891
_component_re = re.compile(
89-
r"[a-zA-Z]\w*\s*\:\s*(?:component)?\s*(?:(?:[a-zA-Z]\w*)\.)?([a-zA-Z]\w*)\s*"
92+
rf"(?:{_ID_PATTERN})\s*\:\s*(?:component)?\s*(?:(?:{_ID_PATTERN})\.)?({_ID_PATTERN})\s*"
9093
r"(?:generic|port) map\s*\([\s\w\=\>\,\.\)\(\+\-\*\/\'\"]*\);",
9194
re.IGNORECASE,
9295
)
@@ -101,13 +104,13 @@ def __init__(self, identifier):
101104
self.identifier = identifier
102105

103106
_package_body_pattern = re.compile(
104-
r"""
107+
rf"""
105108
\b # Word boundary
106109
package # package keyword
107110
\s+ # At least one whitespace
108111
body # body keyword
109112
\s+ # At least one whitespace
110-
(?P<package>[a-zA-Z][\w]*) # A package
113+
(?P<package>{_ID_PATTERN}) # A package
111114
\s+ # At least one whitespace
112115
is # is keyword
113116
""",
@@ -126,23 +129,23 @@ def find(cls, code):
126129

127130
class VHDLConfiguration(object):
128131
"""
129-
A configuratio declaration
132+
A configuration declaration
130133
"""
131134

132135
def __init__(self, identifier, entity):
133136
self.identifier = identifier
134137
self.entity = entity
135138

136139
_configuration_re = re.compile(
137-
r"""
140+
rf"""
138141
\b # Word boundary
139142
configuration # configuration keyword
140143
\s+ # At least one whitespace
141-
(?P<id>[a-zA-Z][\w]*) # An identifier
144+
(?P<id>{_ID_PATTERN}) # An identifier
142145
\s+ # At least one whitespace
143146
of # of keyword
144147
\s+ # At least one whitespace
145-
(?P<entity_id>[a-zA-Z][\w]*) # An identifier
148+
(?P<entity_id>{_ID_PATTERN}) # An identifier
146149
\s+ # At least one whitespace
147150
is # is keyword
148151
""",
@@ -168,15 +171,15 @@ def __init__(self, identifier, entity):
168171
self.entity = entity
169172

170173
_architecture_re = re.compile(
171-
r"""
174+
rf"""
172175
\b # Word boundary
173176
architecture # architecture keyword
174177
\s+ # At least one whitespace
175-
(?P<id>[a-zA-Z][\w]*) # An identifier
178+
(?P<id>{_ID_PATTERN}) # An identifier
176179
\s+ # At least one whitespace
177180
of # of keyword
178181
\s+ # At least one whitespace
179-
(?P<entity_id>[a-zA-Z][\w]*) # An identifier
182+
(?P<entity_id>{_ID_PATTERN}) # An identifier
180183
\s+ # At least one whitespace
181184
is # is keyword
182185
""",
@@ -194,7 +197,9 @@ def find(cls, code):
194197
yield VHDLArchitecture(identifier, entity_id)
195198

196199

197-
PACKAGE_INSTANCE_PATTERN = r"\bpackage\s+(?P<new_name>[a-zA-Z]\w*)\s+is\s+new\s+(?P<lib>[a-zA-Z]\w*)\.(?P<name>[a-zA-Z]\w*)" # pylint: disable=line-too-long
200+
PACKAGE_INSTANCE_PATTERN = (
201+
rf"\bpackage\s+(?P<new_name>{_ID_PATTERN})\s+is\s+new\s+(?P<lib>{_ID_PATTERN})\.(?P<name>{_ID_PATTERN})"
202+
)
198203

199204

200205
class VHDLPackage(object):
@@ -209,11 +214,11 @@ def __init__(self, identifier, enumeration_types, record_types, array_types):
209214
self.array_types = array_types
210215

211216
_package_start_re = re.compile(
212-
r"""
217+
rf"""
213218
\b # Word boundary
214219
package # package keyword
215220
\s+ # At least one whitespace
216-
(?P<id>[a-zA-Z][\w]*) # An identifier
221+
(?P<id>{_ID_PATTERN}) # An identifier
217222
\s+ # At least one whitespace
218223
is # is keyword
219224
""",
@@ -233,7 +238,7 @@ def _find_normal_packages(cls, code):
233238
end # end keyword
234239
(\s+package)? # Optional package keyword
235240
(\s+"""
236-
+ identifier
241+
+ re.escape(identifier)
237242
+ r""")? # Optional identifier
238243
[\s]* # Potential whitespaces
239244
; # Semicolon
@@ -324,11 +329,11 @@ def add_port(self, identifier, mode, subtype_code, init_value=None):
324329
)
325330

326331
_entity_start_re = re.compile(
327-
r"""
332+
rf"""
328333
\b # Word boundary
329334
entity # entity keyword
330335
\s+ # At least one whitespace
331-
(?P<id>[a-zA-Z][\w]*) # An identifier
336+
(?P<id>{_ID_PATTERN}) # An identifier
332337
\s+ # At least one whitespace
333338
is # is keyword
334339
""",
@@ -351,7 +356,7 @@ def find(cls, code):
351356
(entity)? # Optional entity keyword
352357
[\s]* # Potential whitespaces
353358
("""
354-
+ identifier
359+
+ re.escape(identifier)
355360
+ r""")? # Optional identifier
356361
[\s]* # Potential whitespaces
357362
; # Semicolon
@@ -371,11 +376,11 @@ def parse(cls, code):
371376
# Extract identifier
372377
re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
373378
entity_start = re.compile(
374-
r"""
379+
rf"""
375380
\b # Word boundary
376381
entity # entity keyword
377382
\s+ # At least one whitespace
378-
(?P<id>[a-zA-Z][\w]*) # An identifier
383+
(?P<id>{_ID_PATTERN}) # An identifier
379384
\s+ # At least one whitespace
380385
is # is keyword
381386
""",
@@ -454,12 +459,12 @@ def _find_port_clause(cls, code):
454459
@staticmethod
455460
def _split_not_in_par(string, sep):
456461
"""
457-
Split string at all occurences of sep but not inside of a parenthesis or quoute
462+
Split string at all occurrences of sep but not inside of a parenthesis or quote
458463
"""
459464
result = []
460465
count = 0
461466
split = []
462-
quouted = False
467+
quoted = False
463468
escaped = False
464469

465470
for idx, char in enumerate(string):
@@ -472,7 +477,7 @@ def _split_not_in_par(string, sep):
472477
if next_char == '"':
473478
escaped = True
474479
else:
475-
quouted = not quouted
480+
quoted = not quoted
476481
else:
477482
escaped = False
478483

@@ -481,7 +486,7 @@ def _split_not_in_par(string, sep):
481486
elif char in ")":
482487
count -= 1
483488

484-
if char == sep and count == 0 and not quouted:
489+
if char == sep and count == 0 and not quoted:
485490
result.append("".join(split))
486491
split = []
487492
else:
@@ -560,11 +565,11 @@ def __init__(self, identifier):
560565
self.identifier = identifier
561566

562567
_context_start_re = re.compile(
563-
r"""
568+
rf"""
564569
\b # Word boundary
565570
context # context keyword
566571
\s+ # At least one whitespace
567-
(?P<id>[a-zA-Z][\w]*) # An identifier
572+
(?P<id>{_ID_PATTERN}) # An identifier
568573
\s+ # At least one whitespace
569574
is # is keyword
570575
""",
@@ -600,10 +605,10 @@ def parse(cls, code):
600605
# Extract type mark and find out if it's an array type and if a constraint is given.
601606
re_flags = re.MULTILINE | re.IGNORECASE | re.VERBOSE
602607
subtype_indication_start = re.compile(
603-
r"""
608+
rf"""
604609
^ # Beginning of line
605610
[\s]* # Potential whitespaces
606-
(?P<type_mark>[a-zA-Z][\w]*) # An type mark
611+
(?P<type_mark>{_ID_PATTERN}) # An type mark
607612
[\s]* # Potential whitespaces
608613
(?P<constraint>\(.*\))?
609614
""",
@@ -705,16 +710,16 @@ def __init__(self, identifier, literals):
705710
self.literals = literals
706711

707712
_enum_declaration_re = re.compile(
708-
r"""
713+
rf"""
709714
\b # Word boundary
710715
type
711716
\s+
712-
(?P<id>[a-zA-Z][\w]*) # An identifier
717+
(?P<id>{_ID_PATTERN}) # An identifier
713718
\s+
714719
is
715720
\s*\(\s*
716-
(?P<literals>[a-zA-Z][\w]* # First enumeration literal
717-
(\s*,\s*[a-zA-Z][\w]*)*) # More enumeration literals
721+
(?P<literals>(?:{_ID_PATTERN}) # First enumeration literal
722+
(\s*,\s*(?:{_ID_PATTERN}))*) # More enumeration literals
718723
\s*\)\s*;""",
719724
re.MULTILINE | re.IGNORECASE | re.VERBOSE,
720725
)
@@ -746,11 +751,11 @@ def __init__(self, identifier, elements):
746751
self.elements = elements
747752

748753
_record_declaration_re = re.compile(
749-
r"""
754+
rf"""
750755
\b # Word boundary
751756
type
752757
\s+
753-
(?P<id>[a-zA-Z][\w]*) # An identifier
758+
(?P<id>{_ID_PATTERN}) # An identifier
754759
\s+
755760
is
756761
\s+
@@ -809,17 +814,17 @@ def __init__(self, identifier, subtype_indication, range1, range2):
809814
)
810815

811816
_range_attribute_ranges_re = re.compile(
812-
r"""
813-
\s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
817+
rf"""
818+
\s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
814819
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
815820
)
816821

817822
_unconstrained_ranges_re = re.compile(
818-
r"""
819-
\s*(?P<range_type1>[a-zA-Z][\w]*)
823+
rf"""
824+
\s*(?P<range_type1>{_ID_PATTERN})
820825
\s+range\s+<>\s*
821826
(,
822-
\s*(?P<range_type2>[a-zA-Z][\w]*)
827+
\s*(?P<range_type2>{_ID_PATTERN})
823828
\s+range\s+<>\s*)?""",
824829
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
825830
)
@@ -833,24 +838,24 @@ def __init__(self, identifier, subtype_indication, range1, range2):
833838
)
834839

835840
_range_attribute_range_re = re.compile(
836-
r"""
837-
\s*(?P<range_attribute>[a-zA-Z][\w]*'range)\s*""",
841+
rf"""
842+
\s*(?P<range_attribute>(?:{_ID_PATTERN})'range)\s*""",
838843
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
839844
)
840845

841846
_unconstrained_range_re = re.compile(
842-
r"""
843-
\s*(?P<range_type>[a-zA-Z][\w]*)
847+
rf"""
848+
\s*(?P<range_type>{_ID_PATTERN})
844849
\s+range\s+<>\s*""",
845850
re.MULTILINE | re.IGNORECASE | re.VERBOSE | re.DOTALL,
846851
)
847852

848853
_array_declaration_re = re.compile(
849-
r"""
854+
rf"""
850855
\b # Word boundary
851856
type
852857
\s+
853-
(?P<id>[a-zA-Z][\w]*)
858+
(?P<id>{_ID_PATTERN})
854859
\s+
855860
is
856861
\s+
@@ -950,12 +955,12 @@ class VHDLReference(object):
950955
_reference_types = ("package", "context", "entity", "configuration")
951956

952957
_uses_re = re.compile(
953-
r"""
958+
rf"""
954959
\b # Word boundary
955960
(?P<use_type>use|context) # use or context keyword
956961
\s+ # At least one whitespace
957-
(?P<id>[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})
958-
(?P<extra>(\s*,\s*[a-zA-Z][\w]*(\.[a-zA-Z][\w]*){1,2})*)
962+
(?P<id>(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})
963+
(?P<extra>(\s*,\s*(?:{_ID_PATTERN})(\.(?:{_ID_PATTERN})){{1,2}})*)
959964
\s* # Potential whitespaces
960965
; # Semi-colon
961966
""",
@@ -965,12 +970,12 @@ class VHDLReference(object):
965970
@classmethod
966971
def _find_uses(cls, code):
967972
"""
968-
Find all the libraries and use clasues within the code
973+
Find all the libraries and use clauses within the code
969974
"""
970975

971976
def get_ids(match):
972977
"""
973-
Get all ids found within the match taking the optinal extra ids of
978+
Get all ids found within the match taking the optional extra ids of
974979
library and use clauses into account such as:
975980
976981
use foo, bar;
@@ -1002,7 +1007,7 @@ def get_ids(match):
10021007
return references
10031008

10041009
_entity_reference_re = re.compile(
1005-
r"\bentity\s+(?P<lib>[a-zA-Z]\w*)\.(?P<ent>[a-zA-Z]\w*)\s*(\((?P<arch>[a-zA-Z]\w*)\))?",
1010+
rf"\bentity\s+(?P<lib>{_ID_PATTERN})\.(?P<ent>{_ID_PATTERN})\s*(\((?P<arch>{_ID_PATTERN})\))?",
10061011
re.MULTILINE | re.IGNORECASE,
10071012
)
10081013

@@ -1027,7 +1032,7 @@ def _find_entity_references(cls, code):
10271032
return references
10281033

10291034
_configuration_reference_re = re.compile(
1030-
r"\bconfiguration\s+(?P<lib>[a-zA-Z]\w*)\.(?P<cfg>[a-zA-Z]\w*)",
1035+
rf"\bconfiguration\s+(?P<lib>{_ID_PATTERN})\.(?P<cfg>{_ID_PATTERN})",
10311036
re.MULTILINE | re.IGNORECASE,
10321037
)
10331038

0 commit comments

Comments
 (0)