Skip to content

Commit 87c5adb

Browse files
committed
Handle hyperlink complex fields with unquoted hrefs
1 parent 685e1e0 commit 87c5adb

3 files changed

Lines changed: 67 additions & 9 deletions

File tree

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
* Handle hyperlinked wp:anchor and wp:inline elements.
44

5+
* Handle hyperlink complex fields with unquoted hrefs.
6+
57
# 1.11.0
68

79
* Ignore style definitions using a style ID that has already been used.

mammoth/docx/body_xml.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,19 @@ def parse_current_instr_text(complex_field):
228228
return parse_instr_text(instr_text, fld_char=fld_char)
229229

230230
def parse_instr_text(instr_text, *, fld_char):
231-
external_link_result = re.match(r'\s*HYPERLINK "(.*)"', instr_text)
232-
if external_link_result is not None:
233-
return complex_fields.hyperlink(dict(href=external_link_result.group(1)))
231+
link_result = re.match(r'^\s*HYPERLINK\s+(\\l\s+)?(?:"(.*)"|([^\\]\S*))', instr_text)
232+
if link_result is not None:
233+
if link_result.group(2) is None:
234+
location = link_result.group(3)
235+
else:
236+
location = link_result.group(2)
237+
238+
if link_result.group(1) is None:
239+
hyperlink_args = dict(href=location)
240+
else:
241+
hyperlink_args = dict(anchor=location)
234242

235-
internal_link_result = re.match(r'\s*HYPERLINK\s+\\l\s+"(.*)"', instr_text)
236-
if internal_link_result is not None:
237-
return complex_fields.hyperlink(dict(anchor=internal_link_result.group(1)))
243+
return complex_fields.hyperlink(hyperlink_args)
238244

239245
checkbox_result = re.match(r'\s*FORMCHECKBOX\s*', instr_text)
240246
if checkbox_result is not None:

tests/docx/body_xml_tests.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,12 @@ def _is_hyperlinked_run(self, **kwargs):
451451
def _is_empty_hyperlinked_run(self):
452452
return self._is_hyperlinked_run(children=[])
453453

454-
def test_runs_in_a_complex_field_for_hyperlinks_without_switch_are_read_as_external_hyperlinks(self):
454+
def test_runs_in_a_complex_field_for_hyperlinks_without_switch_with_quoted_location_are_read_as_external_hyperlinks(self):
455455
element = xml_element("w:p", {}, [
456456
self._BEGIN_COMPLEX_FIELD,
457-
self._HYPERLINK_INSTRTEXT,
457+
xml_element("w:instrText", {}, [
458+
xml_text(' HYPERLINK "http://example.com"')
459+
]),
458460
self._SEPARATE_COMPLEX_FIELD,
459461
_run_element_with_text("this is a hyperlink"),
460462
self._END_COMPLEX_FIELD,
@@ -473,7 +475,31 @@ def test_runs_in_a_complex_field_for_hyperlinks_without_switch_are_read_as_exter
473475
is_empty_run,
474476
)))
475477

476-
def test_runs_in_a_complex_field_for_hyperlinks_with_l_switch_are_read_as_internal_hyperlinks(self):
478+
def test_runs_in_a_complex_field_for_hyperlinks_without_switch_with_unquoted_location_are_read_as_external_hyperlinks(self):
479+
element = xml_element("w:p", {}, [
480+
self._BEGIN_COMPLEX_FIELD,
481+
xml_element("w:instrText", {}, [
482+
xml_text(' HYPERLINK http://example.com')
483+
]),
484+
self._SEPARATE_COMPLEX_FIELD,
485+
_run_element_with_text("this is a hyperlink"),
486+
self._END_COMPLEX_FIELD,
487+
])
488+
paragraph = _read_and_get_document_xml_element(element)
489+
490+
assert_that(paragraph, is_paragraph(children=is_sequence(
491+
is_empty_run,
492+
self._is_empty_hyperlinked_run,
493+
self._is_hyperlinked_run(
494+
href=self._URI,
495+
children=is_sequence(
496+
is_text("this is a hyperlink"),
497+
),
498+
),
499+
is_empty_run,
500+
)))
501+
502+
def test_runs_in_a_complex_field_for_hyperlinks_with_l_switch_with_quoted_location_are_read_as_internal_hyperlinks(self):
477503
element = xml_element("w:p", {}, [
478504
self._BEGIN_COMPLEX_FIELD,
479505
xml_element("w:instrText", {}, [
@@ -497,6 +523,30 @@ def test_runs_in_a_complex_field_for_hyperlinks_with_l_switch_are_read_as_intern
497523
is_empty_run,
498524
)))
499525

526+
def test_runs_in_a_complex_field_for_hyperlinks_with_l_switch_with_unquoted_location_are_read_as_internal_hyperlinks(self):
527+
element = xml_element("w:p", {}, [
528+
self._BEGIN_COMPLEX_FIELD,
529+
xml_element("w:instrText", {}, [
530+
xml_text(' HYPERLINK \\l InternalLink'),
531+
]),
532+
self._SEPARATE_COMPLEX_FIELD,
533+
_run_element_with_text("this is a hyperlink"),
534+
self._END_COMPLEX_FIELD,
535+
])
536+
paragraph = _read_and_get_document_xml_element(element)
537+
538+
assert_that(paragraph, is_paragraph(children=is_sequence(
539+
is_empty_run,
540+
self._is_empty_hyperlinked_run,
541+
self._is_hyperlinked_run(
542+
anchor="InternalLink",
543+
children=is_sequence(
544+
is_text("this is a hyperlink"),
545+
),
546+
),
547+
is_empty_run,
548+
)))
549+
500550
def test_runs_after_a_complex_field_for_hyperlinks_are_not_read_as_hyperlinks(self):
501551
element = xml_element("w:p", {}, [
502552
self._BEGIN_COMPLEX_FIELD,

0 commit comments

Comments
 (0)