Skip to content

Commit 685e1e0

Browse files
committed
Handle hyperlinked wp:anchor and wp:inline elements
1 parent 300e749 commit 685e1e0

4 files changed

Lines changed: 69 additions & 12 deletions

File tree

NEWS

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
# 1.12.0
2+
3+
* Handle hyperlinked wp:anchor and wp:inline elements.
4+
15
# 1.11.0
26

37
* Ignore style definitions using a style ID that has already been used.

mammoth/docx/body_xml.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -516,29 +516,46 @@ def break_(element):
516516

517517

518518
def inline(element):
519-
properties = element.find_child_or_null("wp:docPr").attributes
519+
properties_element = element.find_child_or_null("wp:docPr")
520+
521+
properties = properties_element.attributes
520522
if properties.get("descr", "").strip():
521523
alt_text = properties.get("descr")
522524
else:
523525
alt_text = properties.get("title")
526+
527+
hlink_click_element = properties_element.find_child_or_null("a:hlinkClick")
528+
hyperlink_relationship_id = hlink_click_element.attributes.get("r:id")
529+
if hyperlink_relationship_id:
530+
href = relationships.find_target_by_relationship_id(hyperlink_relationship_id)
531+
else:
532+
href = None
533+
524534
blips = element.find_children("a:graphic") \
525535
.find_children("a:graphicData") \
526536
.find_children("pic:pic") \
527537
.find_children("pic:blipFill") \
528538
.find_children("a:blip")
529-
return _read_blips(blips, alt_text)
539+
return _read_blips(blips, alt_text=alt_text, href=href)
530540

531-
def _read_blips(blips, alt_text):
532-
return _ReadResult.concat(lists.map(lambda blip: _read_blip(blip, alt_text), blips))
541+
def _read_blips(blips, alt_text, href):
542+
return _ReadResult.concat(lists.map(lambda blip: _read_blip(blip, alt_text=alt_text, href=href), blips))
533543

534-
def _read_blip(element, alt_text):
544+
def _read_blip(element, alt_text, href):
535545
blip_image = _find_blip_image(element)
536546

537547
if blip_image is None:
538548
warning = results.warning("Could not find image file for a:blip element")
539549
return _empty_result_with_message(warning)
550+
551+
result = _read_image(blip_image, alt_text)
552+
if href is None:
553+
return result
540554
else:
541-
return _read_image(blip_image, alt_text)
555+
return result.map(lambda image_elements: documents.hyperlink(
556+
image_elements,
557+
href=href,
558+
))
542559

543560
def _read_image(image_file, alt_text):
544561
image_path, open_image = image_file

tests/docx/body_xml_tests.py

Lines changed: 41 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
is_text,
2323
is_table,
2424
is_row,
25+
is_image,
2526
)
2627
from ..testing import assert_equal
2728

@@ -1356,10 +1357,10 @@ class ImageTests(object):
13561357
IMAGE_BYTES = b"Not an image at all!"
13571358
IMAGE_RELATIONSHIP_ID = "rId5"
13581359

1359-
def _read_embedded_image(self, element):
1360+
def _read_embedded_image(self, element, relationships=None):
13601361
relationships = Relationships([
13611362
_image_relationship(self.IMAGE_RELATIONSHIP_ID, "media/hat.png"),
1362-
])
1363+
] + (relationships or []))
13631364

13641365
mocks = funk.Mocks()
13651366
docx_file = mocks.mock()
@@ -1555,6 +1556,31 @@ def test_no_elements_created_if_image_cannot_be_found_in_wp_inline(self):
15551556
assert_equal([], result.messages)
15561557
assert_equal(None, result.value)
15571558

1559+
def test_can_read_pictures_with_hyperlink_specified_in_document_properties(self):
1560+
drawing_element = _create_inline_image(
1561+
blip=_embedded_blip(self.IMAGE_RELATIONSHIP_ID),
1562+
doc_pr_children=[
1563+
xml_element("a:hlinkClick", {"r:id": "rId42"})
1564+
],
1565+
)
1566+
1567+
result = self._read_embedded_image(
1568+
drawing_element,
1569+
relationships=[_hyperlink_relationship("rId42", "http://example.com")],
1570+
)
1571+
1572+
assert_that(result, is_hyperlink(
1573+
href="http://example.com",
1574+
children=is_sequence(
1575+
is_image(
1576+
content_type="image/png",
1577+
)
1578+
)
1579+
))
1580+
image = result.children[0]
1581+
with image.open() as image_file:
1582+
assert_equal(self.IMAGE_BYTES, image_file.read())
1583+
15581584

15591585
def test_footnote_reference_has_id_read():
15601586
footnote_xml = xml_element("w:footnoteReference", {"w:id": "4"})
@@ -1710,9 +1736,18 @@ def _text_element(value):
17101736
return xml_element("w:t", {}, [xml_text(value)])
17111737

17121738

1713-
def _create_inline_image(blip, description=None, title=None):
1739+
def _create_inline_image(blip, description=None, doc_pr_children=None, title=None):
17141740
return xml_element("w:drawing", {}, [
1715-
xml_element("wp:inline", {}, _create_image_elements(blip, description=description, title=title))
1741+
xml_element(
1742+
"wp:inline",
1743+
{},
1744+
_create_image_elements(
1745+
blip,
1746+
description=description,
1747+
doc_pr_children=doc_pr_children,
1748+
title=title,
1749+
)
1750+
),
17161751
])
17171752

17181753

@@ -1722,15 +1757,15 @@ def _create_anchored_image(description, blip):
17221757
])
17231758

17241759

1725-
def _create_image_elements(blip, description=None, title=None):
1760+
def _create_image_elements(blip, description=None, doc_pr_children=None, title=None):
17261761
properties = {}
17271762
if description is not None:
17281763
properties["descr"] = description
17291764
if title is not None:
17301765
properties["title"] = title
17311766

17321767
return [
1733-
xml_element("wp:docPr", properties),
1768+
xml_element("wp:docPr", properties, doc_pr_children or []),
17341769
xml_element("a:graphic", {}, [
17351770
xml_element("a:graphicData", {}, [
17361771
xml_element("pic:pic", {}, [

tests/docx/document_matchers.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ def matcher(**kwargs):
1919
is_checkbox = create_element_matcher(documents.Checkbox)
2020
is_table = create_element_matcher(documents.Table)
2121
is_row = create_element_matcher(documents.TableRow)
22+
is_image = create_element_matcher(documents.Image)
2223

2324

2425
is_empty_run = is_run(children=[])

0 commit comments

Comments
 (0)