Skip to content

Commit 80e9fd0

Browse files
committed
Handle SDTs representing checkboxes wrapped in other elements
1 parent cc1ad1d commit 80e9fd0

5 files changed

Lines changed: 132 additions & 9 deletions

File tree

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* Add "Heading" and "Body" styles, as found in documents created by Apple Pages,
44
to the default style map.
55

6+
* Handle structured document tags representing checkboxes wrapped in other
7+
elements, such as table cells. Previously, the wrapping elements would have
8+
been ignored.
9+
610
# 1.9.1
711

812
* Ignore AlternateContent elements when there is no Fallback element.

mammoth/documents.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ class TableCellUnmerged:
9292
rowspan = cobble.field()
9393
vmerge = cobble.field()
9494

95+
def copy(self, **kwargs):
96+
return cobble.copy(self, **kwargs)
97+
9598
@cobble.data
9699
class Break(Element):
97100
break_type = cobble.field()

mammoth/docx/body_xml.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .. import documents
66
from .. import results
77
from .. import lists
8+
from .. import transforms
89
from . import complex_fields
910
from .dingbats import dingbats
1011
from .xmlparser import node_types, XmlElement, null_xml_element
@@ -577,17 +578,49 @@ def alternate_content(element):
577578
return read_child_elements(element.find_child_or_null("mc:Fallback"))
578579

579580
def read_sdt(element):
580-
checkbox = element.find_child_or_null("w:sdtPr").find_child("wordml:checkbox")
581+
content_result = read_child_elements(element.find_child_or_null("w:sdtContent"))
582+
583+
def handle_content(content):
584+
# From the WordML standard: https://learn.microsoft.com/en-us/openspecs/office_standards/ms-docx/3350cb64-931f-41f7-8824-f18b2568ce66
585+
#
586+
# > A CT_SdtCheckbox element that specifies that the parent
587+
# > structured document tag is a checkbox when displayed in the
588+
# > document. The parent structured document tag contents MUST
589+
# > contain a single character and optionally an additional
590+
# > character in a deleted run.
591+
checkbox = element.find_child_or_null("w:sdtPr").find_child("wordml:checkbox")
592+
593+
if checkbox is None:
594+
return content
581595

582-
if checkbox is not None:
583596
checked_element = checkbox.find_child("wordml:checked")
584597
is_checked = (
585598
checked_element is not None and
586599
read_boolean_attribute_value(checked_element.attributes.get("wordml:val"))
587600
)
588-
return _success(documents.checkbox(checked=is_checked))
589-
else:
590-
return read_child_elements(element.find_child_or_null("w:sdtContent"))
601+
document_checkbox = documents.checkbox(checked=is_checked)
602+
603+
has_checkbox = False
604+
605+
def transform_text(text):
606+
nonlocal has_checkbox
607+
if len(text.value) > 0 and not has_checkbox:
608+
has_checkbox = True
609+
return document_checkbox
610+
else:
611+
return text
612+
613+
replaced_content = list(map(
614+
transforms.element_of_type(documents.Text, transform_text),
615+
content,
616+
))
617+
618+
if has_checkbox:
619+
return replaced_content
620+
else:
621+
return document_checkbox
622+
623+
return content_result.map(handle_content)
591624

592625
handlers = {
593626
"w:t": text,

mammoth/transforms.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@ def transform_element(element):
1515
return transform(element)
1616
else:
1717
return element
18-
18+
1919
return _each_element(transform_element)
2020

2121

2222
def _each_element(transform_element):
2323
def transform_element_and_children(element):
24-
if isinstance(element, documents.HasChildren):
24+
if isinstance(element, (documents.HasChildren, documents.TableCellUnmerged)):
2525
children = list(map(transform_element_and_children, element.children))
2626
element = element.copy(children=children)
27-
27+
2828
return transform_element(element)
29-
29+
3030
return transform_element_and_children
3131

3232

tests/docx/body_xml_tests.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,89 @@ def test_structured_document_tag_checkbox_with_checked_1_is_checked(self):
788788

789789
assert_that(result, is_checkbox(checked=True))
790790

791+
def test_when_structured_document_tag_checkbox_has_sdt_content_then_checkbox_replaces_single_character(self):
792+
element = xml_element("w:tbl", {}, [
793+
w_tr(
794+
xml_element("w:sdt", {}, [
795+
xml_element("w:sdtPr", {}, [
796+
xml_element("wordml:checkbox", {}, [
797+
xml_element("wordml:checked", {"wordml:val": "1"}),
798+
]),
799+
]),
800+
xml_element("w:sdtContent", {}, [
801+
xml_element("w:tc", {}, [
802+
xml_element("w:p", {}, [
803+
xml_element("w:r", {}, [
804+
xml_element("w:t", {}, [
805+
xml_text("☐"),
806+
]),
807+
]),
808+
]),
809+
]),
810+
]),
811+
]),
812+
),
813+
])
814+
815+
result = _read_and_get_document_xml_element(element)
816+
817+
assert_equal(result, documents.table([
818+
documents.table_row([
819+
documents.table_cell([
820+
documents.paragraph([
821+
documents.run([
822+
documents.checkbox(checked=True),
823+
]),
824+
]),
825+
]),
826+
]),
827+
]))
828+
829+
def test_when_structured_document_tag_checkbox_has_sdt_content_then_deleted_content_is_ignored(self):
830+
element = xml_element("w:tbl", {}, [
831+
w_tr(
832+
xml_element("w:sdt", {}, [
833+
xml_element("w:sdtPr", {}, [
834+
xml_element("wordml:checkbox", {}, [
835+
xml_element("wordml:checked", {"wordml:val": "1"}),
836+
]),
837+
]),
838+
xml_element("w:sdtContent", {}, [
839+
xml_element("w:tc", {}, [
840+
xml_element("w:p", {}, [
841+
xml_element("w:r", {}, [
842+
xml_element("w:t", {}, [
843+
xml_text("☐"),
844+
]),
845+
]),
846+
xml_element("w:del", {}, [
847+
xml_element("w:r", {}, [
848+
xml_element("w:t", {}, [
849+
xml_text("☐")
850+
])
851+
])
852+
]),
853+
]),
854+
]),
855+
]),
856+
]),
857+
),
858+
])
859+
860+
result = _read_and_get_document_xml_element(element)
861+
862+
assert_equal(result, documents.table([
863+
documents.table_row([
864+
documents.table_cell([
865+
documents.paragraph([
866+
documents.run([
867+
documents.checkbox(checked=True),
868+
]),
869+
]),
870+
]),
871+
]),
872+
]))
873+
791874
def _complex_field_checkbox_paragraph(self, ff_data_children):
792875
return xml_element("w:p", {}, [
793876
xml_element("w:r", {}, [

0 commit comments

Comments
 (0)