shepmaster / sxd-document

An XML library in Rust

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Add an option to bubble up all namespace declarations to the root

gsurrel opened this issue · comments

Loading and then saving a flat LibreOffice text document (`.fodt`) makes it unreadable:

/// Round-trips `template.fodt` through sxd-document: the file is read, parsed,
/// re-serialized, and the result is written to `output.fodt`.
///
/// Each step panics with its own message on failure.
fn main() {
    // Load the whole input file as a string and parse it.
    let source = std::fs::read_to_string("template.fodt").expect("Failed to open");
    let package = sxd_document::parser::parse(&source).expect("Failed to parse");
    let document = package.as_document();

    // Serialize into an in-memory buffer before touching the output file.
    let mut buffer = Vec::new();
    sxd_document::writer::format_document(&document, &mut buffer).expect("unable to output XML");
    std::fs::write("output.fodt", &buffer).expect("Failed to write");
}

A minimal template file is attached (zipped because of GitHub's attachment restrictions).

template.fodt.zip

It appears that the primary differences are:

  • SXD doesn't output unused namespaces
  • SXD doesn't explicitly set the encoding
% diff --unified template.fodt output.fodt
--- template.fodt	2020-08-31 19:25:24.000000000 -0400
+++ output.fodt	2020-08-31 19:26:21.000000000 -0400
@@ -2,46 +2,11 @@
 <office:document
     office:mimetype="application/vnd.oasis.opendocument.text"
     office:version="1.3"
-    xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0"
-    xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
-    xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
-    xmlns:css3t="http://www.w3.org/TR/css3-text/"
-    xmlns:dc="http://purl.org/dc/elements/1.1/"
-    xmlns:dom="http://www.w3.org/2001/xml-events"
-    xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
-    xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
-    xmlns:drawooo="http://openoffice.org/2010/draw"
-    xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
-    xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
-    xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
-    xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
-    xmlns:grddl="http://www.w3.org/2003/g/data-view#"
-    xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0"
-    xmlns:math="http://www.w3.org/1998/Math/MathML"
-    xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
-    xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
-    xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
     xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
-    xmlns:officeooo="http://openoffice.org/2009/office"
-    xmlns:ooo="http://openoffice.org/2004/office"
-    xmlns:oooc="http://openoffice.org/2004/calc"
-    xmlns:ooow="http://openoffice.org/2004/writer"
-    xmlns:rpt="http://openoffice.org/2005/report"
-    xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
-    xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
-    xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
-    xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
-    xmlns:tableooo="http://openoffice.org/2009/table"
-    xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
-    xmlns:xforms="http://www.w3.org/2002/xforms"
-    xmlns:xhtml="http://www.w3.org/1999/xhtml"
-    xmlns:xlink="http://www.w3.org/1999/xlink"
-    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     >
  <office:body>
   <office:text>
-   <text:p/>
+   <text:p xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"/>
   </office:text>
  </office:body>
 </office:document>

It's silly that the presence of an unused namespace is important.

Ah, it's actually that xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" needs to be declared on the root element (which, again, is silly). I'm guessing that someone was playing fast and loose with some document type checking code.

So this document works —

<?xml version="1.0"?>
<office:document
    office:mimetype="application/vnd.oasis.opendocument.text"
    office:version="1.3"
    xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
     xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    >
 <office:body>
  <office:text>
   <text:p />
  </office:text>
 </office:body>
</office:document>