plutext / docx4j

JAXB-based Java library for Word docx, Powerpoint pptx, and Excel xlsx files

Home Page:https://www.docx4java.org/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

BUG: WORD to FO-based PDF: heavy header misalignment

boyi01 opened this issue · comments

commented

I am trying to convert a DOCX file to PDF using the FO-based PDF output. The converted PDF has misaligned headers/footers. We updated from 8.1.6 to 8.3.9; I also tried some 8.2.x versions, all with the same problem. The misalignment is extremely heavy, by about a factor of 10, so the PDF is unusable...
In version 8.1.6 it worked quite nicely...
The error is already present in the XML when the custom render method in FORendererPdfA is called.

[test.docx](https://github.com/plutext/docx4j/files/11768107/test.docx)
[test.pdf](https://github.com/plutext/docx4j/files/11768109/test.pdf)
Main Code

try {
      if (fopFactory == null) {
          fopFactory = FopFactory.newInstance(templateResource.getFopConfigAsFile());
      }
  } catch (SAXException e) {
      logger.error(e.getMessage(), e);
  } catch (IOException e) {
      logger.error(e.getMessage(), e);
    }
  
    StartEvent startEvent = new StartEvent(wmlPackage, WellKnownProcessSteps.PDF);
    startEvent.publish();
  
    FOSettings settings = new FOSettings();
    settings.setWmlPackage(wmlPackage);
    settings.setApacheFopMime("application/pdf");
    settings.getSettings().put("fopFactory", fopFactory);
    settings.setCustomFoRenderer(FORendererPdfA.getFoRendererPdfA());
    Docx4J.toFO(settings, outputStream, Docx4J.FLAG_NONE);

fop_config

<?xml version="1.0" encoding="utf-8"?>
<!-- Apache FOP configuration file ("fop_config"): general settings, PDF renderer options and font registrations. -->
<fop xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:noNamespaceSchemaLocation="http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/foschema/fop-configuration.xsd?view=co">

    <!-- Base URL for resolving relative URLs -->
    <base>.</base>

    <!-- Source resolution in dpi (dots/pixels per inch) for determining the size of pixels in SVG and bitmap images, default: 72dpi -->
    <source-resolution>72</source-resolution>
    <!-- Target resolution in dpi (dots/pixels per inch) for specifying the target resolution for generated bitmaps, default: 72dpi -->
    <target-resolution>72</target-resolution>

    <!-- Default page-height and page-width, in case value is specified as auto -->
    <default-page-settings height="29.7cm" width="21cm"/>

    <!-- Renderer-specific settings, selected by output MIME type -->
    <renderers>
        <renderer mime="application/pdf">
            <!-- Request PDF/A-1b output at PDF version 1.4 -->
            <pdf-a-mode>PDF/A-1b</pdf-a-mode>
            <version>1.4</version>

            <!-- Register the four Arial faces (regular, bold, bold italic, italic); each is embedded in full.
                 The embed-url values are relative file names. -->
            <fonts>
                <font embed-url="arial.ttf" embedding-mode="full">
                    <font-triplet name="Arial" style="normal" weight="normal"/>
                </font>
                <font embed-url="arialbd.ttf" embedding-mode="full">
                    <font-triplet name="Arial" style="normal" weight="bold"/>
                </font>
                <font embed-url="arialbi.ttf" embedding-mode="full">
                    <font-triplet name="Arial" style="italic" weight="bold"/>
                </font>
                <font embed-url="ariali.ttf" embedding-mode="full">
                    <font-triplet name="Arial" style="italic" weight="normal"/>
                </font>
                <!-- NOTE(review): auto-detect presumably also registers fonts installed on the host system;
                     confirm against the Apache FOP font configuration documentation. -->
                <auto-detect/>
            </fonts>
        </renderer>
    </renderers>

    <!-- Disabled: font substitutions that would map every Arial variant to the matching OpenSans variant. -->
    <!--<fonts>-->
        <!--<substitutions>-->
            <!--<substitution>-->
                <!--<from font-family="Arial" font-weight="normal" font-style="normal"/>-->
                <!--<to font-family="OpenSans" font-weight="normal" font-style="normal"/>-->
            <!--</substitution>-->
            <!--<substitution>-->
                <!--<from font-family="Arial" font-weight="bold" font-style="normal"/>-->
                <!--<to font-family="OpenSans" font-weight="bold" font-style="normal"/>-->
            <!--</substitution>-->
            <!--<substitution>-->
                <!--<from font-family="Arial" font-weight="normal" font-style="italic"/>-->
                <!--<to font-family="OpenSans" font-weight="normal" font-style="italic"/>-->
            <!--</substitution>-->
            <!--<substitution>-->
                <!--<from font-family="Arial" font-weight="bold" font-style="italic"/>-->
                <!--<to font-family="OpenSans" font-weight="bold" font-style="italic"/>-->
            <!--</substitution>-->
        <!--</substitutions>-->
    <!--</fonts>-->

</fop>

FORendererPdfA

package ch.rab.backend.pdf.docx4j;

import org.docx4j.convert.out.FORenderer;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.convert.out.fo.renderers.FORendererApacheFOP;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;

/**
 * Apache FOP renderer that patches the generated XSL-FO string before handing it to
 * {@link FORendererApacheFOP}: Symbol-font bullet blocks and "Times New Roman" references are
 * rewritten to Arial, and Arial is set as the default font family on the {@code fo:root} element.
 *
 * <p>The shared instance returned by {@link #getFoRendererPdfA()} is created lazily without
 * synchronization.
 */
public class FORendererPdfA extends FORendererApacheFOP {
    private static final DocumentBuilderFactory DOCUMENT_BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
    private static final TransformerFactory TRANSFORMER_FACTORY = TransformerFactory.newInstance();

    /** Opening tag of a bullet block that uses the Symbol font. */
    private static final String SYMBOL_BLOCK_OPEN = "<fo:block font-family=\"Symbol\">";
    /** Closing tag of an {@code fo:block}. */
    private static final String BLOCK_CLOSE = "</fo:block>";
    /** Replacement bullet block: an en dash rendered in Arial. */
    private static final String ARIAL_DASH_BLOCK = "<fo:block font-family=\"Arial\">–</fo:block>";
    /** Bullet characters that are replaced when they appear alone in a Symbol-font block. */
    private static final char[] SYMBOL_BULLETS = {'\uf02d', '\u00B7', '\u00A7'};

    protected static FORendererPdfA foRendererPdfA = null;

    /**
     * Returns the shared renderer instance, creating it on first use.
     *
     * @return the shared {@code FORendererPdfA}
     */
    public static FORenderer getFoRendererPdfA() {
        if (foRendererPdfA == null) {
            foRendererPdfA = new FORendererPdfA();
        }
        return foRendererPdfA;
    }

    /**
     * Patches the FO document's fonts and delegates rendering to the superclass.
     *
     * @param foDocument            the XSL-FO document as a string
     * @param settings              passed through to the superclass unchanged
     * @param twoPass               passed through to the superclass unchanged
     * @param pageNumberInformation passed through to the superclass unchanged
     * @param outputStream          passed through to the superclass unchanged
     * @throws Docx4JException if the patched FO cannot be parsed or serialized, if it contains no
     *                         {@code fo:root} element, or if the superclass rendering fails
     */
    @Override
    public void render(String foDocument, FOSettings settings, boolean twoPass, List<SectionPageInformation> pageNumberInformation, OutputStream outputStream) throws Docx4JException {
        try {
            // Replace Word's bullet characters: they use the Symbol font, which is not included in the PDF/A.
            String patchedFo = replaceSymbolBullets(foDocument);

            // make sure all "known" fonts are replaced
            patchedFo = patchedFo.replace("Times New Roman", "Arial");

            Document document = stringToXML(patchedFo);

            // set default font at the root, such that unreplaceable Helvetica is not referenced in PDF
            Element root = (Element) document.getElementsByTagName("fo:root").item(0);
            if (root == null) {
                // Report a malformed FO document through the declared exception type instead of an NPE.
                throw new Docx4JException("FO document contains no fo:root element");
            }
            root.setAttribute("font-family", "Arial");

            super.render(xmlToString(document), settings, twoPass, pageNumberInformation, outputStream);
        } catch (ParserConfigurationException | IOException | TransformerException | SAXException e) {
            throw new Docx4JException("Exception while parsing/constructing XML", e);
        }
    }

    /**
     * Replaces every Symbol-font block that contains exactly one of {@link #SYMBOL_BULLETS} with an
     * Arial en-dash block. Matching is literal (no regular expressions).
     */
    private static String replaceSymbolBullets(String fo) {
        String result = fo;
        for (char bullet : SYMBOL_BULLETS) {
            result = result.replace(SYMBOL_BLOCK_OPEN + bullet + BLOCK_CLOSE, ARIAL_DASH_BLOCK);
        }
        return result;
    }

    /** Creates a document builder with DOCTYPE declarations disallowed. */
    private static DocumentBuilder getNewDocumentBuilder() throws ParserConfigurationException {
        DOCUMENT_BUILDER_FACTORY.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        return DOCUMENT_BUILDER_FACTORY.newDocumentBuilder();
    }

    /** Parses the given XML string into a DOM document. */
    private static Document stringToXML(String xmlString) throws ParserConfigurationException, IOException, SAXException {
        DocumentBuilder builder = getNewDocumentBuilder();
        return builder.parse(new InputSource(new StringReader(xmlString)));
    }

    /** Serializes the given DOM document to a string using a default (identity) transformer. */
    private static String xmlToString(Document xmlDocument) throws TransformerException {
        DOMSource domSource = new DOMSource(xmlDocument);
        StringWriter writer = new StringWriter();
        StreamResult result = new StreamResult(writer);
        Transformer transformer = TRANSFORMER_FACTORY.newTransformer();
        transformer.transform(domSource, result);

        return writer.toString();
    }
}

Correction: the bug first appears in version 8.3.4.