My requirement is that I have a PDF AcroForm template with a few text fields. Based on the logged-in user, these text fields are prepopulated with the user’s name and city, and the PDF is then presented to the user (note: the user is not manually entering data into the PDF; my backend code is doing this). These name and city fields are causing problems with non-English characters. The strange thing is that this works perfectly fine on my local machine (Windows 10). However, when the code is deployed on a Linux machine, those non-English characters are replaced with some other characters.
I am using pdfbox-app version 2.0.24
I have this font Arial_Narrow.ttf in my classpath. (to fix another issue – java.lang.IllegalArgumentException: … is not available in this font’s encoding: WinAnsiEncoding)
My assumption is that, since this Arial_Narrow.ttf font has WinAnsiEncoding, it works on the Windows 10 machine but causes problems when deployed on the Linux machine.
Please help!
package com.example.demo; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox; import org.apache.pdfbox.pdmodel.interactive.form.PDField; import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton; import org.apache.pdfbox.pdmodel.interactive.form.PDTextField; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.Map; import java.util.Set; public class PDFMailMergeUtil { /** * constants used to check or uncheck a checkbox in the PDF */ private static final String CHECKBOX_VALUE_ON = "ON"; private static final String CHECKBOX_VALUE_OFF = "OFF"; /** * This method takes PDF Template and a map of PDF Form field names and their values, * and returns Actual PDF Document with appropriate values. * Consider templateContent as Class and returned data as Object of that cklass. 
* * @param templateContent PDF Template * @param pdf_fieldName_fieldValue_map PDF form field names and their values * @return Actual PDF document with filled values */ public static byte[] createPdfDocumentFromPdfTemplate(byte[] templateContent, Map<String, String> pdf_fieldName_fieldValue_map) { byte[] mailMergedPDFContent = null; // validation if (templateContent == null) { System.out.println ("PDF Template Content is null."); return null; } ByteArrayOutputStream out = null; PDDocument pdDoc = null; try { pdDoc = PDDocument.load(templateContent); PDAcroForm pdAcroForm = pdDoc.getDocumentCatalog().getAcroForm(); if (pdAcroForm == null) { System.out.println("No Form Field present in the PDF Template."); } else { PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf")); // I have this Arial_Narrow.ttf font in my resources folder so available in classpath PDResources res = pdAcroForm.getDefaultResources(); String fontName = res.add(font).getName(); String defaultAppearanceString = "/" + fontName + " 10 Tf 0 g"; Iterator<PDField> fieldsIterator = pdAcroForm.getFieldIterator(); while (fieldsIterator.hasNext()) { PDField pdfield = fieldsIterator.next(); String formFieldName = pdfield.getFullyQualifiedName(); // check if fieldName-Value Map contains the form field name in template if (!pdf_fieldName_fieldValue_map.containsKey(formFieldName)) { continue; } // We are here - means the PDF Acro Form Field name is present in our name-value map // get field value from map String formFieldValue = pdf_fieldName_fieldValue_map.get(formFieldName); if (pdfield instanceof PDTextField) { // if the PDF Form field is a Text Field ((PDTextField) pdfield).setDefaultAppearance(defaultAppearanceString); pdfield.setValue(formFieldValue); pdfield.setReadOnly(true); } else if (pdfield instanceof PDRadioButton) { PDRadioButton pdRadioButton = (PDRadioButton)pdfield; // if the PDF Form field is a Radio Button Set<String> allowedValues = 
pdRadioButton.getOnValues(); if (allowedValues != null && !allowedValues.isEmpty() && allowedValues.contains(formFieldValue)) { pdfield.setValue(formFieldValue); } else { System.out.println("PDF Form Field with name '" + formFieldName + "' received value as '" + formFieldValue + "'. However allowed values for this field are " + allowedValues); } pdfield.setReadOnly(true); } else if (pdfield instanceof PDCheckBox) { // if the PDF Form field is a Checkbox PDCheckBox pdCheckBox = (PDCheckBox)pdfield; if (CHECKBOX_VALUE_ON.equalsIgnoreCase(formFieldValue)) { pdCheckBox.check(); } else if (CHECKBOX_VALUE_OFF.equalsIgnoreCase(formFieldValue)) { pdCheckBox.unCheck(); } pdCheckBox.setReadOnly(true); } } // extract to output file byte[] out = new ByteArrayOutputStream(); pdDoc.save(out); mailMergedPDFContent = out.toByteArray(); } } catch (Exception e) { e.printStackTrace(); } finally { // clear resources try { if (pdDoc != null) pdDoc.close(); if (out != null) out.close(); } catch (IOException e) { e.printStackTrace(); } } return mailMergedPDFContent; } } Input to above utility class is
byte[] template = Files.readAllBytes(Paths.get("SomePDFTemplate.pdf")); // set field name value map Map<String, String> map = new HashMap<>(); map.put("Signing_Place", "İstanbul, Poznań, Łodź"); // these wierd characters not rendering properly when code runs on Lunix server map.put("Participant_Name","Test Präjakta"); map.put("Radio_Button_Group","RB_Item_3"); // valid values are: [RB_Item_1, RB_Item_2, RB_Item_3] map.put("CB_Item_1","OFF"); map.put("CB_Item_2","ON"); // create document byte[] pdfDoc = PDFMailMergeUtil.createPdfDocumentFromPdfTemplate(template, map); // save document as .pdf try (FileOutputStream fos = new FileOutputStream("C:\data\Projects\demo\src\test\resources\Test_PROD.pdf")) { fos.write(pdfDoc); }
Advertisement
Answer
Change this
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"));
to this
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"), false);
to avoid subsetting. IIRC, it’s because with a subset font the embedded font file doesn’t really exist yet at the time you’re using it, since the object you’re using is a different PDFont object.