Skip to content

Commit d69e8d8

Browse files
authored
Upgrade PDFBox API to v2.0.23 (#415)
1 parent dfeae2f commit d69e8d8

File tree

65 files changed

+832
-434
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+832
-434
lines changed

library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,4 +395,43 @@ public void testDeleteFont() throws IOException
395395

396396
Assert.assertTrue(tempPdfFile.delete());
397397
}
398+
399+
/**
400+
* PDFBOX-5115: U+00AD (soft hyphen) should work with WinAnsiEncoding.
*
* The test checks that the soft hyphen has the same string width as "-" for a Type 1 font
* and for a Type 0 font loaded from a TrueType file, then writes the text with both fonts,
* reloads the saved document and compares the extracted text with the input.
401+
*/
402+
@Test
403+
public void testSoftHyphen() throws IOException
404+
{
405+
String text = "- \u00AD";
406+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
407+
PDDocument doc = new PDDocument();
408+
PDPage page = new PDPage();
409+
doc.addPage(page);
410+
PDFont font1 = PDType1Font.HELVETICA;
411+
PDFont font2 = PDType0Font.load(doc, testContext.getAssets().open(
412+
"com/tom_roush/pdfbox/resources/ttf/LiberationSans-Regular.ttf"));
413+
414+
// the soft hyphen must have exactly the same width as the regular hyphen (delta 0)
Assert.assertEquals(font1.getStringWidth("-"), font1.getStringWidth("\u00AD"), 0);
415+
Assert.assertEquals(font2.getStringWidth("-"), font2.getStringWidth("\u00AD"), 0);
416+
417+
// show the same text once per font, each on its own line
PDPageContentStream cs = new PDPageContentStream(doc, page);
418+
cs.beginText();
419+
cs.newLineAtOffset(100, 500);
420+
cs.setFont(font1, 10);
421+
cs.showText(text);
422+
cs.newLineAtOffset(0, 100);
423+
cs.setFont(font2, 10);
424+
cs.showText(text);
425+
cs.endText();
426+
cs.close();
427+
doc.save(baos);
428+
doc.close();
429+
430+
// reload from the in-memory bytes and verify that both lines are extracted unchanged
doc = PDDocument.load(baos.toByteArray());
431+
PDFTextStripper stripper = new PDFTextStripper();
432+
stripper.setLineSeparator("\n");
433+
String extractedText = stripper.getText(doc);
434+
Assert.assertEquals(text + "\n" + text, extractedText.trim());
435+
doc.close();
436+
}
398437
}

library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -200,19 +200,6 @@ public void testFlattenPDFBOX3396_4() throws IOException
200200
flattenAndCompare(sourceUrl, targetFileName);
201201
}
202202

203-
/*
204-
* PDFBOX-3587 Empty template.
205-
*/
206-
// disabled as there is a missing character with the available fonts on the test server
207-
// @Test
208-
public void testFlattenOpenOfficeForm() throws IOException
209-
{
210-
String sourceUrl = "https://issues.apache.org/jira/secure/attachment/12839977/OpenOfficeForm.pdf";
211-
String targetFileName = "OpenOfficeForm.pdf";
212-
213-
flattenAndCompare(sourceUrl, targetFileName);
214-
}
215-
216203
/*
217204
* PDFBOX-3587 Filled template.
218205
*/

library/src/main/java/com/tom_roush/fontbox/afm/AFMParser.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,11 +253,11 @@ public class AFMParser
253253
*/
254254
public static final String START_KERN_PAIRS1 = "StartKernPairs1";
255255
/**
256-
* This is the start compisites data section.
256+
* This is the start composites data section.
257257
*/
258258
public static final String START_COMPOSITES = "StartComposites";
259259
/**
260-
* This is the end compisites data section.
260+
* This is the end composites data section.
261261
*/
262262
public static final String END_COMPOSITES = "EndComposites";
263263
/**

library/src/main/java/com/tom_roush/fontbox/cff/CFFParser.java

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ else if (TAG_TTFONLY.equals(firstTag))
111111
stringIndex = readStringIndexData(input);
112112
byte[][] globalSubrIndex = readIndexData(input);
113113

114-
List<CFFFont> fonts = new ArrayList<CFFFont>();
114+
List<CFFFont> fonts = new ArrayList<CFFFont>(nameIndex.length);
115115
for (int i = 0; i < nameIndex.length; i++)
116116
{
117117
CFFFont font = parseFont(input, nameIndex[i], topDictIndex[i]);
@@ -270,7 +270,7 @@ else if (b0 == 28 || b0 == 29)
270270
}
271271
else if (b0 == 30)
272272
{
273-
entry.operands.add(readRealNumber(input, b0));
273+
entry.operands.add(readRealNumber(input));
274274
}
275275
else if (b0 >= 32 && b0 <= 254)
276276
{
@@ -330,19 +330,18 @@ else if (b0 >= 251 && b0 <= 254)
330330
}
331331
}
332332

333-
/**
334-
* @param b0
335-
*/
336-
private static Double readRealNumber(CFFDataInput input, int b0) throws IOException
333+
private static Double readRealNumber(CFFDataInput input) throws IOException
337334
{
338335
StringBuilder sb = new StringBuilder();
339336
boolean done = false;
340337
boolean exponentMissing = false;
341338
boolean hasExponent = false;
339+
int[] nibbles = new int[2];
342340
while (!done)
343341
{
344342
int b = input.readUnsignedByte();
345-
int[] nibbles = { b / 16, b % 16 };
343+
nibbles[0] = b / 16;
344+
nibbles[1] = b % 16;
346345
for (int nibble : nibbles)
347346
{
348347
switch (nibble)
@@ -392,7 +391,8 @@ private static Double readRealNumber(CFFDataInput input, int b0) throws IOExcept
392391
done = true;
393392
break;
394393
default:
395-
throw new IllegalArgumentException();
394+
// can only be a programming error because a nibble is between 0 and F
395+
throw new IllegalArgumentException("illegal nibble " + nibble);
396396
}
397397
}
398398
}
@@ -435,11 +435,13 @@ private CFFFont parseFont(CFFDataInput input, String name, byte[] topDictIndex)
435435
boolean isCIDFont = topDict.getEntry("ROS") != null;
436436
if (isCIDFont)
437437
{
438-
font = new CFFCIDFont();
438+
CFFCIDFont cffCIDFont = new CFFCIDFont();
439439
DictData.Entry rosEntry = topDict.getEntry("ROS");
440-
((CFFCIDFont) font).setRegistry(readString(rosEntry.getNumber(0).intValue()));
441-
((CFFCIDFont) font).setOrdering(readString(rosEntry.getNumber(1).intValue()));
442-
((CFFCIDFont) font).setSupplement(rosEntry.getNumber(2).intValue());
440+
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
441+
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
442+
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());
443+
444+
font = cffCIDFont;
443445
}
444446
else
445447
{

library/src/main/java/com/tom_roush/fontbox/cff/Type2CharString.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -325,15 +325,15 @@ private void drawAlternatingCurve(List<Number> numbers, boolean horizontal)
325325
if (horizontal)
326326
{
327327
addCommand(Arrays.asList(numbers.get(0), 0,
328-
numbers.get(1), numbers.get(2), last ? numbers.get(4)
329-
: 0, numbers.get(3)),
328+
numbers.get(1), numbers.get(2), last ? numbers.get(4)
329+
: 0, numbers.get(3)),
330330
new CharStringCommand(8));
331331
}
332332
else
333333
{
334334
addCommand(Arrays.asList(0, numbers.get(0),
335-
numbers.get(1), numbers.get(2), numbers.get(3),
336-
last ? numbers.get(4) : 0),
335+
numbers.get(1), numbers.get(2), numbers.get(3),
336+
last ? numbers.get(4) : 0),
337337
new CharStringCommand(8));
338338
}
339339
numbers = numbers.subList(last ? 5 : 4, numbers.size());
@@ -358,8 +358,8 @@ private void drawCurve(List<Number> numbers, boolean horizontal)
358358
else
359359
{
360360
addCommand(Arrays.asList(first ? numbers.get(0) : 0, numbers.get(first ? 1 : 0), numbers
361-
.get(first ? 2 : 1), numbers.get(first ? 3 : 2),
362-
0, numbers.get(first ? 4 : 3)),
361+
.get(first ? 2 : 1), numbers.get(first ? 3 : 2),
362+
0, numbers.get(first ? 4 : 3)),
363363
new CharStringCommand(8));
364364
}
365365
numbers = numbers.subList(first ? 5 : 4, numbers.size());
@@ -382,8 +382,9 @@ private void addCommand(List<Number> numbers, CharStringCommand command)
382382

383383
private static <E> List<List<E>> split(List<E> list, int size)
384384
{
385-
List<List<E>> result = new ArrayList<List<E>>();
386-
for (int i = 0; i < list.size() / size; i++)
385+
int listSize = list.size() / size;
386+
List<List<E>> result = new ArrayList<List<E>>(listSize);
387+
for (int i = 0; i < listSize; i++)
387388
{
388389
result.add(list.subList(i * size, (i + 1) * size));
389390
}

library/src/main/java/com/tom_roush/fontbox/cmap/CMap.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ public class CMap
5050
// Unicode mappings
5151
private final Map<Integer,String> charToUnicode = new HashMap<Integer,String>();
5252

53+
// inverted map
54+
Map <String, byte[]> unicodeToByteCodes = new HashMap<String, byte[]>();
55+
5356
// CID mappings
5457
private final Map<Integer,Integer> codeToCid = new HashMap<Integer,Integer>();
5558
private final List<CIDRange> codeToCidRanges = new ArrayList<CIDRange>();
@@ -206,6 +209,7 @@ private int getCodeFromArray( byte[] data, int offset, int length )
206209
*/
207210
void addCharMapping(byte[] codes, String unicode)
208211
{
212+
unicodeToByteCodes.put(unicode, codes.clone()); // clone needed, bytes is modified later
209213
int code = getCodeFromArray(codes, 0, codes.length);
210214
charToUnicode.put(code, unicode);
211215

@@ -216,6 +220,17 @@ void addCharMapping(byte[] codes, String unicode)
216220
}
217221
}
218222

223+
/**
224+
* Get the code bytes for a unicode string.
225+
*
226+
* @param unicode the unicode string to look up in the inverted map
227+
* @return the code bytes or null if there is none. The stored array itself is returned,
* not a copy.
228+
*/
229+
public byte[] getCodesFromUnicode(String unicode)
230+
{
231+
return unicodeToByteCodes.get(unicode);
232+
}
233+
219234
/**
220235
* This will add a CID mapping.
221236
*
@@ -275,6 +290,9 @@ void useCmap( CMap cmap )
275290
charToUnicode.putAll(cmap.charToUnicode);
276291
codeToCid.putAll(cmap.codeToCid);
277292
codeToCidRanges.addAll(cmap.codeToCidRanges);
293+
294+
// unicodeToByteCodes should be filled too, but this isn't possible in 2.0.*
295+
// because we don't know the code length
278296
}
279297

280298
/**

library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,25 @@ public class CMapParser
4242

4343
private final byte[] tokenParserByteBuffer = new byte[512];
4444

45+
private boolean strictMode = false;
46+
4547
/**
4648
* Creates a new instance of CMapParser.
4749
*/
4850
public CMapParser()
4951
{
5052
}
5153

54+
/**
55+
* Creates a new instance of CMapParser.
*
* In strict mode, adding bfrange mappings stops as soon as the last byte of a multi-byte
* destination value is 0xFF, instead of carrying into the preceding byte. Note that
* parsePredefined(String) sets the strict mode of this instance back to false.
56+
*
57+
* @param strictMode activates the strict mode used for inline CMaps
58+
*/
59+
public CMapParser(boolean strictMode)
60+
{
61+
this.strictMode = strictMode;
62+
}
63+
5264
/**
5365
* Parse a CMAP file on the file system.
5466
*
@@ -86,6 +98,8 @@ public CMap parsePredefined(String name) throws IOException
8698
try
8799
{
88100
input = getExternalCMap(name);
101+
// deactivate strict mode
102+
strictMode = false;
89103
return parse(input);
90104
}
91105
finally
@@ -329,7 +343,7 @@ private void parseBegincidrange(int numberOfLines, PushbackInputStream cmapStrea
329343
{
330344
int mappedCID = createIntFromBytes(startCode);
331345
result.addCIDMapping(mappedCode++, mappedCID);
332-
increment(startCode);
346+
increment(startCode, startCode.length - 1, false);
333347
}
334348
}
335349
}
@@ -406,18 +420,16 @@ else if (nextToken instanceof byte[])
406420
{
407421
for (int i = 0; i < 256; i++)
408422
{
409-
startCode[1] = (byte) i;
410-
tokenBytes[1] = (byte) i;
411-
addMappingFrombfrange(result, startCode, 0xff, tokenBytes);
412-
423+
startCode[0] = (byte) i;
424+
startCode[1] = 0;
425+
tokenBytes[0] = (byte) i;
426+
tokenBytes[1] = 0;
427+
addMappingFrombfrange(result, startCode, 256, tokenBytes);
413428
}
414429
}
415430
else
416431
{
417-
// PDFBOX-4661: avoid overflow of the last byte, all following values are undefined
418-
int values = Math.min(end - start,
419-
255 - (tokenBytes[tokenBytes.length - 1] & 0xFF)) + 1;
420-
addMappingFrombfrange(result, startCode, values, tokenBytes);
432+
addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes);
421433
}
422434
}
423435
}
@@ -430,7 +442,7 @@ private void addMappingFrombfrange(CMap cmap, byte[] startCode, List<byte[]> tok
430442
{
431443
String value = createStringFromBytes(tokenBytes);
432444
cmap.addCharMapping(startCode, value);
433-
increment(startCode);
445+
increment(startCode, startCode.length - 1, false);
434446
}
435447
}
436448

@@ -441,8 +453,12 @@ private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values,
441453
{
442454
String value = createStringFromBytes(tokenBytes);
443455
cmap.addCharMapping(startCode, value);
444-
increment(startCode);
445-
increment(tokenBytes);
456+
if (!increment(tokenBytes, tokenBytes.length - 1, strictMode))
457+
{
458+
// overflow detected -> stop adding further mappings
459+
break;
460+
}
461+
increment(startCode, startCode.length - 1, false);
446462
}
447463
}
448464

@@ -718,22 +734,24 @@ private boolean isDelimiter(int aByte)
718734
}
719735
}
720736

721-
private void increment(byte[] data)
722-
{
723-
increment(data, data.length - 1);
724-
}
725-
726-
private void increment(byte[] data, int position)
737+
/**
* Increments the byte sequence in data by one, in place. The byte at the given position is
* incremented; if it is 0xFF and position is greater than 0, it is reset to 0 and the carry
* is applied to the byte at position - 1. A 0xFF byte at position 0 wraps around to 0
* without any further carry.
*
* @param data the bytes to increment in place
* @param position index of the byte to increment; the callers in this class pass
* data.length - 1
* @param useStrictMode if true, a 0xFF byte at a position greater than 0 is left unchanged
* and false is returned instead of carrying
* @return false if strict mode prevented the increment, true otherwise
*/
private boolean increment(byte[] data, int position, boolean useStrictMode)
727738
{
728739
if (position > 0 && (data[position] & 0xFF) == 255)
729740
{
741+
// PDFBOX-4661: avoid overflow of the last byte, all following values are undefined
742+
// PDFBOX-5090: strict mode has to be used for CMaps within pdfs
743+
if (useStrictMode)
744+
{
745+
return false;
746+
}
730747
data[position] = 0;
731-
increment(data, position - 1);
748+
// the result is ignored: this point is only reached when useStrictMode is false,
// in which case the recursive call always returns true
increment(data, position - 1, useStrictMode);
732749
}
733750
else
734751
{
735752
data[position] = (byte) (data[position] + 1);
736753
}
754+
return true;
737755
}
738756

739757
private int createIntFromBytes(byte[] bytes)

0 commit comments

Comments
 (0)