[Git][java-team/tika][master] * Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951
Moritz Muehlenhoff
gitlab at salsa.debian.org
Sun Jan 31 21:23:46 GMT 2021
Moritz Muehlenhoff pushed to branch master at Debian Java Maintainers / tika
Commits:
1a8d36eb by Moritz Muehlenhoff at 2021-01-31T22:20:42+01:00
* Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951
(Closes: #954303, #954302)
- - - - -
3 changed files:
- debian/changelog
- + debian/patches/14-CVE-2020-1950_CVE-2020-1951.patch
- debian/patches/series
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+tika (1.22-2) unstable; urgency=medium
+
+ * Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951
+ (Closes: #954303, #954302)
+
+ -- Moritz Muehlenhoff <jmm at debian.org> Sun, 31 Jan 2021 22:18:47 +0100
+
tika (1.22-1) unstable; urgency=medium
* New upstream release
=====================================
debian/patches/14-CVE-2020-1950_CVE-2020-1951.patch
=====================================
@@ -0,0 +1,211 @@
+From ab8a9ed830ec710a32e4ffdf4989aea3aaea92ef Mon Sep 17 00:00:00 2001
+From: tallison <tallison at apache.org>
+Date: Fri, 21 Feb 2020 13:27:13 -0500
+Subject: [PATCH] TIKA-3050 -- add xmp extraction from PSD files
+
+Fixes CVE-2020-1950 and CVE-2020-1951, adapted to 1.22
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
+index 4d0510c3fb..b78a366343 100644
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
++++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
+@@ -16,16 +16,23 @@
+ */
+ package org.apache.tika.parser.image;
+
++import java.io.ByteArrayInputStream;
++import java.io.EOFException;
+ import java.io.IOException;
+ import java.io.InputStream;
++import java.nio.file.Files;
++import java.nio.file.Paths;
+ import java.util.Arrays;
+ import java.util.Collections;
+ import java.util.HashSet;
+ import java.util.Set;
+
+-import org.apache.poi.util.IOUtils;
++import org.apache.commons.io.IOUtils;
+ import org.apache.tika.exception.TikaException;
++import org.apache.tika.extractor.EmbeddedDocumentExtractor;
++import org.apache.tika.extractor.EmbeddedDocumentUtil;
+ import org.apache.tika.io.EndianUtils;
++import org.apache.tika.io.TikaInputStream;
+ import org.apache.tika.metadata.Metadata;
+ import org.apache.tika.metadata.Photoshop;
+ import org.apache.tika.metadata.TIFF;
+@@ -33,6 +40,7 @@
+ import org.apache.tika.mime.MediaType;
+ import org.apache.tika.parser.AbstractParser;
+ import org.apache.tika.parser.ParseContext;
++import org.apache.tika.parser.image.xmp.JempboxExtractor;
+ import org.apache.tika.sax.XHTMLContentHandler;
+ import org.xml.sax.ContentHandler;
+ import org.xml.sax.SAXException;
+@@ -44,6 +52,9 @@
+ * <p/>
+ * Documentation on the file format is available from
+ * http://www.adobe.com/devnet-apps/photoshop/fileformatashtml/PhotoshopFileFormats.htm
++ *
++ * An MIT-licensed python parser with test files is:
++ * https://github.com/psd-tools/psd-tools
+ */
+ public class PSDParser extends AbstractParser {
+
+@@ -56,6 +67,9 @@
+ Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+ MediaType.image("vnd.adobe.photoshop"))));
+
++ private static final int MAX_DATA_LENGTH_BYTES = 1000000;
++ private static final int MAX_BLOCKS = 10000;
++
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return SUPPORTED_TYPES;
+ }
+@@ -101,19 +115,27 @@ public void parse(
+
+ // Colour mode, eg Bitmap or RGB
+ int colorMode = EndianUtils.readUShortBE(stream);
+- metadata.set(Photoshop.COLOR_MODE, Photoshop._COLOR_MODE_CHOICES_INDEXED[colorMode]);
++ if (colorMode < Photoshop._COLOR_MODE_CHOICES_INDEXED.length) {
++ metadata.set(Photoshop.COLOR_MODE, Photoshop._COLOR_MODE_CHOICES_INDEXED[colorMode]);
++ }
+
+ // Next is the Color Mode section
+ // We don't care about this bit
+ long colorModeSectionSize = EndianUtils.readIntBE(stream);
+- stream.skip(colorModeSectionSize);
++ IOUtils.skipFully(stream, colorModeSectionSize);
+
+ // Next is the Image Resources section
+ // Check for certain interesting keys here
+ long imageResourcesSectionSize = EndianUtils.readIntBE(stream);
+ long read = 0;
+- while (read < imageResourcesSectionSize) {
++ //if something is corrupt about this number, prevent an
++ //infinite loop by only reading 10000 blocks
++ int blocks = 0;
++ while (read < imageResourcesSectionSize && blocks < MAX_BLOCKS) {
+ ResourceBlock rb = new ResourceBlock(stream);
++ if (rb.totalLength <= 0) {
++ //break;
++ }
+ read += rb.totalLength;
+
+ // Is it one we can do something useful with?
+@@ -124,8 +146,12 @@ public void parse(
+ } else if (rb.id == ResourceBlock.ID_EXIF_3) {
+ // TODO Parse the EXIF info via ImageMetadataExtractor
+ } else if (rb.id == ResourceBlock.ID_XMP) {
+- // TODO Parse the XMP info via ImageMetadataExtractor
++ //if there are multiple xmps in a file, this will
++ //overwrite the data from the earlier xmp
++ JempboxExtractor ex = new JempboxExtractor(metadata);
++ ex.parse(new ByteArrayInputStream(rb.data));
+ }
++ blocks++;
+ }
+
+ // Next is the Layer and Mask Info
+@@ -141,17 +167,21 @@ public void parse(
+ private static class ResourceBlock {
+ private static final long SIGNATURE = 0x3842494d; // 8BIM
+ private static final int ID_CAPTION = 0x03F0;
+- private static final int ID_URL = 0x040B;
+ private static final int ID_EXIF_1 = 0x0422;
+ private static final int ID_EXIF_3 = 0x0423;
+ private static final int ID_XMP = 0x0424;
++ //TODO
++ private static final int ID_URL = 0x040B;
++ private static final int ID_AUTO_SAVE_FILE_PATH = 0x043E;
++ private static final int ID_THUMBNAIL_RESOURCE = 0x040C;
+
+ private int id;
+ private String name;
+ private byte[] data;
+ private int totalLength;
+-
++ static int counter = 0;
+ private ResourceBlock(InputStream stream) throws IOException, TikaException {
++ counter++;
+ // Verify the signature
+ long sig = EndianUtils.readIntBE(stream);
+ if (sig != SIGNATURE) {
+@@ -166,6 +196,9 @@ private ResourceBlock(InputStream stream) throws IOException, TikaException {
+ int nameLen = 0;
+ while (true) {
+ int v = stream.read();
++ if (v < 0) {
++ throw new EOFException();
++ }
+ nameLen++;
+
+ if (v == 0) {
+@@ -182,16 +215,26 @@ private ResourceBlock(InputStream stream) throws IOException, TikaException {
+ }
+
+ int dataLen = EndianUtils.readIntBE(stream);
++ if (dataLen < 0) {
++ throw new TikaException("data length must be >= 0: "+dataLen);
++ }
+ if (dataLen % 2 == 1) {
+ // Data Length is even padded
+ dataLen = dataLen + 1;
+ }
++ //protect against overflow
++ if (Integer.MAX_VALUE-dataLen < nameLen+10) {
++ throw new TikaException("data length is too long:"+dataLen);
++ }
+ totalLength = 4 + 2 + nameLen + 4 + dataLen;
+-
+ // Do we have use for the data segment?
+ if (captureData(id)) {
+- data = new byte[dataLen];
+- IOUtils.readFully(stream, data);
++ if (dataLen > MAX_DATA_LENGTH_BYTES) {
++ throw new TikaException("data length must be < "+MAX_DATA_LENGTH_BYTES+
++ ": "+dataLen);
++ }
++ data = new byte[dataLen];
++ IOUtils.readFully(stream, data);
+ } else {
+ data = new byte[0];
+ IOUtils.skipFully(stream, dataLen);
+diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+index 82ebe7bf92..f748ec5159 100644
+--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
++++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+@@ -19,14 +19,20 @@
+ import static org.junit.Assert.assertEquals;
+
+ import java.io.InputStream;
++import java.nio.file.Files;
++import java.nio.file.Paths;
++import java.util.List;
+
++import org.apache.tika.TikaTest;
++import org.apache.tika.io.TikaInputStream;
+ import org.apache.tika.metadata.Metadata;
++import org.apache.tika.metadata.XMPMM;
+ import org.apache.tika.parser.ParseContext;
+ import org.apache.tika.parser.Parser;
+ import org.junit.Test;
+ import org.xml.sax.helpers.DefaultHandler;
+
+-public class PSDParserTest {
++public class PSDParserTest extends TikaTest {
+
+ private final Parser parser = new PSDParser();
+
+@@ -61,4 +67,11 @@ public void testOddPSD() throws Exception {
+ assertEquals("70", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ }
++
++ @Test
++ public void testXMP() throws Exception {
++ Metadata metadata = getXML("testPSD_xmp.psd").metadata;
++ assertEquals("Adobe Photoshop CC 2014 (Macintosh)", metadata.get(XMPMM.HISTORY_SOFTWARE_AGENT));
++ assertEquals("xmp.iid:63681182-81a0-4035-b4b2-19bea6201c05", metadata.get(XMPMM.HISTORY_EVENT_INSTANCEID));
++ }
+ }
=====================================
debian/patches/series
=====================================
@@ -6,3 +6,4 @@
11-java11-compatibility.patch
12-json-compatibility.patch
13-missing-dependencies.patch
+14-CVE-2020-1950_CVE-2020-1951.patch
View it on GitLab: https://salsa.debian.org/java-team/tika/-/commit/1a8d36ebff54d00c2dda5d27671305ba16680855
--
View it on GitLab: https://salsa.debian.org/java-team/tika/-/commit/1a8d36ebff54d00c2dda5d27671305ba16680855
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20210131/3ecd3ae8/attachment.html>
More information about the pkg-java-commits
mailing list