[Git][java-team/tika][master] * Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951

Moritz Muehlenhoff gitlab at salsa.debian.org
Sun Jan 31 21:23:46 GMT 2021



Moritz Muehlenhoff pushed to branch master at Debian Java Maintainers / tika


Commits:
1a8d36eb by Moritz Muehlenhoff at 2021-01-31T22:20:42+01:00
* Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951
 (Closes: #954303, #954302)

- - - - -


3 changed files:

- debian/changelog
- + debian/patches/14-CVE-2020-1950_CVE-2020-1951.patch
- debian/patches/series


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+tika (1.22-2) unstable; urgency=medium
+
+  * Cherrypick upstream commit to address CVE-2020-1950 and CVE-2020-1951
+   (Closes: #954303, #954302)
+
+ -- Moritz Muehlenhoff <jmm at debian.org>  Sun, 31 Jan 2021 22:18:47 +0100
+
 tika (1.22-1) unstable; urgency=medium
 
   * New upstream release


=====================================
debian/patches/14-CVE-2020-1950_CVE-2020-1951.patch
=====================================
@@ -0,0 +1,211 @@
+From ab8a9ed830ec710a32e4ffdf4989aea3aaea92ef Mon Sep 17 00:00:00 2001
+From: tallison <tallison at apache.org>
+Date: Fri, 21 Feb 2020 13:27:13 -0500
+Subject: [PATCH] TIKA-3050 -- add xmp extraction from PSD files
+
+Fixes CVE-2020-1950 and CVE-2020-1951, adapted to 1.22
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
+index 4d0510c3fb..b78a366343 100644
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
++++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
+@@ -16,16 +16,23 @@
+  */
+ package org.apache.tika.parser.image;
+ 
++import java.io.ByteArrayInputStream;
++import java.io.EOFException;
+ import java.io.IOException;
+ import java.io.InputStream;
++import java.nio.file.Files;
++import java.nio.file.Paths;
+ import java.util.Arrays;
+ import java.util.Collections;
+ import java.util.HashSet;
+ import java.util.Set;
+ 
+-import org.apache.poi.util.IOUtils;
++import org.apache.commons.io.IOUtils;
+ import org.apache.tika.exception.TikaException;
++import org.apache.tika.extractor.EmbeddedDocumentExtractor;
++import org.apache.tika.extractor.EmbeddedDocumentUtil;
+ import org.apache.tika.io.EndianUtils;
++import org.apache.tika.io.TikaInputStream;
+ import org.apache.tika.metadata.Metadata;
+ import org.apache.tika.metadata.Photoshop;
+ import org.apache.tika.metadata.TIFF;
+@@ -33,6 +40,7 @@
+ import org.apache.tika.mime.MediaType;
+ import org.apache.tika.parser.AbstractParser;
+ import org.apache.tika.parser.ParseContext;
++import org.apache.tika.parser.image.xmp.JempboxExtractor;
+ import org.apache.tika.sax.XHTMLContentHandler;
+ import org.xml.sax.ContentHandler;
+ import org.xml.sax.SAXException;
+@@ -44,6 +52,9 @@
+  * <p/>
+  * Documentation on the file format is available from
+  * http://www.adobe.com/devnet-apps/photoshop/fileformatashtml/PhotoshopFileFormats.htm
++ *
++ * An MIT-licensed python parser with test files is:
++ * https://github.com/psd-tools/psd-tools
+  */
+ public class PSDParser extends AbstractParser {
+ 
+@@ -56,6 +67,9 @@
+             Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                     MediaType.image("vnd.adobe.photoshop"))));
+ 
++    private static final int MAX_DATA_LENGTH_BYTES = 1000000;
++    private static final int MAX_BLOCKS = 10000;
++
+     public Set<MediaType> getSupportedTypes(ParseContext context) {
+         return SUPPORTED_TYPES;
+     }
+@@ -101,19 +115,27 @@ public void parse(
+ 
+         // Colour mode, eg Bitmap or RGB
+         int colorMode = EndianUtils.readUShortBE(stream);
+-        metadata.set(Photoshop.COLOR_MODE, Photoshop._COLOR_MODE_CHOICES_INDEXED[colorMode]);
++        if (colorMode < Photoshop._COLOR_MODE_CHOICES_INDEXED.length) {
++            metadata.set(Photoshop.COLOR_MODE, Photoshop._COLOR_MODE_CHOICES_INDEXED[colorMode]);
++        }
+ 
+         // Next is the Color Mode section
+         // We don't care about this bit
+         long colorModeSectionSize = EndianUtils.readIntBE(stream);
+-        stream.skip(colorModeSectionSize);
++        IOUtils.skipFully(stream, colorModeSectionSize);
+ 
+         // Next is the Image Resources section
+         // Check for certain interesting keys here
+         long imageResourcesSectionSize = EndianUtils.readIntBE(stream);
+         long read = 0;
+-        while (read < imageResourcesSectionSize) {
++        //if something is corrupt about this number, prevent an
++        //infinite loop by only reading 10000 blocks
++        int blocks = 0;
++        while (read < imageResourcesSectionSize && blocks < MAX_BLOCKS) {
+             ResourceBlock rb = new ResourceBlock(stream);
++            if (rb.totalLength <= 0) {
++                //break;
++            }
+             read += rb.totalLength;
+ 
+             // Is it one we can do something useful with?
+@@ -124,8 +146,12 @@ public void parse(
+             } else if (rb.id == ResourceBlock.ID_EXIF_3) {
+                 // TODO Parse the EXIF info via ImageMetadataExtractor
+             } else if (rb.id == ResourceBlock.ID_XMP) {
+-                // TODO Parse the XMP info via ImageMetadataExtractor
++                //if there are multiple xmps in a file, this will
++                //overwrite the data from the earlier xmp
++                JempboxExtractor ex = new JempboxExtractor(metadata);
++                ex.parse(new ByteArrayInputStream(rb.data));
+             }
++            blocks++;
+         }
+ 
+         // Next is the Layer and Mask Info
+@@ -141,17 +167,21 @@ public void parse(
+     private static class ResourceBlock {
+         private static final long SIGNATURE = 0x3842494d; // 8BIM
+         private static final int ID_CAPTION = 0x03F0;
+-        private static final int ID_URL = 0x040B;
+         private static final int ID_EXIF_1 = 0x0422;
+         private static final int ID_EXIF_3 = 0x0423;
+         private static final int ID_XMP = 0x0424;
++        //TODO
++        private static final int ID_URL = 0x040B;
++        private static final int ID_AUTO_SAVE_FILE_PATH = 0x043E;
++        private static final int ID_THUMBNAIL_RESOURCE = 0x040C;
+ 
+         private int id;
+         private String name;
+         private byte[] data;
+         private int totalLength;
+-
++        static int counter = 0;
+         private ResourceBlock(InputStream stream) throws IOException, TikaException {
++            counter++;
+             // Verify the signature
+             long sig = EndianUtils.readIntBE(stream);
+             if (sig != SIGNATURE) {
+@@ -166,6 +196,9 @@ private ResourceBlock(InputStream stream) throws IOException, TikaException {
+             int nameLen = 0;
+             while (true) {
+                 int v = stream.read();
++                if (v < 0) {
++                    throw new EOFException();
++                }
+                 nameLen++;
+ 
+                 if (v == 0) {
+@@ -182,16 +215,26 @@ private ResourceBlock(InputStream stream) throws IOException, TikaException {
+             }
+ 
+             int dataLen = EndianUtils.readIntBE(stream);
++            if (dataLen < 0) {
++                throw new TikaException("data length must be >= 0: "+dataLen);
++            }
+             if (dataLen % 2 == 1) {
+                 // Data Length is even padded
+                 dataLen = dataLen + 1;
+             }
++            //protect against overflow
++            if (Integer.MAX_VALUE-dataLen < nameLen+10) {
++                throw new TikaException("data length is too long:"+dataLen);
++            }
+             totalLength = 4 + 2 + nameLen + 4 + dataLen;
+-
+             // Do we have use for the data segment?
+             if (captureData(id)) {
+-               data = new byte[dataLen];
+-               IOUtils.readFully(stream, data);
++                if (dataLen > MAX_DATA_LENGTH_BYTES) {
++                    throw new TikaException("data length must be < "+MAX_DATA_LENGTH_BYTES+
++                            ": "+dataLen);
++                }
++                data = new byte[dataLen];
++                IOUtils.readFully(stream, data);
+             } else {
+                 data = new byte[0];
+                 IOUtils.skipFully(stream, dataLen);
+diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+index 82ebe7bf92..f748ec5159 100644
+--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
++++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+@@ -19,14 +19,20 @@
+ import static org.junit.Assert.assertEquals;
+ 
+ import java.io.InputStream;
++import java.nio.file.Files;
++import java.nio.file.Paths;
++import java.util.List;
+ 
++import org.apache.tika.TikaTest;
++import org.apache.tika.io.TikaInputStream;
+ import org.apache.tika.metadata.Metadata;
++import org.apache.tika.metadata.XMPMM;
+ import org.apache.tika.parser.ParseContext;
+ import org.apache.tika.parser.Parser;
+ import org.junit.Test;
+ import org.xml.sax.helpers.DefaultHandler;
+ 
+-public class PSDParserTest {
++public class PSDParserTest extends TikaTest {
+ 
+     private final Parser parser = new PSDParser();
+ 
+@@ -61,4 +67,11 @@ public void testOddPSD() throws Exception {
+         assertEquals("70", metadata.get(Metadata.IMAGE_LENGTH));
+         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+     }
++
++    @Test
++    public void testXMP() throws Exception {
++        Metadata metadata = getXML("testPSD_xmp.psd").metadata;
++        assertEquals("Adobe Photoshop CC 2014 (Macintosh)", metadata.get(XMPMM.HISTORY_SOFTWARE_AGENT));
++        assertEquals("xmp.iid:63681182-81a0-4035-b4b2-19bea6201c05", metadata.get(XMPMM.HISTORY_EVENT_INSTANCEID));
++    }
+ }


=====================================
debian/patches/series
=====================================
@@ -6,3 +6,4 @@
 11-java11-compatibility.patch
 12-json-compatibility.patch
 13-missing-dependencies.patch
+14-CVE-2020-1950_CVE-2020-1951.patch



View it on GitLab: https://salsa.debian.org/java-team/tika/-/commit/1a8d36ebff54d00c2dda5d27671305ba16680855

-- 
View it on GitLab: https://salsa.debian.org/java-team/tika/-/commit/1a8d36ebff54d00c2dda5d27671305ba16680855
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-java-commits/attachments/20210131/3ecd3ae8/attachment.html>


More information about the pkg-java-commits mailing list