[tika] 10/11: Ignore some tika-parser classes...

Markus Koschany apo-guest at moszumanska.debian.org
Tue Dec 1 19:19:50 UTC 2015


This is an automated email from the git hooks/post-receive script.

apo-guest pushed a commit to branch master
in repository tika.

commit 4cc0552258fb048eb7ab4b6104c9c0ed5a88e5aa
Author: Markus Koschany <apo at debian.org>
Date:   Tue Dec 1 19:16:02 2015 +0100

    Ignore some tika-parser classes...
---
 debian/patches/ignore-com.drew.imaging.webp.patch  |  563 +++++++++++
 .../ignore-com.github.junrar.exception.patch       |  125 +++
 .../ignore-com.healthmarketscience.jackcess.patch  |  497 ++++++++++
 debian/patches/ignore-com.pff.patch                |  218 +++++
 debian/patches/ignore-javax.ws.rs.core.patch       |  127 +++
 debian/patches/ignore-opennlp.tools.namefind.patch |  142 +++
 debian/patches/ignore-org.apache.ctakes.patch      | 1014 ++++++++++++++++++++
 .../ignore-org.apache.poi.hslf.usermodel.patch     |  353 +++++++
 .../ignore-org.apache.poi.hssf.extractor.patch     |  112 +++
 debian/patches/ignore-org.json.XML.patch           |  908 ++++++++++++++++++
 .../ignore-package-org.apache.poi.xwpf.patch       |  647 +++++++++++++
 debian/patches/ignore-sqlite-jdbc.patch            |  125 +++
 debian/patches/ignore-ucar.nc2.patch               |  137 +++
 debian/patches/series                              |   13 +
 14 files changed, 4981 insertions(+)

diff --git a/debian/patches/ignore-com.drew.imaging.webp.patch b/debian/patches/ignore-com.drew.imaging.webp.patch
new file mode 100644
index 0000000..f58b1d1
--- /dev/null
+++ b/debian/patches/ignore-com.drew.imaging.webp.patch
@@ -0,0 +1,563 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:10:52 +0100
+Subject: ignore com.drew.imaging.webp
+
+---
+ .../tika/parser/image/ImageMetadataExtractor.java  | 548 ---------------------
+ 1 file changed, 548 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+deleted file mode 100644
+index dd732f4..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
++++ /dev/null
+@@ -1,548 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.image;
+-
+-import java.io.File;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.text.DecimalFormat;
+-import java.text.DecimalFormatSymbols;
+-import java.text.SimpleDateFormat;
+-import java.util.Date;
+-import java.util.Iterator;
+-import java.util.Locale;
+-import java.util.regex.Matcher;
+-import java.util.regex.Pattern;
+-
+-import com.drew.imaging.jpeg.JpegMetadataReader;
+-import com.drew.imaging.jpeg.JpegProcessingException;
+-import com.drew.imaging.riff.RiffProcessingException;
+-import com.drew.imaging.tiff.TiffMetadataReader;
+-import com.drew.imaging.tiff.TiffProcessingException;
+-import com.drew.imaging.webp.WebpMetadataReader;
+-import com.drew.lang.ByteArrayReader;
+-import com.drew.lang.GeoLocation;
+-import com.drew.lang.Rational;
+-import com.drew.metadata.Directory;
+-import com.drew.metadata.MetadataException;
+-import com.drew.metadata.Tag;
+-import com.drew.metadata.exif.ExifIFD0Directory;
+-import com.drew.metadata.exif.ExifReader;
+-import com.drew.metadata.exif.ExifSubIFDDirectory;
+-import com.drew.metadata.exif.ExifThumbnailDirectory;
+-import com.drew.metadata.exif.GpsDirectory;
+-import com.drew.metadata.iptc.IptcDirectory;
+-import com.drew.metadata.jpeg.JpegCommentDirectory;
+-import com.drew.metadata.jpeg.JpegDirectory;
+-import com.drew.metadata.xmp.XmpReader;
+-import org.apache.poi.util.IOUtils;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.metadata.IPTC;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
+- * to read EXIF and IPTC image metadata and map to Tika fields.
+- * <p/>
+- * As of 2.4.0 the library supports jpeg and tiff.
+- * As of 2.8.0 the library supports webp.
+- */
+-public class ImageMetadataExtractor {
+-
+-    private static final String GEO_DECIMAL_FORMAT_STRING = "#.######"; // 6 dp seems to be reasonable
+-    private final Metadata metadata;
+-    private DirectoryHandler[] handlers;
+-
+-    /**
+-     * @param metadata to extract to, using default directory handlers
+-     */
+-    public ImageMetadataExtractor(Metadata metadata) {
+-        this(metadata,
+-                new CopyUnknownFieldsHandler(),
+-                new JpegCommentHandler(),
+-                new ExifHandler(),
+-                new DimensionsHandler(),
+-                new GeotagHandler(),
+-                new IptcHandler()
+-        );
+-    }
+-
+-    /**
+-     * @param metadata to extract to
+-     * @param handlers handlers in order, note that handlers may override values from earlier handlers
+-     */
+-    public ImageMetadataExtractor(Metadata metadata, DirectoryHandler... handlers) {
+-        this.metadata = metadata;
+-        this.handlers = handlers;
+-    }
+-
+-    private static String trimPixels(String s) {
+-        //if height/width appears as "100 pixels", trim " pixels"
+-        if (s != null) {
+-            int i = s.lastIndexOf(" pixels");
+-            s = s.substring(0, i);
+-        }
+-        return s;
+-    }
+-
+-    public void parseJpeg(File file)
+-            throws IOException, SAXException, TikaException {
+-        try {
+-            com.drew.metadata.Metadata jpegMetadata = JpegMetadataReader.readMetadata(file);
+-            handle(jpegMetadata);
+-        } catch (JpegProcessingException e) {
+-            throw new TikaException("Can't read JPEG metadata", e);
+-        } catch (MetadataException e) {
+-            throw new TikaException("Can't read JPEG metadata", e);
+-        }
+-    }
+-
+-    public void parseTiff(File file)
+-            throws IOException, SAXException, TikaException {
+-        try {
+-            com.drew.metadata.Metadata tiffMetadata = TiffMetadataReader.readMetadata(file);
+-            handle(tiffMetadata);
+-        } catch (MetadataException e) {
+-            throw new TikaException("Can't read TIFF metadata", e);
+-        } catch (TiffProcessingException e) {
+-            throw new TikaException("Can't read TIFF metadata", e);
+-        }
+-    }
+-
+-    public void parseWebP(File file) throws IOException, TikaException {
+-
+-        try {
+-            com.drew.metadata.Metadata webPMetadata = new com.drew.metadata.Metadata();
+-            webPMetadata = WebpMetadataReader.readMetadata(file);
+-            handle(webPMetadata);
+-        } catch (IOException e) {
+-            throw e;
+-        } catch (RiffProcessingException e) {
+-            throw new TikaException("Can't process Riff data", e);
+-        } catch (MetadataException e) {
+-            throw new TikaException("Can't process Riff data", e);
+-        }
+-    }
+-
+-    public void parseRawExif(InputStream stream, int length, boolean needsExifHeader)
+-            throws IOException, SAXException, TikaException {
+-        byte[] exif;
+-        if (needsExifHeader) {
+-            exif = new byte[length + 6];
+-            exif[0] = (byte) 'E';
+-            exif[1] = (byte) 'x';
+-            exif[2] = (byte) 'i';
+-            exif[3] = (byte) 'f';
+-            IOUtils.readFully(stream, exif, 6, length);
+-        } else {
+-            exif = new byte[length];
+-            IOUtils.readFully(stream, exif, 0, length);
+-        }
+-        parseRawExif(exif);
+-    }
+-
+-    public void parseRawExif(byte[] exifData)
+-            throws IOException, SAXException, TikaException {
+-        com.drew.metadata.Metadata metadata = new com.drew.metadata.Metadata();
+-        ExifReader reader = new ExifReader();
+-        reader.extract(new ByteArrayReader(exifData), metadata, ExifReader.JPEG_SEGMENT_PREAMBLE.length());
+-
+-        try {
+-            handle(metadata);
+-        } catch (MetadataException e) {
+-            throw new TikaException("Can't process the EXIF Data", e);
+-        }
+-    }
+-
+-    public void parseRawXMP(byte[] xmpData)
+-            throws IOException, SAXException, TikaException {
+-        com.drew.metadata.Metadata metadata = new com.drew.metadata.Metadata();
+-        XmpReader reader = new XmpReader();
+-        reader.extract(xmpData, metadata);
+-
+-        try {
+-            handle(metadata);
+-        } catch (MetadataException e) {
+-            throw new TikaException("Can't process the XMP Data", e);
+-        }
+-    }
+-
+-    /**
+-     * Copies extracted tags to tika metadata using registered handlers.
+-     *
+-     * @param metadataExtractor Tag directories from a Metadata Extractor "reader"
+-     * @throws MetadataException This method does not handle exceptions from Metadata Extractor
+-     */
+-    protected void handle(com.drew.metadata.Metadata metadataExtractor)
+-            throws MetadataException {
+-        handle(metadataExtractor.getDirectories().iterator());
+-    }
+-
+-    /**
+-     * Copies extracted tags to tika metadata using registered handlers.
+-     *
+-     * @param directories Metadata Extractor {@link com.drew.metadata.Directory} instances.
+-     * @throws MetadataException This method does not handle exceptions from Metadata Extractor
+-     */
+-    protected void handle(Iterator<Directory> directories) throws MetadataException {
+-        while (directories.hasNext()) {
+-            Directory directory = directories.next();
+-            for (DirectoryHandler handler : handlers) {
+-                if (handler.supports(directory.getClass())) {
+-                    handler.handle(directory, metadata);
+-                }
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Reads one or more type of Metadata Extractor fields.
+-     */
+-    static interface DirectoryHandler {
+-        /**
+-         * @param directoryType A Metadata Extractor directory class
+-         * @return true if the directory type is supported by this handler
+-         */
+-        boolean supports(Class<? extends Directory> directoryType);
+-
+-        /**
+-         * @param directory extracted tags
+-         * @param metadata  current tika metadata
+-         * @throws MetadataException typically field extraction error, aborts all further extraction
+-         */
+-        void handle(Directory directory, Metadata metadata)
+-                throws MetadataException;
+-    }
+-
+-    /**
+-     * Mimics the behavior from TIKA-314 of copying all extracted tags
+-     * to tika metadata using field names from Metadata Extractor.
+-     */
+-    static class CopyAllFieldsHandler implements DirectoryHandler {
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return true;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata)
+-                throws MetadataException {
+-            if (directory.getTags() != null) {
+-                for (Tag tag : directory.getTags()) {
+-                    metadata.set(tag.getTagName(), tag.getDescription());
+-                }
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Copies all fields regardless of directory, if the tag name
+-     * is not identical to a known Metadata field name.
+-     * This leads to more predictable behavior than {@link CopyAllFieldsHandler}.
+-     */
+-    static class CopyUnknownFieldsHandler implements DirectoryHandler {
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return true;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata)
+-                throws MetadataException {
+-            if (directory.getTags() != null) {
+-                for (Tag tag : directory.getTags()) {
+-                    String name = tag.getTagName();
+-                    if (!MetadataFields.isMetadataField(name) && tag.getDescription() != null) {
+-                        String value = tag.getDescription().trim();
+-                        if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+-                            value = Boolean.TRUE.toString();
+-                        } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+-                            value = Boolean.FALSE.toString();
+-                        }
+-                        metadata.set(name, value);
+-                    }
+-                }
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Basic image properties for TIFF and JPEG, at least.
+-     */
+-    static class DimensionsHandler implements DirectoryHandler {
+-        private final Pattern LEADING_NUMBERS = Pattern.compile("(\\d+)\\s*.*");
+-
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return directoryType == JpegDirectory.class ||
+-                    directoryType == ExifSubIFDDirectory.class ||
+-                    directoryType == ExifThumbnailDirectory.class ||
+-                    directoryType == ExifIFD0Directory.class;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+-            // The test TIFF has width and height stored as follows according to exiv2
+-            //Exif.Image.ImageWidth                        Short       1  100
+-            //Exif.Image.ImageLength                       Short       1  75
+-            // and the values are found in "Thumbnail Image Width" (and Height) from Metadata Extractor
+-            set(directory, metadata, JpegDirectory.TAG_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
+-            set(directory, metadata, JpegDirectory.TAG_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
+-            // Bits per sample, two methods of extracting, exif overrides jpeg
+-            set(directory, metadata, JpegDirectory.TAG_DATA_PRECISION, Metadata.BITS_PER_SAMPLE);
+-            set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE, Metadata.BITS_PER_SAMPLE);
+-            // Straightforward
+-            set(directory, metadata, ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
+-        }
+-
+-        private void set(Directory directory, Metadata metadata, int extractTag, Property metadataField) {
+-            if (directory.containsTag(extractTag)) {
+-                Matcher m = LEADING_NUMBERS.matcher(directory.getString(extractTag));
+-                if (m.matches()) {
+-                    metadata.set(metadataField, m.group(1));
+-                }
+-            }
+-        }
+-    }
+-
+-    static class JpegCommentHandler implements DirectoryHandler {
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return directoryType == JpegCommentDirectory.class;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+-            if (directory.containsTag(JpegCommentDirectory.TAG_COMMENT)) {
+-                metadata.add(TikaCoreProperties.COMMENTS, directory.getString(JpegCommentDirectory.TAG_COMMENT));
+-            }
+-        }
+-    }
+-
+-    static class ExifHandler implements DirectoryHandler {
+-        // There's a new ExifHandler for each file processed, so this is thread safe
+-        private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ = new ThreadLocal<SimpleDateFormat>() {
+-            @Override
+-            protected SimpleDateFormat initialValue() {
+-                return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US);
+-            }
+-        };
+-
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return directoryType == ExifIFD0Directory.class ||
+-                    directoryType == ExifSubIFDDirectory.class;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata) {
+-            try {
+-                handleDateTags(directory, metadata);
+-                handlePhotoTags(directory, metadata);
+-                handleCommentTags(directory, metadata);
+-            } catch (MetadataException e) {
+-                // ignore date parse errors and proceed with other tags
+-            }
+-        }
+-
+-        /**
+-         * EXIF may contain image description, although with undefined encoding.
+-         * Use IPTC for other annotation fields, and XMP for unicode support.
+-         */
+-        public void handleCommentTags(Directory directory, Metadata metadata) {
+-            if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
+-                    directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
+-                metadata.set(TikaCoreProperties.DESCRIPTION,
+-                        directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
+-            }
+-        }
+-
+-        /**
+-         * Maps common TIFF and EXIF tags onto the Tika
+-         * TIFF image metadata namespace.
+-         */
+-        public void handlePhotoTags(Directory directory, Metadata metadata) {
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
+-                Object exposure = directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
+-                if (exposure instanceof Rational) {
+-                    metadata.set(Metadata.EXPOSURE_TIME, ((Rational) exposure).doubleValue());
+-                } else {
+-                    metadata.set(Metadata.EXPOSURE_TIME, directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
+-                }
+-            }
+-
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+-                String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
+-                if (flash.contains("Flash fired")) {
+-                    metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
+-                } else if (flash.contains("Flash did not fire")) {
+-                    metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
+-                } else {
+-                    metadata.set(Metadata.FLASH_FIRED, flash);
+-                }
+-            }
+-
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
+-                Object fnumber = directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
+-                if (fnumber instanceof Rational) {
+-                    metadata.set(Metadata.F_NUMBER, ((Rational) fnumber).doubleValue());
+-                } else {
+-                    metadata.set(Metadata.F_NUMBER, directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
+-                }
+-            }
+-
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
+-                Object length = directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
+-                if (length instanceof Rational) {
+-                    metadata.set(Metadata.FOCAL_LENGTH, ((Rational) length).doubleValue());
+-                } else {
+-                    metadata.set(Metadata.FOCAL_LENGTH, directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
+-                }
+-            }
+-
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+-                metadata.set(Metadata.ISO_SPEED_RATINGS, directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
+-            }
+-
+-            if (directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+-                metadata.set(Metadata.EQUIPMENT_MAKE, directory.getString(ExifIFD0Directory.TAG_MAKE));
+-            }
+-            if (directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+-                metadata.set(Metadata.EQUIPMENT_MODEL, directory.getString(ExifIFD0Directory.TAG_MODEL));
+-            }
+-
+-            if (directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+-                Object length = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
+-                if (length instanceof Integer) {
+-                    metadata.set(Metadata.ORIENTATION, Integer.toString((Integer) length));
+-                } else {
+-                    metadata.set(Metadata.ORIENTATION, directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
+-                }
+-            }
+-
+-            if (directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+-                metadata.set(Metadata.SOFTWARE, directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
+-            }
+-
+-            if (directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
+-                Object resolution = directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
+-                if (resolution instanceof Rational) {
+-                    metadata.set(Metadata.RESOLUTION_HORIZONTAL, ((Rational) resolution).doubleValue());
+-                } else {
+-                    metadata.set(Metadata.RESOLUTION_HORIZONTAL, directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
+-                }
+-            }
+-            if (directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
+-                Object resolution = directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
+-                if (resolution instanceof Rational) {
+-                    metadata.set(Metadata.RESOLUTION_VERTICAL, ((Rational) resolution).doubleValue());
+-                } else {
+-                    metadata.set(Metadata.RESOLUTION_VERTICAL, directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
+-                }
+-            }
+-            if (directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+-                metadata.set(Metadata.RESOLUTION_UNIT, directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+-            }
+-            if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)) {
+-                metadata.set(Metadata.IMAGE_WIDTH,
+-                        trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)));
+-            }
+-            if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)) {
+-                metadata.set(Metadata.IMAGE_LENGTH,
+-                        trimPixels(directory.getDescription(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
+-            }
+-        }
+-
+-        /**
+-         * Maps exif dates to metadata fields.
+-         */
+-        public void handleDateTags(Directory directory, Metadata metadata)
+-                throws MetadataException {
+-            // Date/Time Original overrides value from ExifDirectory.TAG_DATETIME
+-            Date original = null;
+-            if (directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
+-                original = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
+-                // Unless we have GPS time we don't know the time zone so date must be set
+-                // as ISO 8601 datetime without timezone suffix (no Z or +/-)
+-                if (original != null) {
+-                    String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(original); // Same time zone as Metadata Extractor uses
+-                    metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
+-                    metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+-                }
+-            }
+-            if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
+-                Date datetime = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
+-                if (datetime != null) {
+-                    String datetimeNoTimeZone = DATE_UNSPECIFIED_TZ.get().format(datetime);
+-                    metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
+-                    // If Date/Time Original does not exist this might be creation date
+-                    if (metadata.get(TikaCoreProperties.CREATED) == null) {
+-                        metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
+-                    }
+-                }
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Reads image comments, originally TIKA-472.
+-     * Metadata Extractor does not read XMP so we need to use the values from Iptc or EXIF
+-     */
+-    static class IptcHandler implements DirectoryHandler {
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return directoryType == IptcDirectory.class;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata)
+-                throws MetadataException {
+-            if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
+-                String[] keywords = directory.getStringArray(IptcDirectory.TAG_KEYWORDS);
+-                for (String k : keywords) {
+-                    metadata.add(TikaCoreProperties.KEYWORDS, k);
+-                }
+-            }
+-            if (directory.containsTag(IptcDirectory.TAG_HEADLINE)) {
+-                metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_HEADLINE));
+-            } else if (directory.containsTag(IptcDirectory.TAG_OBJECT_NAME)) {
+-                metadata.set(TikaCoreProperties.TITLE, directory.getString(IptcDirectory.TAG_OBJECT_NAME));
+-            }
+-            if (directory.containsTag(IptcDirectory.TAG_BY_LINE)) {
+-                metadata.set(TikaCoreProperties.CREATOR, directory.getString(IptcDirectory.TAG_BY_LINE));
+-                metadata.set(IPTC.CREATOR, directory.getString(IptcDirectory.TAG_BY_LINE));
+-            }
+-            if (directory.containsTag(IptcDirectory.TAG_CAPTION)) {
+-                metadata.set(TikaCoreProperties.DESCRIPTION,
+-                        // Looks like metadata extractor returns IPTC newlines as a single carriage return,
+-                        // but the exiv2 command does not so we change to line feed here because that is less surprising to users                        
+-                        directory.getString(IptcDirectory.TAG_CAPTION).replaceAll("\r\n?", "\n"));
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Maps EXIF Geo Tags onto the Tika Geo metadata namespace.
+-     */
+-    static class GeotagHandler implements DirectoryHandler {
+-        public boolean supports(Class<? extends Directory> directoryType) {
+-            return directoryType == GpsDirectory.class;
+-        }
+-
+-        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+-            GeoLocation geoLocation = ((GpsDirectory) directory).getGeoLocation();
+-            if (geoLocation != null) {
+-                DecimalFormat geoDecimalFormat = new DecimalFormat(GEO_DECIMAL_FORMAT_STRING,
+-                        new DecimalFormatSymbols(Locale.ENGLISH));
+-                metadata.set(TikaCoreProperties.LATITUDE, geoDecimalFormat.format(geoLocation.getLatitude()));
+-                metadata.set(TikaCoreProperties.LONGITUDE, geoDecimalFormat.format(geoLocation.getLongitude()));
+-            }
+-        }
+-    }
+-
+-}
diff --git a/debian/patches/ignore-com.github.junrar.exception.patch b/debian/patches/ignore-com.github.junrar.exception.patch
new file mode 100644
index 0000000..8cf7cd3
--- /dev/null
+++ b/debian/patches/ignore-com.github.junrar.exception.patch
@@ -0,0 +1,125 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:13:43 +0100
+Subject: ignore com.github.junrar.exception
+
+---
+ .../java/org/apache/tika/parser/pkg/RarParser.java | 110 ---------------------
+ 1 file changed, 110 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
+deleted file mode 100644
+index 99508b0..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
++++ /dev/null
+@@ -1,110 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.pkg;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Set;
+-
+-import org.apache.tika.exception.EncryptedDocumentException;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import com.github.junrar.Archive;
+-import com.github.junrar.exception.RarException;
+-import com.github.junrar.rarfile.FileHeader;
+-
+-/**
+- * Parser for Rar files.
+- */
+-public class RarParser extends AbstractParser {
+-    private static final long serialVersionUID = 6157727985054451501L;
+-    
+-    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+-            .singleton(MediaType.application("x-rar-compressed"));
+-
+-    @Override
+-    public Set<MediaType> getSupportedTypes(ParseContext arg0) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    @Override
+-    public void parse(InputStream stream, ContentHandler handler,
+-            Metadata metadata, ParseContext context) throws IOException,
+-            SAXException, TikaException {
+-
+-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-        xhtml.startDocument();
+-
+-        EmbeddedDocumentExtractor extractor = context.get(
+-                EmbeddedDocumentExtractor.class,
+-                new ParsingEmbeddedDocumentExtractor(context));
+-
+-        Archive rar = null;
+-        try (TemporaryResources tmp = new TemporaryResources()) {
+-            TikaInputStream tis = TikaInputStream.get(stream, tmp);
+-            rar = new Archive(tis.getFile());
+-
+-            if (rar.isEncrypted()) {
+-                throw new EncryptedDocumentException();
+-            }
+-
+-            //Without this BodyContentHandler does not work
+-            xhtml.element("div", " ");
+-
+-            FileHeader header = rar.nextFileHeader();
+-            while (header != null && !Thread.currentThread().isInterrupted()) {
+-                if (!header.isDirectory()) {
+-                    try (InputStream subFile = rar.getInputStream(header)) {
+-                        Metadata entrydata = PackageParser.handleEntryMetadata(
+-                                "".equals(header.getFileNameW()) ? header.getFileNameString() : header.getFileNameW(),
+-                                header.getCTime(), header.getMTime(),
+-                                header.getFullUnpackSize(),
+-                                xhtml
+-                        );
+-
+-                        if (extractor.shouldParseEmbedded(entrydata)) {
+-                            extractor.parseEmbedded(subFile, handler, entrydata, true);
+-                        }
+-                    }
+-                }
+-
+-                header = rar.nextFileHeader();
+-            }
+-
+-        } catch (RarException e) {
+-            throw new TikaException("RarParser Exception", e);
+-        } finally {
+-            if (rar != null)
+-                rar.close();
+-
+-        }
+-
+-        xhtml.endDocument();
+-    }
+-}
diff --git a/debian/patches/ignore-com.healthmarketscience.jackcess.patch b/debian/patches/ignore-com.healthmarketscience.jackcess.patch
new file mode 100644
index 0000000..f3d771b
--- /dev/null
+++ b/debian/patches/ignore-com.healthmarketscience.jackcess.patch
@@ -0,0 +1,497 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:14:07 +0100
+Subject: ignore com.healthmarketscience.jackcess
+
+---
+ .../tika/parser/microsoft/JackcessExtractor.java   | 345 ---------------------
+ .../tika/parser/microsoft/JackcessParser.java      | 129 --------
+ 2 files changed, 474 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
+deleted file mode 100644
+index e224d54..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
++++ /dev/null
+@@ -1,345 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.microsoft;
+-
+-
+-import static java.nio.charset.StandardCharsets.UTF_8;
+-
+-import java.io.ByteArrayInputStream;
+-import java.io.IOException;
+-import java.math.BigDecimal;
+-import java.text.DateFormat;
+-import java.text.NumberFormat;
+-import java.util.Date;
+-import java.util.HashSet;
+-import java.util.Iterator;
+-import java.util.List;
+-import java.util.Locale;
+-import java.util.Set;
+-
+-import com.healthmarketscience.jackcess.Column;
+-import com.healthmarketscience.jackcess.DataType;
+-import com.healthmarketscience.jackcess.Database;
+-import com.healthmarketscience.jackcess.PropertyMap;
+-import com.healthmarketscience.jackcess.Row;
+-import com.healthmarketscience.jackcess.Table;
+-import com.healthmarketscience.jackcess.query.Query;
+-import com.healthmarketscience.jackcess.util.OleBlob;
+-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.OfficeOpenXMLExtended;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.html.HtmlParser;
+-import org.apache.tika.sax.BodyContentHandler;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * Internal class.  Needs to be instantiated for each parse because of
+- * the lack of thread safety with the dateTimeFormatter
+- */
+-class JackcessExtractor extends AbstractPOIFSExtractor {
+-
+-    final static String TITLE_PROP_KEY = "Title";
+-    final static String AUTHOR_PROP_KEY = "Author";
+-    final static String COMPANY_PROP_KEY = "Company";
+-
+-    final static String TEXT_FORMAT_KEY = "TextFormat";
+-    final static String CURRENCY_FORMAT_KEY = "Format";
+-    final static byte TEXT_FORMAT = 0;
+-    final static byte RICH_TEXT_FORMAT = 1;
+-    final static ParseContext EMPTY_PARSE_CONTEXT = new ParseContext();
+-
+-    final NumberFormat currencyFormatter;
+-    final DateFormat shortDateTimeFormatter;
+-
+-    final HtmlParser htmlParser = new HtmlParser();
+-
+-    protected JackcessExtractor(ParseContext context, Locale locale) {
+-        super(context);
+-        currencyFormatter = NumberFormat.getCurrencyInstance(locale);
+-        shortDateTimeFormatter = DateFormat.getDateInstance(DateFormat.SHORT, locale);
+-    }
+-
+-    public void parse(Database db, XHTMLContentHandler xhtml, Metadata metadata) throws IOException, SAXException, TikaException {
+-
+-
+-        String pw = db.getDatabasePassword();
+-        if (pw != null) {
+-            metadata.set(JackcessParser.MDB_PW, pw);
+-        }
+-
+-        PropertyMap dbp = db.getDatabaseProperties();
+-        for (PropertyMap.Property p : dbp) {
+-            metadata.add(JackcessParser.MDB_PROPERTY_PREFIX + p.getName(),
+-                    toString(p.getValue(), p.getType()));
+-        }
+-
+-        PropertyMap up = db.getUserDefinedProperties();
+-        for (PropertyMap.Property p : up) {
+-            metadata.add(JackcessParser.USER_DEFINED_PROPERTY_PREFIX+ p.getName(),
+-                    toString(p.getValue(), p.getType()));
+-        }
+-
+-        Set<String> found = new HashSet<>();
+-        PropertyMap summaryProperties = db.getSummaryProperties();
+-        if (summaryProperties != null) {
+-            //try to get core properties
+-            PropertyMap.Property title = summaryProperties.get(TITLE_PROP_KEY);
+-            if (title != null) {
+-                metadata.set(TikaCoreProperties.TITLE, toString(title.getValue(), title.getType()));
+-                found.add(title.getName());
+-            }
+-            PropertyMap.Property author = summaryProperties.get(AUTHOR_PROP_KEY);
+-            if (author != null && author.getValue() != null) {
+-                String authorString = toString(author.getValue(), author.getType());
+-                SummaryExtractor.addMulti(metadata, TikaCoreProperties.CREATOR, authorString);
+-                found.add(author.getName());
+-            }
+-            PropertyMap.Property company = summaryProperties.get(COMPANY_PROP_KEY);
+-            if (company != null) {
+-                metadata.set(OfficeOpenXMLExtended.COMPANY, toString(company.getValue(), company.getType()));
+-                found.add(company.getName());
+-            }
+-
+-            for (PropertyMap.Property p : db.getSummaryProperties()) {
+-                if (! found.contains(p.getName())) {
+-                    metadata.add(JackcessParser.SUMMARY_PROPERTY_PREFIX + p.getName(),
+-                            toString(p.getValue(), p.getType()));
+-                }
+-            }
+-
+-        }
+-
+-        Iterator<Table> it = db.newIterable().
+-                setIncludeLinkedTables(false).
+-                setIncludeSystemTables(false).iterator();
+-
+-        while (it.hasNext()) {
+-            Table table = it.next();
+-            String tableName = table.getName();
+-            List<? extends Column> columns = table.getColumns();
+-            xhtml.startElement("table", "name", tableName);
+-            addHeaders(columns, xhtml);
+-            xhtml.startElement("tbody");
+-
+-            Row r = table.getNextRow();
+-
+-            while (r != null) {
+-                xhtml.startElement("tr");
+-                for (Column c : columns) {
+-                    handleCell(r, c, xhtml);
+-                }
+-                xhtml.endElement("tr");
+-                r = table.getNextRow();
+-            }
+-            xhtml.endElement("tbody");
+-            xhtml.endElement("table");
+-        }
+-
+-        for (Query q : db.getQueries()) {
+-            xhtml.startElement("div", "type", "sqlQuery");
+-            xhtml.characters(q.toSQLString());
+-            xhtml.endElement("div");
+-        }
+-    }
+-
+-    private void addHeaders(List<? extends Column> columns, XHTMLContentHandler xhtml) throws SAXException {
+-        xhtml.startElement("thead");
+-        xhtml.startElement("tr");
+-        for (Column c : columns) {
+-            xhtml.startElement("th");
+-            xhtml.characters(c.getName());
+-            xhtml.endElement("th");
+-        }
+-        xhtml.endElement("tr");
+-        xhtml.endElement("thead");
+-
+-    }
+-
+-    private void handleCell(Row r, Column c, XHTMLContentHandler handler)
+-            throws SAXException, IOException, TikaException {
+-
+-        handler.startElement("td");
+-        if (c.getType().equals(DataType.OLE)) {
+-            handleOLE(r, c.getName(), handler);
+-        } else if (c.getType().equals(DataType.BINARY)) {
+-            Object obj = r.get(c.getName());
+-            if (obj != null) {
+-                byte[] bytes = (byte[])obj;
+-                handleEmbeddedResource(
+-                        TikaInputStream.get(bytes),
+-                        null,//filename
+-                        null,//relationshipId
+-                        null,//mediatype
+-                        handler, false);
+-            }
+-        } else {
+-            Object obj = r.get(c.getName());
+-            String v = toString(obj, c.getType());
+-            if (isRichText(c)) {
+-                BodyContentHandler h = new BodyContentHandler();
+-                Metadata m = new Metadata();
+-                m.set(Metadata.CONTENT_TYPE, "text/html; charset=UTF-8");
+-                try {
+-                    htmlParser.parse(new ByteArrayInputStream(v.getBytes(UTF_8)),
+-                            h,
+-                           m, EMPTY_PARSE_CONTEXT);
+-                    handler.characters(h.toString());
+-                } catch (SAXException e) {
+-                    //if something went wrong in htmlparser, just append the characters
+-                    handler.characters(v);
+-                }
+-            } else {
+-                handler.characters(v);
+-            }
+-        }
+-        handler.endElement("td");
+-    }
+-
+-    private boolean isRichText(Column c) throws IOException {
+-
+-        if (c == null) {
+-            return false;
+-        }
+-
+-        PropertyMap m = c.getProperties();
+-        if (m == null) {
+-            return false;
+-        }
+-        if (c.getType() == null || ! c.getType().equals(DataType.MEMO)) {
+-            return false;
+-        }
+-        Object b = m.getValue(TEXT_FORMAT_KEY);
+-        if (b instanceof Byte) {
+-            if (((Byte)b).byteValue() == RICH_TEXT_FORMAT) {
+-                return true;
+-            }
+-        }
+-        return false;
+-    }
+-
+-    private String toString(Object value, DataType type) {
+-        if (value == null) {
+-            return "";
+-        }
+-        if (type == null) {
+-            //this shouldn't happen
+-            return value.toString();
+-        }
+-        switch (type) {
+-            case LONG:
+-                return Integer.toString((Integer)value);
+-            case TEXT:
+-                return (String)value;
+-            case MONEY:
+-                //TODO: consider getting parsing "Format" field from
+-                //field properties.
+-                return formatCurrency(((BigDecimal)value).doubleValue(), type);
+-            case SHORT_DATE_TIME:
+-                return formatShortDateTime((Date)value);
+-            case BOOLEAN:
+-                return Boolean.toString((Boolean) value);
+-            case MEMO:
+-                return (String)value;
+-            case INT:
+-                return Short.toString((Short)value);
+-            case DOUBLE:
+-                return Double.toString((Double)value);
+-            case FLOAT:
+-                return Float.toString((Float)value);
+-            case NUMERIC:
+-                return value.toString();
+-            case BYTE:
+-                return Byte.toString((Byte)value);
+-            case GUID:
+-                return value.toString();
+-            case COMPLEX_TYPE: //skip all these
+-            case UNKNOWN_0D:
+-            case UNKNOWN_11:
+-            case UNSUPPORTED_FIXEDLEN:
+-            case UNSUPPORTED_VARLEN:
+-            default:
+-                return "";
+-
+-        }
+-    }
+-
+-    private void handleOLE(Row row, String cName, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
+-        OleBlob blob = row.getBlob(cName);
+-        //lifted shamelessly from Jackcess's OleBlobTest
+-        if (blob == null)
+-            return;
+-
+-        OleBlob.Content content = blob.getContent();
+-        if (content == null)
+-            return;
+-
+-        switch (content.getType()) {
+-            case LINK:
+-                xhtml.characters(((OleBlob.LinkContent) content).getLinkPath());
+-                break;
+-            case SIMPLE_PACKAGE:
+-                OleBlob.SimplePackageContent spc = (OleBlob.SimplePackageContent) content;
+-
+-                handleEmbeddedResource(
+-                        TikaInputStream.get(spc.getStream()),
+-                        spc.getFileName(),//filename
+-                        null,//relationshipId
+-                        spc.getTypeName(),//mediatype
+-                        xhtml, false);
+-                break;
+-            case OTHER:
+-                OleBlob.OtherContent oc = (OleBlob.OtherContent) content;
+-                handleEmbeddedResource(
+-                        TikaInputStream.get(oc.getStream()),
+-                        null,//filename
+-                        null,//relationshipId
+-                        oc.getTypeName(),//mediatype
+-                        xhtml, false);
+-                break;
+-            case COMPOUND_STORAGE:
+-                OleBlob.CompoundContent cc = (OleBlob.CompoundContent) content;
+-                handleCompoundContent(cc, xhtml);
+-                break;
+-        }
+-    }
+-
+-    private void handleCompoundContent(OleBlob.CompoundContent cc, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
+-        NPOIFSFileSystem nfs = new NPOIFSFileSystem(cc.getStream());
+-        handleEmbeddedOfficeDoc(nfs.getRoot(), xhtml);
+-    }
+-
+-    String formatCurrency(Double d, DataType type) {
+-        if (d == null) {
+-            return "";
+-        }
+-        return currencyFormatter.format(d);
+-    }
+-
+-    String formatShortDateTime(Date d) {
+-        if (d == null) {
+-            return "";
+-        }
+-        return shortDateTimeFormatter.format(d);
+-    }
+-}
+-
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
+deleted file mode 100644
+index 9704fbb..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
++++ /dev/null
+@@ -1,129 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.microsoft;
+-
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Locale;
+-import java.util.Set;
+-
+-import com.healthmarketscience.jackcess.CryptCodecProvider;
+-import com.healthmarketscience.jackcess.Database;
+-import com.healthmarketscience.jackcess.DatabaseBuilder;
+-import com.healthmarketscience.jackcess.util.LinkResolver;
+-import org.apache.tika.exception.EncryptedDocumentException;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.PasswordProvider;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * Parser that handles Microsoft Access files via
+- * <a href="http://jackcess.sourceforge.net/>Jackcess</a>
+- * <p>
+- * Many, many thanks to LexisNexis®/Health Market Science (HMS), Brian O'Neill,
+- * and James Ahlborn for relicensing Jackcess to Apache v2.0!
+- */
+-public class JackcessParser extends AbstractParser {
+-
+-    public static final String SUMMARY_PROPERTY_PREFIX = "MDB_SUMMARY_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
+-    public static String MDB_PROPERTY_PREFIX = "MDB_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
+-    public static String USER_DEFINED_PROPERTY_PREFIX = "MDB_USER_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
+-    public static Property MDB_PW = Property.externalText("Password");
+-    private final static LinkResolver IGNORE_LINK_RESOLVER = new IgnoreLinkResolver();
+-
+-    //TODO: figure out how to get this info
+-    // public static Property LINKED_DATABASES = Property.externalTextBag("LinkedDatabases");
+-
+-    private static final long serialVersionUID = -752276948656079347L;
+-
+-    private static final MediaType MEDIA_TYPE = MediaType.application("x-msaccess");
+-
+-    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE);
+-
+-    private Locale locale = Locale.ROOT;
+-
+-    @Override
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    @Override
+-    public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+-                      ParseContext context) throws IOException, SAXException, TikaException {
+-        TikaInputStream tis = TikaInputStream.get(stream);
+-        Database db = null;
+-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-        xhtml.startDocument();
+-
+-        String password = null;
+-        PasswordProvider passwordProvider = context.get(PasswordProvider.class);
+-        if (passwordProvider != null) {
+-            password = passwordProvider.getPassword(metadata);
+-        }
+-        try {
+-            if (password == null) {
+-                //do this to ensure encryption/wrong password exception vs. more generic
+-                //"need right codec" error message.
+-                db = new DatabaseBuilder(tis.getFile())
+-                        .setCodecProvider(new CryptCodecProvider())
+-                        .setReadOnly(true).open();
+-            } else {
+-                db = new DatabaseBuilder(tis.getFile())
+-                        .setCodecProvider(new CryptCodecProvider(password))
+-                        .setReadOnly(true).open();
+-            }
+-            db.setLinkResolver(IGNORE_LINK_RESOLVER);//just in case
+-            JackcessExtractor ex = new JackcessExtractor(context, locale);
+-            ex.parse(db, xhtml, metadata);
+-        } catch (IllegalStateException e) {
+-            if (e.getMessage() != null && e.getMessage().contains("Incorrect password")) {
+-                throw new EncryptedDocumentException(e);
+-            }
+-            throw e;
+-        } finally {
+-            if (db != null) {
+-                try {
+-                    db.close();
+-                } catch (IOException e) {
+-                    //swallow = silent close
+-                }
+-            }
+-        }
+-        xhtml.endDocument();
+-    }
+-
+-    private static final class IgnoreLinkResolver implements LinkResolver {
+-        //If links are resolved, Jackcess might try to open and process
+-        //any file on the current system that is specified as a linked db.
+-        //This could be a nasty security issue.
+-        @Override
+-        public Database resolveLinkedDatabase(Database database, String s) throws IOException {
+-            throw new AssertionError("DO NOT ALLOW RESOLVING OF LINKS!!!");
+-        }
+-    }
+-}
diff --git a/debian/patches/ignore-com.pff.patch b/debian/patches/ignore-com.pff.patch
new file mode 100644
index 0000000..77d7a95
--- /dev/null
+++ b/debian/patches/ignore-com.pff.patch
@@ -0,0 +1,218 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:08:44 +0100
+Subject: ignore com.pff
+
+---
+ .../apache/tika/parser/mbox/OutlookPSTParser.java  | 203 ---------------------
+ 1 file changed, 203 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+deleted file mode 100644
+index 5883bd5..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
++++ /dev/null
+@@ -1,203 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.mbox;
+-
+-import static java.lang.String.valueOf;
+-import static java.nio.charset.StandardCharsets.UTF_8;
+-import static java.util.Collections.singleton;
+-
+-import java.io.ByteArrayInputStream;
+-import java.io.File;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Set;
+-
+-import com.pff.PSTAttachment;
+-import com.pff.PSTFile;
+-import com.pff.PSTFolder;
+-import com.pff.PSTMessage;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-import org.xml.sax.helpers.AttributesImpl;
+-
+-/**
+- * Parser for MS Outlook PST email storage files
+- */
+-public class OutlookPSTParser extends AbstractParser {
+-
+-    private static final long serialVersionUID = 620998217748364063L;
+-
+-    public static final MediaType MS_OUTLOOK_PST_MIMETYPE = MediaType.application("vnd.ms-outlook-pst");
+-    private static final Set<MediaType> SUPPORTED_TYPES = singleton(MS_OUTLOOK_PST_MIMETYPE);
+-
+-    private static AttributesImpl createAttribute(String attName, String attValue) {
+-        AttributesImpl attributes = new AttributesImpl();
+-        attributes.addAttribute("", attName, attName, "CDATA", attValue);
+-        return attributes;
+-    }
+-
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+-            throws IOException, SAXException, TikaException {
+-
+-        // Use the delegate parser to parse the contained document
+-        EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class,
+-                new ParsingEmbeddedDocumentExtractor(context));
+-
+-        metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString());
+-
+-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-        xhtml.startDocument();
+-
+-        TikaInputStream in = TikaInputStream.get(stream);
+-        PSTFile pstFile = null;
+-        try {
+-            pstFile = new PSTFile(in.getFile().getPath());
+-            metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length()));
+-            boolean isValid = pstFile.getFileHandle().getFD().valid();
+-            metadata.set("isValid", valueOf(isValid));
+-            if (isValid) {
+-                parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
+-            }
+-        } catch (Exception e) {
+-            throw new TikaException(e.getMessage(), e);
+-        } finally {
+-            if (pstFile != null && pstFile.getFileHandle() != null) {
+-                try {
+-                    pstFile.getFileHandle().close();
+-                } catch (IOException e) {
+-                    //swallow closing exception
+-                }
+-            }
+-        }
+-
+-        xhtml.endDocument();
+-    }
+-
+-    private void parseFolder(XHTMLContentHandler handler, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor)
+-            throws Exception {
+-        if (pstFolder.getContentCount() > 0) {
+-            PSTMessage pstMail = (PSTMessage) pstFolder.getNextChild();
+-            while (pstMail != null) {
+-                AttributesImpl attributes = new AttributesImpl();
+-                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+-                attributes.addAttribute("", "id", "id", "CDATA", pstMail.getInternetMessageId());
+-                handler.startElement("div", attributes);
+-                handler.element("h1", pstMail.getSubject());
+-
+-                parserMailItem(handler, pstMail, embeddedExtractor);
+-                parseMailAttachments(handler, pstMail, embeddedExtractor);
+-
+-                handler.endElement("div");
+-
+-                pstMail = (PSTMessage) pstFolder.getNextChild();
+-            }
+-        }
+-
+-        if (pstFolder.hasSubfolders()) {
+-            for (PSTFolder pstSubFolder : pstFolder.getSubFolders()) {
+-                handler.startElement("div", createAttribute("class", "email-folder"));
+-                handler.element("h1", pstSubFolder.getDisplayName());
+-                parseFolder(handler, pstSubFolder, embeddedExtractor);
+-                handler.endElement("div");
+-            }
+-        }
+-    }
+-
+-    private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
+-        Metadata mailMetadata = new Metadata();
+-        mailMetadata.set(Metadata.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
+-        mailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
+-        mailMetadata.set(TikaCoreProperties.IDENTIFIER, pstMail.getInternetMessageId());
+-        mailMetadata.set(TikaCoreProperties.TITLE, pstMail.getSubject());
+-        mailMetadata.set(Metadata.MESSAGE_FROM, pstMail.getSenderName());
+-        mailMetadata.set(TikaCoreProperties.CREATOR, pstMail.getSenderName());
+-        mailMetadata.set(TikaCoreProperties.CREATED, pstMail.getCreationTime());
+-        mailMetadata.set(TikaCoreProperties.MODIFIED, pstMail.getLastModificationTime());
+-        mailMetadata.set(TikaCoreProperties.COMMENTS, pstMail.getComment());
+-        mailMetadata.set("descriptorNodeId", valueOf(pstMail.getDescriptorNodeId()));
+-        mailMetadata.set("senderEmailAddress", pstMail.getSenderEmailAddress());
+-        mailMetadata.set("recipients", pstMail.getRecipientsString());
+-        mailMetadata.set("displayTo", pstMail.getDisplayTo());
+-        mailMetadata.set("displayCC", pstMail.getDisplayCC());
+-        mailMetadata.set("displayBCC", pstMail.getDisplayBCC());
+-        mailMetadata.set("importance", valueOf(pstMail.getImportance()));
+-        mailMetadata.set("priority", valueOf(pstMail.getPriority()));
+-        mailMetadata.set("flagged", valueOf(pstMail.isFlagged()));
+-
+-        byte[] mailContent = pstMail.getBody().getBytes(UTF_8);
+-        embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
+-    }
+-
+-    private void parseMailAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor)
+-            throws TikaException {
+-        int numberOfAttachments = email.getNumberOfAttachments();
+-        for (int i = 0; i < numberOfAttachments; i++) {
+-            File tempFile = null;
+-            try {
+-                PSTAttachment attach = email.getAttachment(i);
+-
+-                // Get the filename; both long and short filenames can be used for attachments
+-                String filename = attach.getLongFilename();
+-                if (filename.isEmpty()) {
+-                    filename = attach.getFilename();
+-                }
+-
+-                xhtml.element("p", filename);
+-
+-                Metadata attachMeta = new Metadata();
+-                attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
+-                attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
+-                AttributesImpl attributes = new AttributesImpl();
+-                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+-                attributes.addAttribute("", "id", "id", "CDATA", filename);
+-                xhtml.startElement("div", attributes);
+-                if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
+-                    TemporaryResources tmp = new TemporaryResources();
+-                    try {
+-                        TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
+-                        embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
+-                    } finally {
+-                        tmp.dispose();
+-                    }
+-                }
+-                xhtml.endElement("div");
+-
+-            } catch (Exception e) {
+-                throw new TikaException("Unable to unpack document stream", e);
+-            } finally {
+-                if (tempFile != null)
+-                    tempFile.delete();
+-            }
+-        }
+-    }
+-
+-}
diff --git a/debian/patches/ignore-javax.ws.rs.core.patch b/debian/patches/ignore-javax.ws.rs.core.patch
new file mode 100644
index 0000000..07ca9ff
--- /dev/null
+++ b/debian/patches/ignore-javax.ws.rs.core.patch
@@ -0,0 +1,127 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:12:46 +0100
+Subject: ignore javax.ws.rs.core
+
+---
+ .../tika/parser/journal/GrobidRESTParser.java      | 112 ---------------------
+ 1 file changed, 112 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
+deleted file mode 100644
+index 05b09fc..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
++++ /dev/null
+@@ -1,112 +0,0 @@
+-/**
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.journal;
+-
+-import java.io.File;
+-import java.io.FileInputStream;
+-import java.io.FileNotFoundException;
+-import java.io.IOException;
+-import java.util.Properties;
+-
+-import javax.ws.rs.core.MediaType;
+-import javax.ws.rs.core.Response;
+-
+-import org.apache.cxf.jaxrs.client.WebClient;
+-import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+-import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
+-import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.parser.ParseContext;
+-import org.xml.sax.ContentHandler;
+-
+-public class GrobidRESTParser {
+-
+-  private static final String GROBID_REST_HOST = "http://localhost:8080";
+-
+-  private static final String GROBID_ISALIVE_PATH = "/grobid"; // isalive
+-                                                               // doesn't work
+-                                                               // nfc why
+-
+-  private static final String GROBID_PROCESSHEADER_PATH = "/processHeaderDocument";
+-
+-  private String restHostUrlStr;
+-
+-  public GrobidRESTParser() {
+-    String restHostUrlStr = null;
+-    try {
+-      restHostUrlStr = readRestUrl();
+-    } catch (IOException e) {
+-      e.printStackTrace();
+-    }
+-
+-    if (restHostUrlStr == null
+-        || (restHostUrlStr != null && restHostUrlStr.equals(""))) {
+-      this.restHostUrlStr = GROBID_REST_HOST;
+-    } else {
+-      this.restHostUrlStr = restHostUrlStr;
+-    }
+-  }
+-
+-  public void parse(String filePath, ContentHandler handler, Metadata metadata,
+-      ParseContext context) throws FileNotFoundException {
+-
+-    File pdfFile = new File(filePath);
+-    ContentDisposition cd = new ContentDisposition(
+-        "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");
+-    Attachment att = new Attachment("input", new FileInputStream(pdfFile), cd);
+-    MultipartBody body = new MultipartBody(att);
+-
+-    Response response = WebClient
+-        .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
+-        .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
+-        .post(body);
+-
+-    try {
+-      String resp = response.readEntity(String.class);
+-      Metadata teiMet = new TEIParser().parse(resp);
+-      for (String key : teiMet.names()) {
+-        metadata.add("grobid:header_" + key, teiMet.get(key));
+-      }
+-    } catch (Exception e) {
+-      e.printStackTrace();
+-    }
+-  }
+-
+-  private static String readRestUrl() throws IOException {
+-    Properties grobidProperties = new Properties();
+-    grobidProperties.load(GrobidRESTParser.class
+-        .getResourceAsStream("GrobidExtractor.properties"));
+-
+-    return grobidProperties.getProperty("grobid.server.url");
+-  }
+-
+-  protected static boolean canRun() {
+-    Response response = null;
+-
+-    try {
+-      response = WebClient.create(readRestUrl() + GROBID_ISALIVE_PATH)
+-          .accept(MediaType.TEXT_HTML).get();
+-      String resp = response.readEntity(String.class);
+-      return resp != null && !resp.equals("") && resp.startsWith("<h4>");
+-    } catch (Exception e) {
+-      e.printStackTrace();
+-      return false;
+-    }
+-  }
+-
+-}
diff --git a/debian/patches/ignore-opennlp.tools.namefind.patch b/debian/patches/ignore-opennlp.tools.namefind.patch
new file mode 100644
index 0000000..912fa44
--- /dev/null
+++ b/debian/patches/ignore-opennlp.tools.namefind.patch
@@ -0,0 +1,142 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:12:18 +0100
+Subject: ignore opennlp.tools.namefind
+
+---
+ .../tika/parser/geo/topic/NameEntityExtractor.java | 127 ---------------------
+ 1 file changed, 127 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
+deleted file mode 100644
+index e7435d1..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
++++ /dev/null
+@@ -1,127 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.geo.topic;
+-
+-import java.io.FileInputStream;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.ArrayList;
+-import java.util.Arrays;
+-import java.util.Collections;
+-import java.util.Comparator;
+-import java.util.HashMap;
+-import java.util.List;
+-import java.util.Map;
+-
+-import opennlp.tools.namefind.NameFinderME;
+-import opennlp.tools.namefind.TokenNameFinderModel;
+-import opennlp.tools.util.InvalidFormatException;
+-import opennlp.tools.util.Span;
+-
+-import org.apache.commons.io.IOUtils;
+-
+-import static java.nio.charset.StandardCharsets.UTF_8;
+-
+-public class NameEntityExtractor {
+-	private String nerModelPath = null;
+-	ArrayList<String> locationNameEntities;
+-	String bestNameEntity;
+-	private HashMap<String, Integer> tf;
+-
+-	public NameEntityExtractor(String nerModelpath) {
+-		this.locationNameEntities = new ArrayList<String>();
+-		this.bestNameEntity = null;
+-		this.nerModelPath = nerModelpath;
+-		tf = new HashMap<String, Integer>();
+-
+-	}
+-
+-	/*
+-	 * Use OpenNLP to extract location names that's appearing in the steam.
+-	 * OpenNLP's default Name Finder accuracy is not very good, please refer to
+-	 * its documentation.
+-	 * 
+-	 * @param stream stream that passed from this.parse()
+-	 */
+-
+-	public void getAllNameEntitiesfromInput(InputStream stream)
+-			throws InvalidFormatException, IOException {
+-
+-		InputStream modelIn = new FileInputStream(nerModelPath);
+-		TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
+-		NameFinderME nameFinder = new NameFinderME(model);
+-		String[] in = IOUtils.toString(stream, UTF_8).split(" ");
+-
+-		Span nameE[] = nameFinder.find(in);
+-
+-		String spanNames = Arrays.toString(Span.spansToStrings(nameE, in));
+-		spanNames = spanNames.substring(1, spanNames.length() - 1);
+-		modelIn.close();
+-		String[] tmp = spanNames.split(",");
+-
+-		for (String name : tmp) {
+-			name = name.trim();
+-			this.locationNameEntities.add(name);
+-		}
+-
+-	}
+-
+-	/*
+-	 * Get the best location entity extracted from the input stream. Simply
+-	 * return the most frequent entity, If there several highest frequent
+-	 * entity, pick one randomly. May not be the optimal solution, but works.
+-	 * 
+-	 * @param locationNameEntities OpenNLP name finder's results, stored in
+-	 * ArrayList
+-	 */
+-	public void getBestNameEntity() {
+-		if (this.locationNameEntities.size() == 0)
+-			return;
+-
+-		for (int i = 0; i < this.locationNameEntities.size(); ++i) {
+-			if (tf.containsKey(this.locationNameEntities.get(i)))
+-				tf.put(this.locationNameEntities.get(i),
+-						tf.get(this.locationNameEntities.get(i)) + 1);
+-			else
+-				tf.put(this.locationNameEntities.get(i), 1);
+-		}
+-		int max = 0;
+-		List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(
+-				tf.entrySet());
+-		Collections.shuffle(list);
+-		Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
+-			public int compare(Map.Entry<String, Integer> o1,
+-					Map.Entry<String, Integer> o2) {
+-				return o2.getValue().compareTo(o1.getValue()); // descending
+-				// order
+-
+-			}
+-		});
+-
+-		this.locationNameEntities.clear();// update so that they are in
+-											// descending order
+-		for (Map.Entry<String, Integer> entry : list) {
+-			this.locationNameEntities.add(entry.getKey());
+-			if (entry.getValue() > max) {
+-				max = entry.getValue();
+-				this.bestNameEntity = entry.getKey();
+-			}
+-		}
+-	}
+-
+-}
diff --git a/debian/patches/ignore-org.apache.ctakes.patch b/debian/patches/ignore-org.apache.ctakes.patch
new file mode 100644
index 0000000..d362d8d
--- /dev/null
+++ b/debian/patches/ignore-org.apache.ctakes.patch
@@ -0,0 +1,1014 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:06:44 +0100
+Subject: ignore org.apache.ctakes
+
+---
+ .../parser/ctakes/CTAKESAnnotationProperty.java    |  46 ---
+ .../apache/tika/parser/ctakes/CTAKESConfig.java    | 336 ---------------------
+ .../tika/parser/ctakes/CTAKESContentHandler.java   | 176 -----------
+ .../apache/tika/parser/ctakes/CTAKESParser.java    |  92 ------
+ .../tika/parser/ctakes/CTAKESSerializer.java       |  42 ---
+ .../org/apache/tika/parser/ctakes/CTAKESUtils.java | 265 ----------------
+ 6 files changed, 957 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESSerializer.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
+deleted file mode 100644
+index e6d261d..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
++++ /dev/null
+@@ -1,46 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+-
+-/**
+- * This enumeration includes the properties that an {@see IdentifiedAnnotation} object can provide.
+- *
+- */
+-public enum CTAKESAnnotationProperty {
+-    BEGIN("start"),
+-    END("end"),
+-    CONDITIONAL("conditional"),
+-    CONFIDENCE("confidence"),
+-    DISCOVERY_TECNIQUE("discoveryTechnique"),
+-    GENERIC("generic"),
+-    HISTORY_OF("historyOf"),
+-    ID("id"),
+-    ONTOLOGY_CONCEPT_ARR("ontologyConceptArr"),
+-    POLARITY("polarity");
+-
+-    private String name;
+-
+-    CTAKESAnnotationProperty(String name) {
+-        this.name = name;
+-    }
+-
+-    public String getName() {
+-        return name;
+-    }
+-}
+\ No newline at end of file
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
+deleted file mode 100644
+index 67ba993..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESConfig.java
++++ /dev/null
+@@ -1,336 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.io.OutputStream;
+-import java.io.Serializable;
+-import java.util.Properties;
+-
+-import static org.apache.commons.io.output.NullOutputStream.NULL_OUTPUT_STREAM;
+-
+-/**
+- * Configuration for {@see CTAKESContentHandler}.
+- * 
+- * This class allows to enable cTAKES and set its parameters.
+- */
+-public class CTAKESConfig implements Serializable {
+-    /**
+-     * Serial version UID
+-     */
+-    private static final long serialVersionUID = -1599741171775528923L;
+-
+-    // Path to XML descriptor for AnalysisEngine
+-    private String aeDescriptorPath = "/ctakes-core/desc/analysis_engine/SentencesAndTokensAggregate.xml";
+-
+-    // UMLS username
+-    private String UMLSUser = "";
+-
+-    // UMLS password
+-    private String UMLSPass = "";
+-
+-    // Enables formatted output
+-    private boolean prettyPrint = true; 
+-
+-    // Type of cTAKES (UIMA) serializer
+-    private CTAKESSerializer serializerType = CTAKESSerializer.XMI;
+-
+-    // OutputStream object used for CAS serialization
+-    private OutputStream stream = NULL_OUTPUT_STREAM;
+-
+-    // Enables CAS serialization
+-    private boolean serialize = false;
+-
+-    // Enables text analysis using cTAKES
+-    private boolean text = true;
+-
+-    // List of metadata to analyze using cTAKES
+-    private String[] metadata = null;
+-
+-    // List of annotation properties to add to metadata in addition to text covered by an annotation
+-    private CTAKESAnnotationProperty[] annotationProps = null;
+-
+-    // Character used to separate the annotation properties into metadata
+-    private char separatorChar = ':';
+-
+-    /**
+-     * Default constructor.
+-     */
+-    public CTAKESConfig() {
+-        init(this.getClass().getResourceAsStream("CTAKESConfig.properties"));
+-    }
+-
+-    /**
+-     * Loads properties from InputStream and then tries to close InputStream.
+-     * @param stream {@see InputStream} object used to read properties.
+-     */
+-    public CTAKESConfig(InputStream stream) {
+-        init(stream);
+-    }
+-
+-    private void init(InputStream stream) {
+-        if (stream == null) {
+-            return;
+-        }
+-        Properties props = new Properties();
+-
+-        try {
+-            props.load(stream);
+-        } catch (IOException e) {
+-            // TODO warning
+-        } finally {
+-            if (stream != null) {
+-                try {
+-                    stream.close();
+-                } catch (IOException ioe) {
+-                    // TODO warning
+-                }
+-            }
+-        }
+-
+-        setAeDescriptorPath(props.getProperty("aeDescriptorPath", getAeDescriptorPath()));
+-        setUMLSUser(props.getProperty("UMLSUser", getUMLSUser()));
+-        setUMLSPass(props.getProperty("UMLSPass", getUMLSPass()));
+-        setText(Boolean.valueOf(props.getProperty("text", Boolean.toString(isText()))));
+-        setMetadata(props.getProperty("metadata", getMetadataAsString()).split(","));
+-        setAnnotationProps(props.getProperty("annotationProps", getAnnotationPropsAsString()).split(","));
+-        setSeparatorChar(props.getProperty("separatorChar", Character.toString(getSeparatorChar())).charAt(0));
+-    }
+-
+-    /**
+-     * Returns the path to XML descriptor for AnalysisEngine.
+-     * @return the path to XML descriptor for AnalysisEngine.
+-     */
+-    public String getAeDescriptorPath() {
+-        return aeDescriptorPath;
+-    }
+-
+-    /**
+-     * Returns the UMLS username.
+-     * @return the UMLS username.
+-     */
+-    public String getUMLSUser() {
+-        return UMLSUser;
+-    }
+-
+-    /**
+-     * Returns the UMLS password.
+-     * @return the UMLS password.
+-     */
+-    public String getUMLSPass() {
+-        return UMLSPass;
+-    }
+-
+-    /**
+-     * Returns {@code true} if formatted output is enabled, {@code false} otherwise.
+-     * @return {@code true} if formatted output is enabled, {@code false} otherwise.
+-     */
+-    public boolean isPrettyPrint() {
+-        return prettyPrint;
+-    }
+-
+-    /**
+-     * Returns the type of cTAKES (UIMA) serializer used to write the CAS.
+-     * @return the type of cTAKES serializer.
+-     */
+-    public CTAKESSerializer getSerializerType() {
+-        return serializerType;
+-    }
+-
+-    /**
+-     * Returns an {@see OutputStream} object used write the CAS.
+-     * @return {@see OutputStream} object used write the CAS.
+-     */
+-    public OutputStream getOutputStream() {
+-        return stream;
+-    }
+-
+-    /**
+-     * Returns {@code true} if CAS serialization is enabled, {@code false} otherwise.
+-     * @return {@code true} if CAS serialization output is enabled, {@code false} otherwise.
+-     */
+-    public boolean isSerialize() {
+-        return serialize;
+-    }
+-
+-    /**
+-     * Returns {@code true} if content text analysis is enabled {@code false} otherwise.
+-     * @return {@code true} if content text analysis is enabled {@code false} otherwise.
+-     */
+-    public boolean isText() {
+-        return text;
+-    }
+-
+-    /**
+-     * Returns an array of metadata whose values will be analyzed using cTAKES.
+-     * @return an array of metadata whose values will be analyzed using cTAKES.
+-     */
+-    public String[] getMetadata() {
+-        return metadata;
+-    }
+-
+-    /**
+-     * Returns a string containing a comma-separated list of metadata whose values will be analyzed using cTAKES.
+-     * @return a string containing a comma-separated list of metadata whose values will be analyzed using cTAKES.
+-     */
+-    public String getMetadataAsString() {
+-        if (metadata == null) {
+-            return "";
+-        }
+-        StringBuilder sb = new StringBuilder();
+-        for (int i = 0; i < metadata.length; i++) {
+-            sb.append(metadata[i]);
+-            if (i < metadata.length-1) {
+-                sb.append(",");
+-            }
+-        }
+-        return sb.toString();
+-    }
+-
+-    /**
+-     * Returns an array of {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     * @return an array of {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     */
+-    public CTAKESAnnotationProperty[] getAnnotationProps() {
+-        return annotationProps;
+-    }
+-
+-    /**
+-     * Returns a string containing a comma-separated list of {@see CTAKESAnnotationProperty} names that will be included into cTAKES metadata.
+-     * @return
+-     */
+-    public String getAnnotationPropsAsString() {
+-        StringBuilder sb = new StringBuilder();
+-        sb.append("coveredText");
+-        if (annotationProps != null) {
+-            for (CTAKESAnnotationProperty property : annotationProps) {
+-                sb.append(separatorChar);
+-                sb.append(property.getName());
+-            }
+-        }
+-        return sb.toString();
+-    }
+-
+-    /**
+-     * Returns the separator character used for annotation properties.
+-     * @return the separator character used for annotation properties.
+-     */
+-    public char getSeparatorChar() {
+-        return separatorChar;
+-    }
+-
+-    /**
+-     * Sets the path to XML descriptor for AnalysisEngine.
+-     * @param aeDescriptorPath the path to XML descriptor for AnalysisEngine.
+-     */
+-    public void setAeDescriptorPath(String aeDescriptorPath) {
+-        this.aeDescriptorPath = aeDescriptorPath;
+-    }
+-
+-    /**
+-     * Sets the UMLS username.
+-     * @param uMLSUser the UMLS username.
+-     */
+-    public void setUMLSUser(String uMLSUser) {
+-        this.UMLSUser = uMLSUser;
+-    }
+-
+-    /**
+-     * Sets the UMLS password.
+-     * @param uMLSPass the UMLS password.
+-     */
+-    public void setUMLSPass(String uMLSPass) {
+-        this.UMLSPass = uMLSPass;
+-    }
+-
+-    /**
+-     * Enables the formatted output for serializer.
+-     * @param prettyPrint {@true} to enable formatted output, {@code false} otherwise.
+-     */
+-    public void setPrettyPrint(boolean prettyPrint) {
+-        this.prettyPrint = prettyPrint;
+-    }
+-
+-    /**
+-     * Sets the type of cTAKES (UIMA) serializer used to write CAS. 
+-     * @param serializerType the type of cTAKES serializer.
+-     */
+-    public void setSerializerType(CTAKESSerializer serializerType) {
+-        this.serializerType = serializerType;
+-    }
+-
+-    /**
+-     * Sets the {@see OutputStream} object used to write the CAS.
+-     * @param stream the {@see OutputStream} object used to write the CAS.
+-     */
+-    public void setOutputStream(OutputStream stream) {
+-        this.stream = stream;
+-    }
+-
+-    /**
+-     * Enables CAS serialization.
+-     * @param serialize {@true} to enable CAS serialization, {@code false} otherwise.
+-     */
+-    public void setSerialize(boolean serialize) {
+-        this.serialize = serialize;
+-    }
+-
+-    /**
+-     * Enables content text analysis using cTAKES.
+-     * @param text {@true} to enable content text analysis, {@code false} otherwise.
+-     */
+-    public void setText(boolean text) {
+-        this.text = text;
+-    }
+-
+-    /**
+-     * Sets the metadata whose values will be analyzed using cTAKES.
+-     * @param metadata the metadata whose values will be analyzed using cTAKES.
+-     */
+-    public void setMetadata(String[] metadata) {
+-        this.metadata = metadata;
+-    }
+-
+-    /**
+-     * Sets the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     * @param annotationProps the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     */
+-    public void setAnnotationProps(CTAKESAnnotationProperty[] annotationProps) {
+-        this.annotationProps = annotationProps;
+-    }
+-
+-    /**
+-     * ets the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     * @param annotationProps the {@see CTAKESAnnotationProperty}'s that will be included into cTAKES metadata.
+-     */
+-    public void setAnnotationProps(String[] annotationProps) {
+-        CTAKESAnnotationProperty[] properties = new CTAKESAnnotationProperty[annotationProps.length];
+-        for (int i = 0; i < annotationProps.length; i++) {
+-            properties[i] = CTAKESAnnotationProperty.valueOf(annotationProps[i]);
+-        }
+-        setAnnotationProps(properties);
+-    }
+-
+-    /**
+-     * Sets the separator character used for annotation properties.
+-     * @param separatorChar the separator character used for annotation properties.
+-     */
+-    public void setSeparatorChar(char separatorChar) {
+-        this.separatorChar = separatorChar;
+-    }
+-}
+\ No newline at end of file
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
+deleted file mode 100644
+index 38326e3..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESContentHandler.java
++++ /dev/null
+@@ -1,176 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import java.util.Collection;
+-import java.util.Iterator;
+-
+-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.sax.ContentHandlerDecorator;
+-import org.apache.uima.analysis_engine.AnalysisEngine;
+-import org.apache.uima.fit.util.JCasUtil;
+-import org.apache.uima.jcas.JCas;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-import org.xml.sax.helpers.DefaultHandler;
+-
+-/**
+- * Class used to extract biomedical information while parsing.
+- *
+- * <p>
+- * This class relies on <a href="http://ctakes.apache.org/">Apache cTAKES</a>
+- * that is a natural language processing system for extraction of information
+- * from electronic medical record clinical free-text.
+- * </p>
+- */
+-public class CTAKESContentHandler extends ContentHandlerDecorator {
+-	// Prefix used for metadata including cTAKES annotations
+-	public static String CTAKES_META_PREFIX = "ctakes:";
+-
+-	// Configuration object for CTAKESContentHandler
+-	private CTAKESConfig config = null;
+-
+-	// StringBuilder object used to build the clinical free-text for cTAKES
+-	private StringBuilder sb = null;
+-
+-	// Metadata object used for cTAKES annotations
+-	private Metadata metadata = null;
+-
+-	// UIMA Analysis Engine
+-	private AnalysisEngine ae = null;
+-
+-	// JCas object for working with the CAS (Common Analysis System)
+-	private JCas jcas = null;
+-
+-	/**
+-	 * Creates a new {@see CTAKESContentHandler} for the given {@see
+-	 * ContentHandler} and Metadata objects.
+-	 * 
+-	 * @param handler
+-	 *            the {@see ContentHandler} object to be decorated.
+-	 * @param metadata
+-	 *            the {@see Metadata} object that will be populated using
+-	 *            biomedical information extracted by cTAKES.
+-	 * @param config
+-	 *            the {@see CTAKESConfig} object used to configure the handler.
+-	 */
+-	public CTAKESContentHandler(ContentHandler handler, Metadata metadata,
+-			CTAKESConfig config) {
+-		super(handler);
+-		this.metadata = metadata;
+-		this.config = config;
+-		this.sb = new StringBuilder();
+-	}
+-
+-	/**
+-	 * Creates a new {@see CTAKESContentHandler} for the given {@see
+-	 * ContentHandler} and Metadata objects.
+-	 * 
+-	 * @param handler
+-	 *            the {@see ContentHandler} object to be decorated.
+-	 * @param metadata
+-	 *            the {@see Metadata} object that will be populated using
+-	 *            biomedical information extracted by cTAKES.
+-	 */
+-	public CTAKESContentHandler(ContentHandler handler, Metadata metadata) {
+-		this(handler, metadata, new CTAKESConfig());
+-	}
+-
+-	/**
+-	 * Default constructor.
+-	 */
+-	public CTAKESContentHandler() {
+-		this(new DefaultHandler(), new Metadata());
+-	}
+-
+-	@Override
+-	public void characters(char[] ch, int start, int length)
+-			throws SAXException {
+-		if (config.isText()) {
+-			sb.append(ch, start, length);
+-		}
+-		super.characters(ch, start, length);
+-	}
+-
+-	@Override
+-    public void endDocument() throws SAXException {
+-        try {
+-            // create an Analysis Engine
+-        	if (ae == null) {
+-        		ae = CTAKESUtils.getAnalysisEngine(config.getAeDescriptorPath(), config.getUMLSUser(), config.getUMLSPass());
+-        	}
+-
+-            // create a JCas, given an AE
+-        	if (jcas == null) {
+-        		jcas = CTAKESUtils.getJCas(ae);
+-        	}
+-
+-            // get metadata to process
+-            StringBuilder metaText = new StringBuilder();
+-            String[] metadataToProcess = config.getMetadata();
+-            if (metadataToProcess != null) {
+-                for (String name : config.getMetadata()) {
+-                    for (String value : metadata.getValues(name)) {
+-                        metaText.append(value);
+-                        metaText.append(System.lineSeparator());
+-                    }
+-                }
+-            }
+-
+-            // analyze text
+-            jcas.setDocumentText(metaText.toString() + sb.toString());
+-            ae.process(jcas);
+-
+-            // add annotations to metadata
+-            metadata.add(CTAKES_META_PREFIX + "schema", config.getAnnotationPropsAsString());
+-            CTAKESAnnotationProperty[] annotationPros = config.getAnnotationProps();
+-            Collection<IdentifiedAnnotation> collection = JCasUtil.select(jcas, IdentifiedAnnotation.class);
+-            Iterator<IdentifiedAnnotation> iterator = collection.iterator();
+-            while (iterator.hasNext()) {
+-                IdentifiedAnnotation annotation = iterator.next();
+-                StringBuilder annotationBuilder = new StringBuilder();
+-                annotationBuilder.append(annotation.getCoveredText());
+-                if (annotationPros != null) {
+-                    for (CTAKESAnnotationProperty property : annotationPros) {
+-                        annotationBuilder.append(config.getSeparatorChar());
+-                        annotationBuilder.append(CTAKESUtils.getAnnotationProperty(annotation, property));
+-                    }
+-                }
+-                metadata.add(CTAKES_META_PREFIX + annotation.getType().getShortName(), annotationBuilder.toString());
+-            }
+-
+-            if (config.isSerialize()) {
+-                // serialize data
+-                CTAKESUtils.serialize(jcas, config.getSerializerType(), config.isPrettyPrint(), config.getOutputStream());
+-            }
+-        } catch (Exception e) {
+-            throw new SAXException(e.getMessage());
+-        } finally {
+-            CTAKESUtils.resetCAS(jcas);
+-        }
+-    }
+-
+-	/**
+-	 * Returns metadata that includes cTAKES annotations.
+-	 * 
+-	 * @return {@Metadata} object that includes cTAKES annotations.
+-	 */
+-	public Metadata getMetadata() {
+-		return metadata;
+-	}
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
+deleted file mode 100644
+index acd1965..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESParser.java
++++ /dev/null
+@@ -1,92 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-
+-import org.apache.tika.config.TikaConfig;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.parser.AutoDetectParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.Parser;
+-import org.apache.tika.parser.ParserDecorator;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * CTAKESParser decorates a {@see Parser} and leverages on 
+- * {@see CTAKESContentHandler} to extract biomedical information from 
+- * clinical text using Apache cTAKES.
+- * <p>It is normally called by supplying an instance to 
+- *  {@link AutoDetectParser}, such as:
+- * <code>AutoDetectParser parser = new AutoDetectParser(new CTAKESParser());</code>
+- * <p>It can also be used by giving a Tika Config file similar to:
+- * <code>
+- *  <properties>
+- *    <parsers>
+- *      <parser class="org.apache.tika.parser.ctakes.CTAKESParser">
+- *        <parser class="org.apache.tika.parser.DefaultParser"/>
+- *      </parser>
+- *    </parsers>
+- *  </properties>
+- * </code>
+- * <p>Because this is a Parser Decorator, and not a normal Parser in
+- *  it's own right, it isn't normally selected via the Parser Service Loader.
+- */
+-public class CTAKESParser extends ParserDecorator {
+-    /**
+-     * Serial version UID
+-     */
+-    private static final long serialVersionUID = -2313482748027097961L;
+-
+-    /**
+-     * Wraps the default Parser
+-     */
+-    public CTAKESParser() {
+-        this(TikaConfig.getDefaultConfig());
+-    }
+-    /**
+-     * Wraps the default Parser for this Config
+-     */
+-    public CTAKESParser(TikaConfig config) {
+-        this(config.getParser());
+-    }
+-    /**
+-     * Wraps the specified Parser
+-     */
+-    public CTAKESParser(Parser parser) {
+-        super(parser);
+-    }
+-
+-    @Override
+-    public void parse(InputStream stream, ContentHandler handler,
+-            Metadata metadata, ParseContext context) throws IOException,
+-            SAXException, TikaException {
+-        CTAKESConfig config = context.get(CTAKESConfig.class,
+-                new CTAKESConfig());
+-        CTAKESContentHandler ctakesHandler = new CTAKESContentHandler(handler,
+-                metadata, config);
+-        super.parse(stream, ctakesHandler, metadata, context);
+-    }
+-    
+-    //@Override
+-    public String getDecorationName() {
+-        return "CTakes";
+-    }            
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESSerializer.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESSerializer.java
+deleted file mode 100644
+index 4d4e4e2..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESSerializer.java
++++ /dev/null
+@@ -1,42 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import org.apache.uima.cas.impl.XCASSerializer;
+-import org.apache.uima.cas.impl.XmiCasSerializer;
+-import org.apache.uima.util.XmlCasSerializer;
+-
+-/**
+- * Enumeration for types of cTAKES (UIMA) CAS serializer supported by cTAKES.
+- * 
+- * A CAS serializer writes a CAS in the given format.
+- */
+-public enum CTAKESSerializer {
+-    XCAS(XCASSerializer.class.getName()),
+-    XMI(XmiCasSerializer.class.getName()),
+-    XML(XmlCasSerializer.class.getName());
+-
+-    private final String className;
+-
+-    private CTAKESSerializer(String className) {
+-        this.className = className;
+-    }
+-
+-    public String getClassName() {
+-        return className;
+-    }
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java b/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
+deleted file mode 100644
+index 23f281a..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/ctakes/CTAKESUtils.java
++++ /dev/null
+@@ -1,265 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.ctakes;
+-
+-import java.io.IOException;
+-import java.io.OutputStream;
+-import java.net.URISyntaxException;
+-
+-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+-import org.apache.uima.UIMAFramework;
+-import org.apache.uima.analysis_engine.AnalysisEngine;
+-import org.apache.uima.cas.impl.XCASSerializer;
+-import org.apache.uima.cas.impl.XmiCasSerializer;
+-import org.apache.uima.cas.impl.XmiSerializationSharedData;
+-import org.apache.uima.jcas.JCas;
+-import org.apache.uima.jcas.cas.FSArray;
+-import org.apache.uima.resource.ResourceInitializationException;
+-import org.apache.uima.resource.ResourceSpecifier;
+-import org.apache.uima.util.InvalidXMLException;
+-import org.apache.uima.util.XMLInputSource;
+-import org.apache.uima.util.XmlCasSerializer;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * This class provides methods to extract biomedical information from plain text
+- * using {@see CTAKESContentHandler} that relies on Apache cTAKES.
+- * 
+- * <p>
+- * Apache cTAKES is built on top of <a href="https://uima.apache.org/">Apache
+- * UIMA</a> framework and <a href="https://opennlp.apache.org/">OpenNLP</a>
+- * toolkit.
+- * </p>
+- */
+-public class CTAKESUtils {
+-	// UMLS username property
+-	private final static String CTAKES_UMLS_USER = "ctakes.umlsuser";
+-
+-	// UMLS password property
+-	private final static String CTAKES_UMLS_PASS = "ctakes.umlspw";
+-
+-	/**
+-	 * Returns a new UIMA Analysis Engine (AE). This method ensures that only
+-	 * one instance of an AE is created.
+-	 * 
+-	 * <p>
+-	 * An Analysis Engine is a component responsible for analyzing unstructured
+-	 * information, discovering and representing semantic content. Unstructured
+-	 * information includes, but is not restricted to, text documents.
+-	 * </p>
+-	 * 
+-	 * @param aeDescriptor
+-	 *            pathname for XML file including an AnalysisEngineDescription
+-	 *            that contains all of the information needed to instantiate and
+-	 *            use an AnalysisEngine.
+-	 * @param umlsUser
+-	 *            UMLS username for NLM database
+-	 * @param umlsPass
+-	 *            UMLS password for NLM database
+-	 * @return an Analysis Engine for analyzing unstructured information.
+-	 * @throws IOException
+-	 *             if any I/O error occurs.
+-	 * @throws InvalidXMLException
+-	 *             if the input XML is not valid or does not specify a valid
+-	 *             ResourceSpecifier.
+-	 * @throws ResourceInitializationException
+-	 *             if a failure occurred during production of the resource.
+-	 * @throws URISyntaxException
+-	 *             if URL of the resource is not formatted strictly according to
+-	 *             to RFC2396 and cannot be converted to a URI.
+-	 */
+-	public static AnalysisEngine getAnalysisEngine(String aeDescriptor,
+-			String umlsUser, String umlsPass) throws IOException,
+-			InvalidXMLException, ResourceInitializationException,
+-			URISyntaxException {
+-		// UMLS user ID and password.
+-		String aeDescriptorPath = CTAKESUtils.class.getResource(aeDescriptor)
+-				.toURI().getPath();
+-
+-		// get Resource Specifier from XML
+-		XMLInputSource aeIputSource = new XMLInputSource(aeDescriptorPath);
+-		ResourceSpecifier aeSpecifier = UIMAFramework.getXMLParser()
+-				.parseResourceSpecifier(aeIputSource);
+-
+-		// UMLS user ID and password
+-		if ((umlsUser != null) && (!umlsUser.isEmpty()) && (umlsPass != null)
+-				&& (!umlsPass.isEmpty())) {
+-			/*
+-			 * It is highly recommended that you change UMLS credentials in the
+-			 * XML configuration file instead of giving user and password using
+-			 * CTAKESConfig.
+-			 */
+-			System.setProperty(CTAKES_UMLS_USER, umlsUser);
+-			System.setProperty(CTAKES_UMLS_PASS, umlsPass);
+-		}
+-
+-		// create AE
+-		AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aeSpecifier);
+-
+-		return ae;
+-	}
+-
+-	/**
+-	 * Returns a new JCas () appropriate for the given Analysis Engine. This
+-	 * method ensures that only one instance of a JCas is created. A Jcas is a
+-	 * Java Cover Classes based Object-oriented CAS (Common Analysis System)
+-	 * API.
+-	 * 
+-	 * <p>
+-	 * Important: It is highly recommended that you reuse CAS objects rather
+-	 * than creating new CAS objects prior to each analysis. This is because CAS
+-	 * objects may be expensive to create and may consume a significant amount
+-	 * of memory.
+-	 * </p>
+-	 * 
+-	 * @param ae
+-	 *            AnalysisEngine used to create an appropriate JCas object.
+-	 * @return a JCas object appropriate for the given AnalysisEngine.
+-	 * @throws ResourceInitializationException
+-	 *             if a CAS could not be created because this AnalysisEngine's
+-	 *             CAS metadata (type system, type priorities, or FS indexes)
+-	 *             are invalid.
+-	 */
+-	public static JCas getJCas(AnalysisEngine ae)
+-			throws ResourceInitializationException {
+-		JCas jcas = ae.newJCas();
+-		
+-		return jcas;
+-	}
+-
+-	/**
+-	 * Serializes a CAS in the given format.
+-	 * 
+-	 * @param jcas
+-	 *            CAS (Common Analysis System) to be serialized.
+-	 * @param type
+-	 *            type of cTAKES (UIMA) serializer used to write CAS.
+-	 * @param prettyPrint
+-	 *            {@code true} to do pretty printing of output.
+-	 * @param stream
+-	 *            {@see OutputStream} object used to print out information
+-	 *            extracted by using cTAKES.
+-	 * @throws SAXException
+-	 *             if there was a SAX exception.
+-	 * @throws IOException
+-	 *             if any I/O error occurs.
+-	 */
+-	public static void serialize(JCas jcas, CTAKESSerializer type, boolean prettyPrint,
+-			OutputStream stream) throws SAXException, IOException {
+-		if (type == CTAKESSerializer.XCAS) {
+-			XCASSerializer.serialize(jcas.getCas(), stream, prettyPrint);
+-		} else if (type == CTAKESSerializer.XMI) {
+-			XmiCasSerializer.serialize(jcas.getCas(), jcas.getTypeSystem(),
+-					stream, prettyPrint, new XmiSerializationSharedData());
+-		} else {
+-			XmlCasSerializer.serialize(jcas.getCas(), jcas.getTypeSystem(),
+-					stream);
+-		}
+-	}
+-
+-	/**
+-	 * Returns the annotation value based on the given annotation type.
+-	 * 
+-	 * @param annotation
+-	 *            {@see IdentifiedAnnotation} object.
+-	 * @param property
+-	 *            {@see CTAKESAnnotationProperty} enum used to identify the
+-	 *            annotation type.
+-	 * @return the annotation value.
+-	 */
+-	public static String getAnnotationProperty(IdentifiedAnnotation annotation,
+-			CTAKESAnnotationProperty property) {
+-		String value = null;
+-		if (property == CTAKESAnnotationProperty.BEGIN) {
+-			value = Integer.toString(annotation.getBegin());
+-		} else if (property == CTAKESAnnotationProperty.END) {
+-			value = Integer.toString(annotation.getEnd());
+-		} else if (property == CTAKESAnnotationProperty.CONDITIONAL) {
+-			value = Boolean.toString(annotation.getConditional());
+-		} else if (property == CTAKESAnnotationProperty.CONFIDENCE) {
+-			value = Float.toString(annotation.getConfidence());
+-		} else if (property == CTAKESAnnotationProperty.DISCOVERY_TECNIQUE) {
+-			value = Integer.toString(annotation.getDiscoveryTechnique());
+-		} else if (property == CTAKESAnnotationProperty.GENERIC) {
+-			value = Boolean.toString(annotation.getGeneric());
+-		} else if (property == CTAKESAnnotationProperty.HISTORY_OF) {
+-			value = Integer.toString(annotation.getHistoryOf());
+-		} else if (property == CTAKESAnnotationProperty.ID) {
+-			value = Integer.toString(annotation.getId());
+-		} else if (property == CTAKESAnnotationProperty.ONTOLOGY_CONCEPT_ARR) {
+-			FSArray mentions = annotation.getOntologyConceptArr();
+-			StringBuilder sb = new StringBuilder();
+-			if (mentions != null) {
+-				for (int i = 0; i < mentions.size(); i++) {
+-					if (mentions.get(i) instanceof UmlsConcept) {
+-						UmlsConcept concept = (UmlsConcept) mentions.get(i);
+-						sb.append(concept.getCui());
+-						if (i < mentions.size() - 1) {
+-							sb.append(",");
+-						}
+-					}
+-				}
+-			}
+-			value = sb.toString();
+-		} else if (property == CTAKESAnnotationProperty.POLARITY) {
+-			value = Integer.toString(annotation.getPolarity());
+-		}
+-		return value;
+-	}
+-
+-	/**
+-	 * Resets cTAKES objects, if created. This method ensures that new cTAKES
+-	 * objects (a.k.a., Analysis Engine and JCas) will be created if getters of
+-	 * this class are called.
+-	 * 
+-	 * @param ae UIMA Analysis Engine
+-	 * @param jcas JCas object
+-	 */
+-	public static void reset(AnalysisEngine ae, JCas jcas) {
+-		// Analysis Engine
+-		resetAE(ae);
+-
+-		// JCas
+-		resetCAS(jcas);
+-		jcas = null;
+-	}
+-
+-	/**
+-	 * Resets the CAS (Common Analysis System), emptying it of all content.
+-	 * 
+-	 * @param jcas JCas object
+-	 */
+-	public static void resetCAS(JCas jcas) {
+-		if (jcas != null) {
+-			jcas.reset();
+-		}
+-	}
+-
+-	/**
+-	 * Resets the AE (AnalysisEngine), releasing all resources held by the
+-	 * current AE.
+-	 * 
+-	 * @param ae UIMA Analysis Engine
+-	 */
+-	public static void resetAE(AnalysisEngine ae) {
+-		if (ae != null) {
+-			ae.destroy();
+-			ae = null;
+-		}
+-	}
+-}
diff --git a/debian/patches/ignore-org.apache.poi.hslf.usermodel.patch b/debian/patches/ignore-org.apache.poi.hslf.usermodel.patch
new file mode 100644
index 0000000..f02907b
--- /dev/null
+++ b/debian/patches/ignore-org.apache.poi.hslf.usermodel.patch
@@ -0,0 +1,353 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:15:33 +0100
+Subject: ignore org.apache.poi.hslf.usermodel
+
+---
+ .../tika/parser/microsoft/HSLFExtractor.java       | 338 ---------------------
+ 1 file changed, 338 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+deleted file mode 100644
+index dedb135..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
++++ /dev/null
+@@ -1,338 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.microsoft;
+-
+-import java.io.IOException;
+-import java.util.HashSet;
+-import java.util.List;
+-
+-import org.apache.poi.hslf.model.Comment;
+-import org.apache.poi.hslf.model.HeadersFooters;
+-import org.apache.poi.hslf.model.OLEShape;
+-import org.apache.poi.hslf.usermodel.HSLFMasterSheet;
+-import org.apache.poi.hslf.usermodel.HSLFNotes;
+-import org.apache.poi.hslf.usermodel.HSLFObjectData;
+-import org.apache.poi.hslf.usermodel.HSLFPictureData;
+-import org.apache.poi.hslf.usermodel.HSLFShape;
+-import org.apache.poi.hslf.usermodel.HSLFSlide;
+-import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+-import org.apache.poi.hslf.usermodel.HSLFTable;
+-import org.apache.poi.hslf.usermodel.HSLFTableCell;
+-import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
+-import org.apache.poi.hslf.usermodel.HSLFTextRun;
+-import org.apache.poi.hslf.usermodel.HSLFTextShape;
+-import org.apache.poi.poifs.filesystem.DirectoryNode;
+-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.SAXException;
+-import org.xml.sax.helpers.AttributesImpl;
+-
+-public class HSLFExtractor extends AbstractPOIFSExtractor {
+-    public HSLFExtractor(ParseContext context) {
+-        super(context);
+-    }
+-
+-    protected void parse(
+-            NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml)
+-            throws IOException, SAXException, TikaException {
+-        parse(filesystem.getRoot(), xhtml);
+-    }
+-
+-    protected void parse(
+-            DirectoryNode root, XHTMLContentHandler xhtml)
+-            throws IOException, SAXException, TikaException {
+-        HSLFSlideShow ss = new HSLFSlideShow(root);
+-        List<HSLFSlide> _slides = ss.getSlides();
+-
+-        xhtml.startElement("div", "class", "slideShow");
+-
+-      /* Iterate over slides and extract text */
+-        for (HSLFSlide slide : _slides) {
+-            xhtml.startElement("div", "class", "slide");
+-
+-            // Slide header, if present
+-            HeadersFooters hf = slide.getHeadersFooters();
+-            if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
+-                xhtml.startElement("p", "class", "slide-header");
+-
+-                xhtml.characters(hf.getHeaderText());
+-
+-                xhtml.endElement("p");
+-            }
+-
+-            // Slide master, if present
+-            extractMaster(xhtml, slide.getMasterSheet());
+-
+-            // Slide text
+-            {
+-                xhtml.startElement("div", "class", "slide-content");
+-
+-                textRunsToText(xhtml, slide.getTextParagraphs());
+-
+-                xhtml.endElement("div");
+-            }
+-
+-            // Table text
+-            for (HSLFShape shape : slide.getShapes()) {
+-                if (shape instanceof HSLFTable) {
+-                    extractTableText(xhtml, (HSLFTable) shape);
+-                }
+-            }
+-
+-            // Slide footer, if present
+-            if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
+-                xhtml.startElement("p", "class", "slide-footer");
+-
+-                xhtml.characters(hf.getFooterText());
+-
+-                xhtml.endElement("p");
+-            }
+-
+-            // Comments, if present
+-            StringBuilder authorStringBuilder = new StringBuilder();
+-            for (Comment comment : slide.getComments()) {
+-                authorStringBuilder.setLength(0);
+-                xhtml.startElement("p", "class", "slide-comment");
+-
+-                if (comment.getAuthor() != null) {
+-                    authorStringBuilder.append(comment.getAuthor());
+-                }
+-                if (comment.getAuthorInitials() != null) {
+-                    if (authorStringBuilder.length() > 0) {
+-                        authorStringBuilder.append(" ");
+-                    }
+-                    authorStringBuilder.append("("+comment.getAuthorInitials()+")");
+-                }
+-                if (authorStringBuilder.length() > 0) {
+-                    if (comment.getText() != null) {
+-                        authorStringBuilder.append(" - ");
+-                    }
+-                    xhtml.startElement("b");
+-                    xhtml.characters(authorStringBuilder.toString());
+-                    xhtml.endElement("b");
+-                }
+-                if (comment.getText() != null) {
+-                    xhtml.characters(comment.getText());
+-                }
+-                xhtml.endElement("p");
+-            }
+-
+-            // Now any embedded resources
+-            handleSlideEmbeddedResources(slide, xhtml);
+-
+-            // TODO Find the Notes for this slide and extract inline
+-
+-            // Slide complete
+-            xhtml.endElement("div");
+-        }
+-
+-        // All slides done
+-        xhtml.endElement("div");
+-
+-      /* notes */
+-        xhtml.startElement("div", "class", "slide-notes");
+-        HashSet<Integer> seenNotes = new HashSet<>();
+-        HeadersFooters hf = ss.getNotesHeadersFooters();
+-
+-        for (HSLFSlide slide : _slides) {
+-            HSLFNotes notes = slide.getNotes();
+-            if (notes == null) {
+-                continue;
+-            }
+-            Integer id = notes._getSheetNumber();
+-            if (seenNotes.contains(id)) {
+-                continue;
+-            }
+-            seenNotes.add(id);
+-
+-            // Repeat the Notes header, if set
+-            if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
+-                xhtml.startElement("p", "class", "slide-note-header");
+-                xhtml.characters(hf.getHeaderText());
+-                xhtml.endElement("p");
+-            }
+-
+-            // Notes text
+-            textRunsToText(xhtml, notes.getTextParagraphs());
+-
+-            // Repeat the notes footer, if set
+-            if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
+-                xhtml.startElement("p", "class", "slide-note-footer");
+-                xhtml.characters(hf.getFooterText());
+-                xhtml.endElement("p");
+-            }
+-        }
+-
+-        handleSlideEmbeddedPictures(ss, xhtml);
+-
+-        xhtml.endElement("div");
+-    }
+-
+-    private void extractMaster(XHTMLContentHandler xhtml, HSLFMasterSheet master) throws SAXException {
+-        if (master == null) {
+-            return;
+-        }
+-        List<HSLFShape> shapes = master.getShapes();
+-        if (shapes == null || shapes.isEmpty()) {
+-            return;
+-        }
+-
+-        xhtml.startElement("div", "class", "slide-master-content");
+-        for (HSLFShape shape : shapes) {
+-            if (shape != null && !HSLFMasterSheet.isPlaceholder(shape)) {
+-                if (shape instanceof HSLFTextShape) {
+-                	HSLFTextShape tsh = (HSLFTextShape) shape;
+-                    String text = tsh.getText();
+-                    if (text != null) {
+-                        xhtml.element("p", text);
+-                    }
+-                }
+-            }
+-        }
+-        xhtml.endElement("div");
+-    }
+-
+-    private void extractTableText(XHTMLContentHandler xhtml, HSLFTable shape) throws SAXException {
+-        xhtml.startElement("table");
+-        for (int row = 0; row < shape.getNumberOfRows(); row++) {
+-            xhtml.startElement("tr");
+-            for (int col = 0; col < shape.getNumberOfColumns(); col++) {
+-                HSLFTableCell cell = shape.getCell(row, col);
+-                //insert empty string for empty cell if cell is null
+-                String txt = "";
+-                if (cell != null) {
+-                    txt = cell.getText();
+-                }
+-                xhtml.element("td", txt);
+-            }
+-            xhtml.endElement("tr");
+-        }
+-        xhtml.endElement("table");
+-    }
+-
+-    private void textRunsToText(XHTMLContentHandler xhtml, List<List<HSLFTextParagraph>> paragraphsList) throws SAXException {
+-        if (paragraphsList == null) {
+-            return;
+-        }
+-
+-        for (List<HSLFTextParagraph> run : paragraphsList) {
+-            // Leaving in wisdom from TIKA-712 for easy revert.
+-            // Avoid boiler-plate text on the master slide (0
+-            // = TextHeaderAtom.TITLE_TYPE, 1 = TextHeaderAtom.BODY_TYPE):
+-            //if (!isMaster || (run.getRunType() != 0 && run.getRunType() != 1)) {
+-
+-        	for (HSLFTextParagraph htp : run) {
+-        		xhtml.startElement("p");
+-
+-        		for (HSLFTextRun htr : htp.getTextRuns()) {
+-        			String line = htr.getRawText();
+-        			if (line != null) {
+-        				boolean isfirst = true;
+-        				for (String fragment : line.split("\\u000b")){
+-        					if (!isfirst)  {
+-        	                    xhtml.startElement("br");
+-        	                    xhtml.endElement("br");
+-        					}
+-        					isfirst = false;
+-        					xhtml.characters(fragment.trim());
+-        				}
+-        			}
+-        		}
+-                xhtml.endElement("p");
+-
+-            }
+-        	
+-        }
+-    }
+-
+-    private void handleSlideEmbeddedPictures(HSLFSlideShow slideshow, XHTMLContentHandler xhtml)
+-            throws TikaException, SAXException, IOException {
+-        for (HSLFPictureData pic : slideshow.getPictureData()) {
+-            String mediaType;
+-
+-            switch (pic.getType()) {
+-                case EMF:
+-                    mediaType = "application/x-emf";
+-                    break;
+-                case WMF:
+-                    mediaType = "application/x-msmetafile";
+-                    break;
+-                case DIB:
+-                    mediaType = "image/bmp";
+-                    break;
+-                default:
+-            		mediaType = pic.getContentType();
+-            		break;
+-            }
+-
+-            handleEmbeddedResource(
+-                    TikaInputStream.get(pic.getData()), null, null,
+-                    mediaType, xhtml, false);
+-        }
+-    }
+-
+-    private void handleSlideEmbeddedResources(HSLFSlide slide, XHTMLContentHandler xhtml)
+-            throws TikaException, SAXException, IOException {
+-        List<HSLFShape> shapes;
+-        try {
+-            shapes = slide.getShapes();
+-        } catch (NullPointerException e) {
+-            // Sometimes HSLF hits problems
+-            // Please open POI bugs for any you come across!
+-            return;
+-        }
+-
+-        for (HSLFShape shape : shapes) {
+-            if (shape instanceof OLEShape) {
+-                OLEShape oleShape = (OLEShape) shape;
+-                HSLFObjectData data = null;
+-                try {
+-                    data = oleShape.getObjectData();
+-                } catch (NullPointerException e) {
+-                /* getObjectData throws NPE some times. */
+-                }
+-
+-                if (data != null) {
+-                    String objID = Integer.toString(oleShape.getObjectID());
+-
+-                    // Embedded Object: add a <div
+-                    // class="embedded" id="X"/> so consumer can see where
+-                    // in the main text each embedded document
+-                    // occurred:
+-                    AttributesImpl attributes = new AttributesImpl();
+-                    attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+-                    attributes.addAttribute("", "id", "id", "CDATA", objID);
+-                    xhtml.startElement("div", attributes);
+-                    xhtml.endElement("div");
+-
+-                    try (TikaInputStream stream = TikaInputStream.get(data.getData())) {
+-                        String mediaType = null;
+-                        if ("Excel.Chart.8".equals(oleShape.getProgID())) {
+-                            mediaType = "application/vnd.ms-excel";
+-                        }
+-                        handleEmbeddedResource(
+-                                stream, objID, objID,
+-                                mediaType, xhtml, false);
+-                    }
+-                }
+-            }
+-        }
+-    }
+-}
diff --git a/debian/patches/ignore-org.apache.poi.hssf.extractor.patch b/debian/patches/ignore-org.apache.poi.hssf.extractor.patch
new file mode 100644
index 0000000..b92ad89
--- /dev/null
+++ b/debian/patches/ignore-org.apache.poi.hssf.extractor.patch
@@ -0,0 +1,112 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:05:03 +0100
+Subject: ignore org.apache.poi.hssf.extractor
+
+---
+ .../tika/parser/microsoft/OldExcelParser.java      | 97 ----------------------
+ 1 file changed, 97 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
+deleted file mode 100644
+index 446eea9..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
++++ /dev/null
+@@ -1,97 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.microsoft;
+-
+-import java.io.BufferedReader;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.io.StringReader;
+-import java.util.Arrays;
+-import java.util.Collections;
+-import java.util.HashSet;
+-import java.util.Set;
+-
+-import org.apache.poi.hssf.extractor.OldExcelExtractor;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-/**
+- * A POI-powered Tika Parser for very old versions of Excel, from
+- * pre-OLE2 days, such as Excel 4.
+- */
+-public class OldExcelParser extends AbstractParser {
+-    private static final long serialVersionUID = 4611820730372823452L;
+-
+-    private static final Set<MediaType> SUPPORTED_TYPES =
+-            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+-                    MediaType.application("vnd.ms-excel.sheet.4"),
+-                    MediaType.application("vnd.ms-excel.workspace.4"),
+-                    MediaType.application("vnd.ms-excel.sheet.3"),
+-                    MediaType.application("vnd.ms-excel.workspace.3"),
+-                    MediaType.application("vnd.ms-excel.sheet.2")
+-            )));
+-
+-    protected static void parse(OldExcelExtractor extractor,
+-                                XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException {
+-        // Get the whole text, as a single string
+-        String text = extractor.getText();
+-
+-        // Split and output
+-        xhtml.startDocument();
+-
+-        String line;
+-        BufferedReader reader = new BufferedReader(new StringReader(text));
+-        while ((line = reader.readLine()) != null) {
+-            xhtml.startElement("p");
+-            xhtml.characters(line);
+-            xhtml.endElement("p");
+-        }
+-
+-        xhtml.endDocument();
+-    }
+-
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    /**
+-     * Extracts properties and text from an MS Document input stream
+-     */
+-    public void parse(
+-            InputStream stream, ContentHandler handler,
+-            Metadata metadata, ParseContext context)
+-            throws IOException, SAXException, TikaException {
+-        // Open the POI provided extractor
+-        OldExcelExtractor extractor = new OldExcelExtractor(stream);
+-
+-        // We can't do anything about metadata, as these old formats
+-        //  didn't have any stored with them
+-
+-        // Set the content type
+-        // TODO Get the version and type, to set as the Content Type
+-
+-        // Have the text extracted and given to our Content Handler
+-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-        parse(extractor, xhtml);
+-    }
+-}
diff --git a/debian/patches/ignore-org.json.XML.patch b/debian/patches/ignore-org.json.XML.patch
new file mode 100644
index 0000000..5199d54
--- /dev/null
+++ b/debian/patches/ignore-org.json.XML.patch
@@ -0,0 +1,908 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:06:12 +0100
+Subject: ignore org.json.XML
+
+---
+ .../org/apache/tika/parser/journal/TEIParser.java  | 893 ---------------------
+ 1 file changed, 893 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/journal/TEIParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/journal/TEIParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/journal/TEIParser.java
+deleted file mode 100644
+index 04d5195..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/journal/TEIParser.java
++++ /dev/null
+@@ -1,893 +0,0 @@
+-/**
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.journal;
+-
+-import java.util.ArrayList;
+-import java.util.List;
+-
+-import org.apache.tika.metadata.Metadata;
+-import org.json.JSONArray;
+-import org.json.JSONObject;
+-import org.json.XML;
+-
+-public class TEIParser {
+-
+-  public TEIParser() {
+-  }
+-
+-  public Metadata parse(String source) {
+-    JSONObject obj = XML.toJSONObject(source);
+-    Metadata metadata = new Metadata();
+-    createGrobidMetadata(source, obj, metadata);
+-    return metadata;
+-  }
+-
+-  private void createGrobidMetadata(String source, JSONObject obj,
+-      Metadata metadata) {
+-    if (obj != null) {
+-      JSONObject teiHeader = obj.getJSONObject("TEI")
+-          .getJSONObject("teiHeader");
+-      if (teiHeader.has("text")) {
+-        parseText(teiHeader.getJSONObject("text"), metadata);
+-      }
+-
+-      if (teiHeader.has("fileDesc")) {
+-        parseFileDesc(teiHeader.getJSONObject("fileDesc"), metadata);
+-
+-      }
+-      if (teiHeader.has("profileDesc")) {
+-        parseProfileDesc(teiHeader.getJSONObject("profileDesc"), metadata);
+-      }
+-    }
+-
+-    addStaticMet(source, obj, metadata);
+-  }
+-
+-  private void addStaticMet(String source, JSONObject obj, Metadata metadata) {
+-    metadata.add("Class", Metadata.class.getName());
+-    metadata.add("TEIJSONSource", obj.toString());
+-    metadata.add("TEIXMLSource", source);
+-  }
+-
+-  private void parseText(JSONObject text, Metadata metadata) {
+-    if (text.has("xml:lang")) {
+-      metadata.add("Language", text.getString("xml:lang"));
+-    }
+-  }
+-
+-  private void parseFileDesc(JSONObject fileDesc, Metadata metadata) {
+-    if (fileDesc.has("titleStmt")) {
+-      parseTitleStmt(fileDesc.getJSONObject("titleStmt"), metadata);
+-    }
+-
+-    if (fileDesc.has("sourceDesc")) {
+-      parseSourceDesc(fileDesc.getJSONObject("sourceDesc"), metadata);
+-    }
+-  }
+-
+-  private void parseTitleStmt(JSONObject titleStmt, Metadata metadata) {
+-    if (titleStmt.has("title")) {
+-      JSONObject title = titleStmt.getJSONObject("title");
+-      if (title.has("content")) {
+-        metadata.add("Title", title.getString("content"));
+-      }
+-    }
+-  }
+-
+-  private void parseSourceDesc(JSONObject sourceDesc, Metadata metadata) {
+-    if (sourceDesc.has("biblStruct")) {
+-      parseBiblStruct(sourceDesc.getJSONObject("biblStruct"), metadata);
+-    }
+-  }
+-
+-  private void parseBiblStruct(JSONObject biblStruct, Metadata metadata) {
+-    if (biblStruct.has("analytic")
+-        && biblStruct.get("analytic") instanceof JSONObject) {
+-      JSONObject analytic = biblStruct.getJSONObject("analytic");
+-      if (analytic.has("author")) {
+-        Object authorObj = analytic.get("author");
+-
+-        List<Author> authorList = new ArrayList<Author>();
+-        if (authorObj instanceof JSONObject) {
+-          parseAuthor((JSONObject) authorObj, authorList);
+-        } else if (authorObj instanceof JSONArray) {
+-          JSONArray authors = (JSONArray) authorObj;
+-          if (authors.length() > 0) {
+-            for (int i = 0; i < authors.length(); i++) {
+-              JSONObject author = authors.getJSONObject(i);
+-              parseAuthor(author, authorList);
+-            }
+-          }
+-
+-          metadata.add("Address", getMetadataAddresses(authorList));
+-          metadata.add("Affiliation", getMetadataAffiliations(authorList));
+-          metadata.add("Authors", getMetadataAuthors(authorList));
+-          metadata.add("FullAffiliations",
+-              getMetadataFullAffiliations(authorList));
+-        }
+-
+-      }
+-    } else {
+-      metadata.add("Error", "Unable to parse: no analytic section in JSON");
+-    }
+-
+-  }
+-
+-  private String getMetadataFullAffiliations(List<Author> authorList) {
+-    List<Affiliation> unique = new ArrayList<Affiliation>();
+-    StringBuilder metAffils = new StringBuilder();
+-
+-    for (Author a : authorList) {
+-      for (Affiliation af : a.getAffiliations()) {
+-        if (!unique.contains(af)) {
+-          unique.add(af);
+-        }
+-      }
+-    }
+-    metAffils.append("[");
+-    for (Affiliation af : unique) {
+-      metAffils.append(af.toString());
+-      metAffils.append(",");
+-    }
+-    metAffils.append(metAffils.deleteCharAt(metAffils.length() - 1));
+-    metAffils.append("]");
+-    return metAffils.toString();
+-  }
+-
+-  private String getMetadataAuthors(List<Author> authorList) {
+-    // generates Chris A. Mattmann 1, 2 Daniel J. Crichton 1 Nenad Medvidovic 2
+-    // Steve Hughes 1
+-    List<Affiliation> unique = new ArrayList<Affiliation>();
+-    StringBuilder metAuthors = new StringBuilder();
+-
+-    for (Author a : authorList) {
+-      for (Affiliation af : a.getAffiliations()) {
+-        if (!unique.contains(af)) {
+-          unique.add(af);
+-        }
+-      }
+-    }
+-
+-    for (Author a : authorList) {
+-      metAuthors.append(printOrBlank(a.getFirstName()));
+-      metAuthors.append(printOrBlank(a.getMiddleName()));
+-      metAuthors.append(printOrBlank(a.getSurName()));
+-
+-      StringBuilder affilBuilder = new StringBuilder();
+-      for (int idx = 0; idx < unique.size(); idx++) {
+-        Affiliation af = unique.get(idx);
+-        if (a.getAffiliations().contains(af)) {
+-          affilBuilder.append((idx + 1));
+-          affilBuilder.append(",");
+-        }
+-      }
+-
+-      if (affilBuilder.length() > 0)
+-        affilBuilder.deleteCharAt(affilBuilder.length() - 1);
+-
+-      metAuthors.append(affilBuilder.toString());
+-      metAuthors.append(" ");
+-    }
+-
+-    return metAuthors.toString();
+-  }
+-
+-  private String getMetadataAffiliations(List<Author> authorList) {
+-    // generates 1 Jet Propulsion Laboratory California Institute of Technology
+-    // ; 2 Computer Science Department University of Southern California
+-    List<Affiliation> unique = new ArrayList<Affiliation>();
+-    StringBuilder metAffil = new StringBuilder();
+-
+-    for (Author a : authorList) {
+-      for (Affiliation af : a.getAffiliations()) {
+-        if (!unique.contains(af)) {
+-          unique.add(af);
+-        }
+-      }
+-    }
+-
+-    int count = 1;
+-    for (Affiliation a : unique) {
+-      metAffil.append(count);
+-      metAffil.append(" ");
+-      metAffil.append(a.getOrgName().toString());
+-      metAffil.deleteCharAt(metAffil.length() - 1);
+-      metAffil.append("; ");
+-      count++;
+-    }
+-
+-    if (count > 1) {
+-      metAffil.deleteCharAt(metAffil.length() - 1);
+-      metAffil.deleteCharAt(metAffil.length() - 1);
+-    }
+-
+-    return metAffil.toString();
+-  }
+-
+-  private String getMetadataAddresses(List<Author> authorList) {
+-    // generates: "Pasadena, CA 91109, USA Los Angeles, CA 90089, USA",
+-    List<Address> unique = new ArrayList<Address>();
+-    StringBuilder metAddress = new StringBuilder();
+-
+-    for (Author a : authorList) {
+-      for (Affiliation af : a.getAffiliations()) {
+-        if (!unique.contains(af.getAddress())) {
+-          unique.add(af.getAddress());
+-        }
+-      }
+-    }
+-
+-    for (Address ad : unique) {
+-      metAddress.append(ad.toString());
+-      metAddress.append(" ");
+-    }
+-
+-    return metAddress.toString();
+-  }
+-
+-  private void parseAuthor(JSONObject authorObj, List<Author> authorList) {
+-    Author author = new Author();
+-
+-    if (authorObj.has("persName")) {
+-      JSONObject persName = authorObj.getJSONObject("persName");
+-
+-      if (persName.has("forename")) {
+-
+-        Object foreNameObj = persName.get("forename");
+-
+-        if (foreNameObj instanceof JSONObject) {
+-          parseNamePart((JSONObject) foreNameObj, author);
+-        } else if (foreNameObj instanceof JSONArray) {
+-          JSONArray foreName = persName.getJSONArray("forename");
+-
+-          if (foreName.length() > 0) {
+-            for (int i = 0; i < foreName.length(); i++) {
+-              JSONObject namePart = foreName.getJSONObject(i);
+-              parseNamePart(namePart, author);
+-            }
+-          }
+-        }
+-      }
+-
+-      if (persName.has("surname")) {
+-        author.setSurName(persName.getString("surname"));
+-      }
+-
+-      if (authorObj.has("affiliation")) {
+-        parseAffiliation(authorObj.get("affiliation"), author);
+-      }
+-
+-    }
+-
+-    authorList.add(author);
+-  }
+-
+-  private void parseNamePart(JSONObject namePart, Author author) {
+-    if (namePart.has("type") && namePart.has("content")) {
+-      String type = namePart.getString("type");
+-      String content = namePart.getString("content");
+-
+-      if (type.equals("first")) {
+-        author.setFirstName(content);
+-      }
+-
+-      if (type.equals("middle")) {
+-        author.setMiddleName(content);
+-      }
+-    }
+-  }
+-
+-  private void parseAffiliation(Object affiliationJSON, Author author) {
+-    if (affiliationJSON instanceof JSONObject) {
+-      parseOneAffiliation((JSONObject) affiliationJSON, author);
+-    } else if (affiliationJSON instanceof JSONArray) {
+-      JSONArray affiliationArray = (JSONArray) affiliationJSON;
+-      if (affiliationArray != null && affiliationArray.length() > 0) {
+-        for (int i = 0; i < affiliationArray.length(); i++) {
+-          JSONObject affiliationObj = affiliationArray.getJSONObject(i);
+-          parseOneAffiliation(affiliationObj, author);
+-        }
+-      }
+-    }
+-  }
+-
+-  private void parseOneAffiliation(JSONObject affiliationObj, Author author) {
+-
+-    Affiliation affiliation = new Affiliation();
+-    if (affiliationObj.has("address")) {
+-      parseAddress(affiliationObj.getJSONObject("address"), affiliation);
+-    }
+-
+-    if (affiliationObj.has("orgName")) {
+-      OrgName orgName = new OrgName();
+-      Object orgObject = affiliationObj.get("orgName");
+-      if (orgObject instanceof JSONObject) {
+-        parseOrgName((JSONObject) orgObject, orgName);
+-      } else if (orgObject instanceof JSONArray) {
+-        JSONArray orgNames = (JSONArray) orgObject;
+-        if (orgNames != null && orgNames.length() > 0) {
+-          for (int i = 0; i < orgNames.length(); i++) {
+-            parseOrgName(orgNames.getJSONObject(i), orgName);
+-          }
+-        }
+-
+-        affiliation.setOrgName(orgName);
+-      }
+-
+-    }
+-
+-    author.getAffiliations().add(affiliation);
+-  }
+-
+-  private void parseAddress(JSONObject addressObj, Affiliation affiliation) {
+-    Address address = new Address();
+-
+-    if (addressObj.has("region")) {
+-      address.setRegion(addressObj.getString("region"));
+-    }
+-
+-    if (addressObj.has("postCode")) {
+-      address.setPostCode(JSONObject.valueToString(addressObj.get("postCode")));
+-    }
+-
+-    if (addressObj.has("settlement")) {
+-      address.setSettlment(addressObj.getString("settlement"));
+-    }
+-
+-    if (addressObj.has("country")) {
+-      Country country = new Country();
+-      Object countryObj = addressObj.get("country");
+-
+-      if (countryObj instanceof JSONObject) {
+-        JSONObject countryJson = addressObj.getJSONObject("country");
+-
+-        if (countryJson.has("content")) {
+-          country.setContent(countryJson.getString("content"));
+-        }
+-
+-        if (countryJson.has("key")) {
+-          country.setKey(countryJson.getString("key"));
+-        }
+-      } else if (countryObj instanceof String) {
+-        country.setContent((String) countryObj);
+-      }
+-      address.setCountry(country);
+-    }
+-
+-    affiliation.setAddress(address);
+-  }
+-
+-  private void parseOrgName(JSONObject orgObj, OrgName orgName) {
+-    OrgTypeName typeName = new OrgTypeName();
+-    if (orgObj.has("content")) {
+-      typeName.setName(orgObj.getString("content"));
+-    }
+-
+-    if (orgObj.has("type")) {
+-      typeName.setType(orgObj.getString("type"));
+-    }
+-
+-    orgName.getTypeNames().add(typeName);
+-  }
+-
+-  private void parseProfileDesc(JSONObject profileDesc, Metadata metadata) {
+-    if (profileDesc.has("abstract")) {
+-      if (profileDesc.has("p")) {
+-        metadata.add("Abstract", profileDesc.getString("p"));
+-      }
+-    }
+-
+-    if (profileDesc.has("textClass")) {
+-      JSONObject textClass = profileDesc.getJSONObject("textClass");
+-
+-      if (textClass.has("keywords")) {
+-        Object keywordsObj = textClass.get("keywords");
+-        // test AJ15.pdf
+-        if (keywordsObj instanceof String) {
+-          metadata.add("Keyword", (String) keywordsObj);
+-        } else if (keywordsObj instanceof JSONObject) {
+-          JSONObject keywords = textClass.getJSONObject("keywords");
+-          if (keywords.has("term")) {
+-            JSONArray termArr = keywords.getJSONArray("term");
+-            for (int i = 0; i < termArr.length(); i++) {
+-              metadata.add("Keyword", JSONObject.valueToString(termArr.get(i)));
+-            }
+-          }
+-        }
+-
+-      }
+-    }
+-
+-  }
+-
+-  private String printOrBlank(String val) {
+-    if (val != null && !val.equals("")) {
+-      return val + " ";
+-    } else
+-      return " ";
+-  }
+-
+-  class Author {
+-
+-    private String surName;
+-
+-    private String middleName;
+-
+-    private String firstName;
+-
+-    private List<Affiliation> affiliations;
+-
+-    public Author() {
+-      this.surName = null;
+-      this.middleName = null;
+-      this.firstName = null;
+-      this.affiliations = new ArrayList<Affiliation>();
+-    }
+-
+-    /**
+-     * @return the surName
+-     */
+-    public String getSurName() {
+-      return surName;
+-    }
+-
+-    /**
+-     * @param surName
+-     *          the surName to set
+-     */
+-    public void setSurName(String surName) {
+-      this.surName = surName;
+-    }
+-
+-    /**
+-     * @return the middleName
+-     */
+-    public String getMiddleName() {
+-      return middleName;
+-    }
+-
+-    /**
+-     * @param middleName
+-     *          the middleName to set
+-     */
+-    public void setMiddleName(String middleName) {
+-      this.middleName = middleName;
+-    }
+-
+-    /**
+-     * @return the firstName
+-     */
+-    public String getFirstName() {
+-      return firstName;
+-    }
+-
+-    /**
+-     * @param firstName
+-     *          the firstName to set
+-     */
+-    public void setFirstName(String firstName) {
+-      this.firstName = firstName;
+-    }
+-
+-    /**
+-     * @return the affiliations
+-     */
+-    public List<Affiliation> getAffiliations() {
+-      return affiliations;
+-    }
+-
+-    /**
+-     * @param affiliations
+-     *          the affiliations to set
+-     */
+-    public void setAffiliations(List<Affiliation> affiliations) {
+-      this.affiliations = affiliations;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#toString()
+-     */
+-    @Override
+-    public String toString() {
+-      return "Author [surName=" + surName + ", middleName=" + middleName != null ? middleName
+-          : "" + ", firstName=" + firstName + ", affiliations=" + affiliations
+-              + "]";
+-    }
+-
+-  }
+-
+-  class Affiliation {
+-
+-    private OrgName orgName;
+-
+-    private Address address;
+-
+-    public Affiliation() {
+-      this.orgName = new OrgName();
+-      this.address = new Address();
+-    }
+-
+-    /**
+-     * @return the orgName
+-     */
+-    public OrgName getOrgName() {
+-      return orgName;
+-    }
+-
+-    /**
+-     * @param orgName
+-     *          the orgName to set
+-     */
+-    public void setOrgName(OrgName orgName) {
+-      this.orgName = orgName;
+-    }
+-
+-    /**
+-     * @return the address
+-     */
+-    public Address getAddress() {
+-      return address;
+-    }
+-
+-    /**
+-     * @param address
+-     *          the address to set
+-     */
+-    public void setAddress(Address address) {
+-      this.address = address;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#equals(java.lang.Object)
+-     */
+-    @Override
+-    public boolean equals(Object obj) {
+-      Affiliation otherA = (Affiliation) obj;
+-      return this.getAddress().equals(otherA.getAddress())
+-          && this.getOrgName().equals(otherA.getOrgName());
+-
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#toString()
+-     */
+-    @Override
+-    public String toString() {
+-      return "Affiliation {orgName=" + orgName + ", address=" + address + "}";
+-    }
+-
+-  }
+-
+-  class OrgName {
+-    private List<OrgTypeName> typeNames;
+-
+-    public OrgName() {
+-      this.typeNames = new ArrayList<OrgTypeName>();
+-    }
+-
+-    /**
+-     * @return the typeNames
+-     */
+-    public List<OrgTypeName> getTypeNames() {
+-      return typeNames;
+-    }
+-
+-    /**
+-     * @param typeNames
+-     *          the typeNames to set
+-     */
+-    public void setTypeNames(List<OrgTypeName> typeNames) {
+-      this.typeNames = typeNames;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#toString()
+-     */
+-
+-    @Override
+-    public String toString() {
+-      StringBuilder builder = new StringBuilder();
+-      for (OrgTypeName on : this.typeNames) {
+-        builder.append(on.getName());
+-        builder.append(" ");
+-      }
+-      return builder.toString();
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#equals(java.lang.Object)
+-     */
+-    @Override
+-    public boolean equals(Object obj) {
+-      OrgName otherA = (OrgName) obj;
+-
+-      if (otherA.getTypeNames() != null) {
+-        if (this.typeNames == null) {
+-          return false;
+-        } else {
+-          return this.typeNames.size() == otherA.getTypeNames().size();
+-        }
+-      } else {
+-        if (this.typeNames == null) {
+-          return true;
+-        } else
+-          return false;
+-      }
+-
+-    }
+-
+-  }
+-
+-  class OrgTypeName {
+-    private String name;
+-    private String type;
+-
+-    public OrgTypeName() {
+-      this.name = null;
+-      this.type = null;
+-    }
+-
+-    /**
+-     * @return the name
+-     */
+-    public String getName() {
+-      return name;
+-    }
+-
+-    /**
+-     * @param name
+-     *          the name to set
+-     */
+-    public void setName(String name) {
+-      this.name = name;
+-    }
+-
+-    /**
+-     * @return the type
+-     */
+-    public String getType() {
+-      return type;
+-    }
+-
+-    /**
+-     * @param type
+-     *          the type to set
+-     */
+-    public void setType(String type) {
+-      this.type = type;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#equals(java.lang.Object)
+-     */
+-    @Override
+-    public boolean equals(Object obj) {
+-      OrgTypeName otherOrgName = (OrgTypeName) obj;
+-      return this.type.equals(otherOrgName.getType())
+-          && this.name.equals(otherOrgName.getName());
+-    }
+-
+-  }
+-
+-  private class Address {
+-
+-    private String region;
+-    private String postCode;
+-    private String settlment;
+-    private Country country;
+-
+-    public Address() {
+-      this.region = null;
+-      this.postCode = null;
+-      this.settlment = null;
+-      this.country = new Country();
+-    }
+-
+-    /**
+-     * @return the region
+-     */
+-    public String getRegion() {
+-      return region;
+-    }
+-
+-    /**
+-     * @param region
+-     *          the region to set
+-     */
+-    public void setRegion(String region) {
+-      this.region = region;
+-    }
+-
+-    /**
+-     * @return the postCode
+-     */
+-    public String getPostCode() {
+-      return postCode;
+-    }
+-
+-    /**
+-     * @param postCode
+-     *          the postCode to set
+-     */
+-    public void setPostCode(String postCode) {
+-      this.postCode = postCode;
+-    }
+-
+-    /**
+-     * @return the settlment
+-     */
+-    public String getSettlment() {
+-      return settlment;
+-    }
+-
+-    /**
+-     * @param settlment
+-     *          the settlment to set
+-     */
+-    public void setSettlment(String settlment) {
+-      this.settlment = settlment;
+-    }
+-
+-    /**
+-     * @return the country
+-     */
+-    public Country getCountry() {
+-      return country;
+-    }
+-
+-    /**
+-     * @param country
+-     *          the country to set
+-     */
+-    public void setCountry(Country country) {
+-      this.country = country;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#equals(java.lang.Object)
+-     */
+-    @Override
+-    public boolean equals(Object obj) {
+-      Address otherA = (Address) obj;
+-      if (this.settlment == null) {
+-        return otherA.getSettlment() == null;
+-      } else if (this.country == null) {
+-        return otherA.getCountry() == null;
+-      } else if (this.postCode == null) {
+-        return otherA.getPostCode() == null;
+-      } else if (this.region == null) {
+-        return otherA.getRegion() == null;
+-      }
+-
+-      return this.settlment.equals(otherA.getSettlment())
+-          && this.country.equals(otherA.getCountry())
+-          && this.postCode.equals(otherA.getPostCode())
+-          && this.region.equals(otherA.getRegion());
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#toString()
+-     */
+-    @Override
+-    public String toString() {
+-      StringBuilder builder = new StringBuilder();
+-      builder.append(settlment);
+-      builder.append(", ");
+-      builder.append(region);
+-      builder.append(" ");
+-      builder.append(postCode);
+-      builder.append(" ");
+-      builder.append(country.getContent());
+-      return builder.toString();
+-    }
+-  }
+-
+-  private class Country {
+-    private String key;
+-    private String content;
+-
+-    public Country() {
+-      this.key = null;
+-      this.content = null;
+-    }
+-
+-    /**
+-     * @return the key
+-     */
+-    public String getKey() {
+-      return key;
+-    }
+-
+-    /**
+-     * @param key
+-     *          the key to set
+-     */
+-    public void setKey(String key) {
+-      this.key = key;
+-    }
+-
+-    /**
+-     * @return the content
+-     */
+-    public String getContent() {
+-      return content;
+-    }
+-
+-    /**
+-     * @param content
+-     *          the content to set
+-     */
+-    public void setContent(String content) {
+-      this.content = content;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see java.lang.Object#equals(java.lang.Object)
+-     */
+-    @Override
+-    public boolean equals(Object obj) {
+-      Country otherC = (Country) obj;
+-
+-      if (this.key == null) {
+-        if (otherC.getKey() != null) {
+-          return false;
+-        } else {
+-          if (this.content == null) {
+-            if (otherC.getContent() != null) {
+-              return false;
+-            } else {
+-              return true;
+-            }
+-          } else {
+-            return content.equals(otherC.getContent());
+-          }
+-        }
+-      } else {
+-        if (this.content == null) {
+-          if (otherC.getContent() != null) {
+-            return false;
+-          } else {
+-            return this.key.equals(otherC.getKey());
+-          }
+-        } else {
+-          return this.key.equals(otherC.getKey())
+-              && this.content.equals(otherC.getContent());
+-        }
+-      }
+-    }
+-
+-  }
+-}
diff --git a/debian/patches/ignore-package-org.apache.poi.xwpf.patch b/debian/patches/ignore-package-org.apache.poi.xwpf.patch
new file mode 100644
index 0000000..3f120a7
--- /dev/null
+++ b/debian/patches/ignore-package-org.apache.poi.xwpf.patch
@@ -0,0 +1,647 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:11:40 +0100
+Subject: ignore package org.apache.poi.xwpf
+
+---
+ .../parser/microsoft/ooxml/XWPFListManager.java    | 165 --------
+ .../ooxml/XWPFWordExtractorDecorator.java          | 459 ---------------------
+ 2 files changed, 624 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
+deleted file mode 100644
+index 5654378..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFListManager.java
++++ /dev/null
+@@ -1,165 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.microsoft.ooxml;
+-
+-import org.apache.poi.xwpf.usermodel.XWPFAbstractNum;
+-import org.apache.poi.xwpf.usermodel.XWPFDocument;
+-import org.apache.poi.xwpf.usermodel.XWPFNum;
+-import org.apache.poi.xwpf.usermodel.XWPFNumbering;
+-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+-import org.apache.tika.parser.microsoft.AbstractListManager;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNum;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumLvl;
+-
+-
+-public class XWPFListManager extends AbstractListManager {
+-    private final static boolean OVERRIDE_AVAILABLE;
+-    private final static String SKIP_FORMAT = Character.toString((char) 61623);//if this shows up as the lvlText, don't show a number
+-
+-    static {
+-        boolean b = false;
+-        try {
+-            Class.forName("org.openxmlformats.schemas.wordprocessingml.x2006.main.CTNumLvl");
+-            b = true;
+-        } catch (ClassNotFoundException e) {
+-        }
+-        b = OVERRIDE_AVAILABLE = false;
+-
+-    }
+-
+-    private final XWPFNumbering numbering;
+-
+-    //map of numId (which paragraph series is this a member of?), levelcounts
+-    public XWPFListManager(XWPFDocument document) {
+-        numbering = document.getNumbering();
+-    }
+-
+-    /**
+-     *
+-     * @param paragraph paragraph
+-     * @return the formatted number or an empty string if something went wrong
+-     */
+-    public String getFormattedNumber(final XWPFParagraph paragraph) {
+-        int currNumId = paragraph.getNumID().intValue();
+-        XWPFNum xwpfNum = numbering.getNum(paragraph.getNumID());
+-        if (xwpfNum == null) {
+-            return "";
+-        }
+-        CTNum ctNum = xwpfNum.getCTNum();
+-        CTDecimalNumber abNum = ctNum.getAbstractNumId();
+-        int currAbNumId = abNum.getVal().intValue();
+-
+-        ParagraphLevelCounter lc = listLevelMap.get(currAbNumId);
+-        LevelTuple[] overrideTuples = overrideTupleMap.get(currNumId);
+-        if (lc == null) {
+-            lc = loadLevelTuples(abNum);
+-        }
+-        if (overrideTuples == null) {
+-            overrideTuples = loadOverrideTuples(ctNum, lc.getNumberOfLevels());
+-        }
+-
+-        String formattedString = lc.incrementLevel(paragraph.getNumIlvl().intValue(), overrideTuples);
+-
+-        listLevelMap.put(currAbNumId, lc);
+-        overrideTupleMap.put(currNumId, overrideTuples);
+-
+-        return formattedString;
+-    }
+-    
+-    private LevelTuple[] loadOverrideTuples(CTNum ctNum, int length) {
+-        LevelTuple[] levelTuples = new LevelTuple[length];
+-        int overrideLength = ctNum.sizeOfLvlOverrideArray();
+-        if (overrideLength == 0) {
+-            return null;
+-        }
+-        for (int i = 0; i < length; i++) {
+-            LevelTuple tuple;
+-            if (i >= overrideLength) {
+-                tuple = new LevelTuple("%"+i+".");
+-            } else {
+-                CTNumLvl ctNumLvl = ctNum.getLvlOverrideArray(i);
+-                if (ctNumLvl != null) {
+-                    tuple = buildTuple(i, ctNumLvl.getLvl());
+-                } else {
+-                    tuple = new LevelTuple("%"+i+".");
+-                }
+-            }
+-            levelTuples[i] = tuple;
+-        }
+-        return levelTuples;
+-    }
+-
+-
+-    private ParagraphLevelCounter loadLevelTuples(CTDecimalNumber abNum) {
+-        //Unfortunately, we need to go this far into the underlying structure
+-        //to get the abstract num information for the edge case where
+-        //someone skips a level and the format is not context-free, e.g. "1.B.i".
+-        XWPFAbstractNum abstractNum = numbering.getAbstractNum(abNum.getVal());
+-        CTAbstractNum ctAbstractNum = abstractNum.getCTAbstractNum();
+-
+-        LevelTuple[] levels = new LevelTuple[ctAbstractNum.sizeOfLvlArray()];
+-        for (int i = 0; i < levels.length; i++) {
+-            levels[i] = buildTuple(i, ctAbstractNum.getLvlArray(i));
+-        }
+-        return new ParagraphLevelCounter(levels);
+-    }
+-
+-    private LevelTuple buildTuple(int level, CTLvl ctLvl) {
+-        boolean isLegal = false;
+-        int start = 1;
+-        int restart = -1;
+-        String lvlText = "%" + level + ".";
+-        String numFmt = "decimal";
+-
+-
+-        if (ctLvl != null && ctLvl.getIsLgl() != null) {
+-            isLegal = true;
+-        }
+-
+-        if (ctLvl != null && ctLvl.getNumFmt() != null &&
+-                ctLvl.getNumFmt().getVal() != null) {
+-            numFmt = ctLvl.getNumFmt().getVal().toString();
+-        }
+-        if (ctLvl != null && ctLvl.getLvlRestart() != null &&
+-                ctLvl.getLvlRestart().getVal() != null) {
+-            restart = ctLvl.getLvlRestart().getVal().intValue();
+-        }
+-        if (ctLvl != null && ctLvl.getStart() != null &&
+-                ctLvl.getStart().getVal() != null) {
+-            start = ctLvl.getStart().getVal().intValue();
+-        } else {
+-
+-            //this is a hack. Currently, this gets the lowest possible
+-            //start for a given numFmt.  We should probably try to grab the
+-            //restartNumberingAfterBreak value in
+-            //e.g. <w:abstractNum w:abstractNumId="12" w15:restartNumberingAfterBreak="0">???
+-            if ("decimal".equals(numFmt) || "ordinal".equals(numFmt) || "decimalZero".equals(numFmt)) {
+-                start = 0;
+-            } else {
+-                start = 1;
+-            }
+-        }
+-        if (ctLvl != null && ctLvl.getLvlText() != null && ctLvl.getLvlText().getVal() != null) {
+-            lvlText = ctLvl.getLvlText().getVal();
+-        }
+-        return new LevelTuple(start, restart, lvlText, numFmt, isLegal);
+-    }
+-
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
+deleted file mode 100644
+index 6caf803..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
++++ /dev/null
+@@ -1,459 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.microsoft.ooxml;
+-
+-import javax.xml.namespace.QName;
+-import java.io.IOException;
+-import java.util.ArrayList;
+-import java.util.List;
+-
+-import org.apache.poi.openxml4j.opc.PackagePart;
+-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+-import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+-import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
+-import org.apache.poi.xwpf.usermodel.BodyType;
+-import org.apache.poi.xwpf.usermodel.IBody;
+-import org.apache.poi.xwpf.usermodel.IBodyElement;
+-import org.apache.poi.xwpf.usermodel.ICell;
+-import org.apache.poi.xwpf.usermodel.IRunElement;
+-import org.apache.poi.xwpf.usermodel.ISDTContent;
+-import org.apache.poi.xwpf.usermodel.XWPFDocument;
+-import org.apache.poi.xwpf.usermodel.XWPFHeaderFooter;
+-import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
+-import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
+-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+-import org.apache.poi.xwpf.usermodel.XWPFPicture;
+-import org.apache.poi.xwpf.usermodel.XWPFPictureData;
+-import org.apache.poi.xwpf.usermodel.XWPFRun;
+-import org.apache.poi.xwpf.usermodel.XWPFSDT;
+-import org.apache.poi.xwpf.usermodel.XWPFSDTCell;
+-import org.apache.poi.xwpf.usermodel.XWPFStyle;
+-import org.apache.poi.xwpf.usermodel.XWPFStyles;
+-import org.apache.poi.xwpf.usermodel.XWPFTable;
+-import org.apache.poi.xwpf.usermodel.XWPFTableCell;
+-import org.apache.poi.xwpf.usermodel.XWPFTableRow;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.microsoft.WordExtractor;
+-import org.apache.tika.parser.microsoft.WordExtractor.TagAndStyle;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.apache.xmlbeans.XmlCursor;
+-import org.apache.xmlbeans.XmlException;
+-import org.apache.xmlbeans.XmlObject;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTObject;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
+-import org.xml.sax.SAXException;
+-import org.xml.sax.helpers.AttributesImpl;
+-
+-public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
+-
+-    // could be improved by using the real delimiter in xchFollow [MS-DOC], v20140721, 2.4.6.3, Part 3, Step 3
+-    private static final String LIST_DELIMITER = " ";
+-
+-
+-    private XWPFDocument document;
+-    private XWPFStyles styles;
+-
+-    public XWPFWordExtractorDecorator(ParseContext context, XWPFWordExtractor extractor) {
+-        super(context, extractor);
+-
+-        document = (XWPFDocument) extractor.getDocument();
+-        styles = document.getStyles();
+-    }
+-
+-    /**
+-     * @see org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
+-     */
+-    @Override
+-    protected void buildXHTML(XHTMLContentHandler xhtml)
+-            throws SAXException, XmlException, IOException {
+-        XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
+-        XWPFListManager listManager = new XWPFListManager(document);
+-        // headers
+-        if (hfPolicy != null) {
+-            extractHeaders(xhtml, hfPolicy, listManager);
+-        }
+-
+-        // process text in the order that it occurs in
+-        extractIBodyText(document, listManager, xhtml);
+-
+-        // then all document tables
+-        if (hfPolicy != null) {
+-            extractFooters(xhtml, hfPolicy, listManager);
+-        }
+-    }
+-
+-    private void extractIBodyText(IBody bodyElement, XWPFListManager listManager,
+-                                  XHTMLContentHandler xhtml)
+-            throws SAXException, XmlException, IOException {
+-        for (IBodyElement element : bodyElement.getBodyElements()) {
+-            if (element instanceof XWPFParagraph) {
+-                XWPFParagraph paragraph = (XWPFParagraph) element;
+-                extractParagraph(paragraph, listManager, xhtml);
+-            }
+-            if (element instanceof XWPFTable) {
+-                XWPFTable table = (XWPFTable) element;
+-                extractTable(table, listManager, xhtml);
+-            }
+-            if (element instanceof XWPFSDT) {
+-                extractSDT((XWPFSDT) element, xhtml);
+-            }
+-
+-        }
+-    }
+-
+-    private void extractSDT(XWPFSDT element, XHTMLContentHandler xhtml) throws SAXException,
+-            XmlException, IOException {
+-        ISDTContent content = element.getContent();
+-        String tag = "p";
+-        xhtml.startElement(tag);
+-        xhtml.characters(content.getText());
+-        xhtml.endElement(tag);
+-    }
+-
+-    private void extractParagraph(XWPFParagraph paragraph, XWPFListManager listManager,
+-                                  XHTMLContentHandler xhtml)
+-            throws SAXException, XmlException, IOException {
+-        // If this paragraph is actually a whole new section, then
+-        //  it could have its own headers and footers
+-        // Check and handle if so
+-        XWPFHeaderFooterPolicy headerFooterPolicy = null;
+-        if (paragraph.getCTP().getPPr() != null) {
+-            CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr();
+-            if (ctSectPr != null) {
+-                headerFooterPolicy =
+-                        new XWPFHeaderFooterPolicy(document, ctSectPr);
+-                extractHeaders(xhtml, headerFooterPolicy, listManager);
+-            }
+-        }
+-
+-        // Is this a paragraph, or a heading?
+-        String tag = "p";
+-        String styleClass = null;
+-        if (paragraph.getStyleID() != null) {
+-            XWPFStyle style = styles.getStyle(
+-                    paragraph.getStyleID()
+-            );
+-
+-            if (style != null && style.getName() != null) {
+-                TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(
+-                        style.getName(), paragraph.getPartType() == BodyType.TABLECELL
+-                );
+-                tag = tas.getTag();
+-                styleClass = tas.getStyleClass();
+-            }
+-        }
+-
+-        if (styleClass == null) {
+-            xhtml.startElement(tag);
+-        } else {
+-            xhtml.startElement(tag, "class", styleClass);
+-        }
+-
+-        writeParagraphNumber(paragraph, listManager, xhtml);
+-        // Output placeholder for any embedded docs:
+-
+-        // TODO: replace w/ XPath/XQuery:
+-        for (XWPFRun run : paragraph.getRuns()) {
+-            XmlCursor c = run.getCTR().newCursor();
+-            c.selectPath("./*");
+-            while (c.toNextSelection()) {
+-                XmlObject o = c.getObject();
+-                if (o instanceof CTObject) {
+-                    XmlCursor c2 = o.newCursor();
+-                    c2.selectPath("./*");
+-                    while (c2.toNextSelection()) {
+-                        XmlObject o2 = c2.getObject();
+-
+-                        XmlObject embedAtt = o2.selectAttribute(new QName("Type"));
+-                        if (embedAtt != null && embedAtt.getDomNode().getNodeValue().equals("Embed")) {
+-                            // Type is "Embed"
+-                            XmlObject relIDAtt = o2.selectAttribute(new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id"));
+-                            if (relIDAtt != null) {
+-                                String relID = relIDAtt.getDomNode().getNodeValue();
+-                                AttributesImpl attributes = new AttributesImpl();
+-                                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+-                                attributes.addAttribute("", "id", "id", "CDATA", relID);
+-                                xhtml.startElement("div", attributes);
+-                                xhtml.endElement("div");
+-                            }
+-                        }
+-                    }
+-                    c2.dispose();
+-                }
+-            }
+-
+-            c.dispose();
+-        }
+-
+-        // Attach bookmarks for the paragraph
+-        // (In future, we might put them in the right place, for now
+-        //  we just put them in the correct paragraph)
+-        for (int i = 0; i < paragraph.getCTP().sizeOfBookmarkStartArray(); i++) {
+-            CTBookmark bookmark = paragraph.getCTP().getBookmarkStartArray(i);
+-            xhtml.startElement("a", "name", bookmark.getName());
+-            xhtml.endElement("a");
+-        }
+-
+-        TmpFormatting fmtg = new TmpFormatting(false, false);
+-
+-        // Do the iruns
+-        for (IRunElement run : paragraph.getIRuns()) {
+-            if (run instanceof XWPFSDT) {
+-                fmtg = closeStyleTags(xhtml, fmtg);
+-                processSDTRun((XWPFSDT) run, xhtml);
+-                //for now, we're ignoring formatting in sdt
+-                //if you hit an sdt reset to false
+-                fmtg.setBold(false);
+-                fmtg.setItalic(false);
+-            } else {
+-                fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg);
+-            }
+-        }
+-        closeStyleTags(xhtml, fmtg);
+-
+-
+-        // Now do any comments for the paragraph
+-        XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null);
+-        String commentText = comments.getCommentText();
+-        if (commentText != null && commentText.length() > 0) {
+-            xhtml.characters(commentText);
+-        }
+-
+-        String footnameText = paragraph.getFootnoteText();
+-        if (footnameText != null && footnameText.length() > 0) {
+-            xhtml.characters(footnameText + "\n");
+-        }
+-
+-        // Also extract any paragraphs embedded in text boxes:
+-        for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) {
+-            extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml);
+-        }
+-
+-        // Finish this paragraph
+-        xhtml.endElement(tag);
+-
+-        if (headerFooterPolicy != null) {
+-            extractFooters(xhtml, headerFooterPolicy, listManager);
+-        }
+-    }
+-
+-    private void writeParagraphNumber(XWPFParagraph paragraph,
+-                                      XWPFListManager listManager,
+-                                      XHTMLContentHandler xhtml) throws SAXException {
+-        if (paragraph.getNumIlvl() == null) {
+-            return;
+-        }
+-        String number = listManager.getFormattedNumber(paragraph);
+-        if (number != null) {
+-            xhtml.characters(number);
+-        }
+-
+-    }
+-
+-    private TmpFormatting closeStyleTags(XHTMLContentHandler xhtml,
+-                                         TmpFormatting fmtg) throws SAXException {
+-        // Close any still open style tags
+-        if (fmtg.isItalic()) {
+-            xhtml.endElement("i");
+-            fmtg.setItalic(false);
+-        }
+-        if (fmtg.isBold()) {
+-            xhtml.endElement("b");
+-            fmtg.setBold(false);
+-        }
+-        return fmtg;
+-    }
+-
+-    private TmpFormatting processRun(XWPFRun run, XWPFParagraph paragraph,
+-                                     XHTMLContentHandler xhtml, TmpFormatting tfmtg)
+-            throws SAXException, XmlException, IOException {
+-        // True if we are currently in the named style tag:
+-        if (run.isBold() != tfmtg.isBold()) {
+-            if (tfmtg.isItalic()) {
+-                xhtml.endElement("i");
+-                tfmtg.setItalic(false);
+-            }
+-            if (run.isBold()) {
+-                xhtml.startElement("b");
+-            } else {
+-                xhtml.endElement("b");
+-            }
+-            tfmtg.setBold(run.isBold());
+-        }
+-
+-        if (run.isItalic() != tfmtg.isItalic()) {
+-            if (run.isItalic()) {
+-                xhtml.startElement("i");
+-            } else {
+-                xhtml.endElement("i");
+-            }
+-            tfmtg.setItalic(run.isItalic());
+-        }
+-
+-        boolean addedHREF = false;
+-        if (run instanceof XWPFHyperlinkRun) {
+-            XWPFHyperlinkRun linkRun = (XWPFHyperlinkRun) run;
+-            XWPFHyperlink link = linkRun.getHyperlink(document);
+-            if (link != null && link.getURL() != null) {
+-                xhtml.startElement("a", "href", link.getURL());
+-                addedHREF = true;
+-            } else if (linkRun.getAnchor() != null && linkRun.getAnchor().length() > 0) {
+-                xhtml.startElement("a", "href", "#" + linkRun.getAnchor());
+-                addedHREF = true;
+-            }
+-        }
+-
+-        xhtml.characters(run.toString());
+-
+-        // If we have any pictures, output them
+-        for (XWPFPicture picture : run.getEmbeddedPictures()) {
+-            if (paragraph.getDocument() != null) {
+-                XWPFPictureData data = picture.getPictureData();
+-                if (data != null) {
+-                    AttributesImpl attr = new AttributesImpl();
+-
+-                    attr.addAttribute("", "src", "src", "CDATA", "embedded:" + data.getFileName());
+-                    attr.addAttribute("", "alt", "alt", "CDATA", picture.getDescription());
+-
+-                    xhtml.startElement("img", attr);
+-                    xhtml.endElement("img");
+-                }
+-            }
+-        }
+-
+-        if (addedHREF) {
+-            xhtml.endElement("a");
+-        }
+-
+-        return tfmtg;
+-    }
+-
+-    private void processSDTRun(XWPFSDT run, XHTMLContentHandler xhtml)
+-            throws SAXException, XmlException, IOException {
+-        xhtml.characters(run.getContent().getText());
+-    }
+-
+-    private void extractTable(XWPFTable table, XWPFListManager listManager,
+-                              XHTMLContentHandler xhtml)
+-            throws SAXException, XmlException, IOException {
+-        xhtml.startElement("table");
+-        xhtml.startElement("tbody");
+-        for (XWPFTableRow row : table.getRows()) {
+-            xhtml.startElement("tr");
+-            for (ICell cell : row.getTableICells()) {
+-                xhtml.startElement("td");
+-                if (cell instanceof XWPFTableCell) {
+-                    extractIBodyText((XWPFTableCell) cell, listManager, xhtml);
+-                } else if (cell instanceof XWPFSDTCell) {
+-                    xhtml.characters(((XWPFSDTCell) cell).getContent().getText());
+-                }
+-                xhtml.endElement("td");
+-            }
+-            xhtml.endElement("tr");
+-        }
+-        xhtml.endElement("tbody");
+-        xhtml.endElement("table");
+-    }
+-
+-    private void extractFooters(
+-            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy,
+-            XWPFListManager listManager)
+-            throws SAXException, XmlException, IOException {
+-        // footers
+-        if (hfPolicy.getFirstPageFooter() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getFirstPageFooter(), listManager);
+-        }
+-        if (hfPolicy.getEvenPageFooter() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getEvenPageFooter(), listManager);
+-        }
+-        if (hfPolicy.getDefaultFooter() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getDefaultFooter(), listManager);
+-        }
+-    }
+-
+-    private void extractHeaders(
+-            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy, XWPFListManager listManager)
+-            throws SAXException, XmlException, IOException {
+-        if (hfPolicy == null) return;
+-
+-        if (hfPolicy.getFirstPageHeader() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getFirstPageHeader(), listManager);
+-        }
+-
+-        if (hfPolicy.getEvenPageHeader() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getEvenPageHeader(), listManager);
+-        }
+-
+-        if (hfPolicy.getDefaultHeader() != null) {
+-            extractHeaderText(xhtml, hfPolicy.getDefaultHeader(), listManager);
+-        }
+-    }
+-
+-    private void extractHeaderText(XHTMLContentHandler xhtml, XWPFHeaderFooter header, XWPFListManager listManager) throws SAXException, XmlException, IOException {
+-
+-        for (IBodyElement e : header.getBodyElements()) {
+-            if (e instanceof XWPFParagraph) {
+-                extractParagraph((XWPFParagraph) e, listManager, xhtml);
+-            } else if (e instanceof XWPFTable) {
+-                extractTable((XWPFTable) e, listManager, xhtml);
+-            } else if (e instanceof XWPFSDT) {
+-                extractSDT((XWPFSDT) e, xhtml);
+-            }
+-        }
+-    }
+-
+-    /**
+-     * Word documents are simple, they only have the one
+-     * main part
+-     */
+-    @Override
+-    protected List<PackagePart> getMainDocumentParts() {
+-        List<PackagePart> parts = new ArrayList<PackagePart>();
+-        parts.add(document.getPackagePart());
+-        return parts;
+-    }
+-
+-    private class TmpFormatting {
+-        private boolean bold = false;
+-        private boolean italic = false;
+-
+-        private TmpFormatting(boolean bold, boolean italic) {
+-            this.bold = bold;
+-            this.italic = italic;
+-        }
+-
+-        public boolean isBold() {
+-            return bold;
+-        }
+-
+-        public void setBold(boolean bold) {
+-            this.bold = bold;
+-        }
+-
+-        public boolean isItalic() {
+-            return italic;
+-        }
+-
+-        public void setItalic(boolean italic) {
+-            this.italic = italic;
+-        }
+-
+-    }
+-
+-}
diff --git a/debian/patches/ignore-sqlite-jdbc.patch b/debian/patches/ignore-sqlite-jdbc.patch
new file mode 100644
index 0000000..3b8f7a3
--- /dev/null
+++ b/debian/patches/ignore-sqlite-jdbc.patch
@@ -0,0 +1,125 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:09:08 +0100
+Subject: ignore sqlite jdbc
+
+---
+ .../apache/tika/parser/jdbc/SQLite3DBParser.java   | 110 ---------------------
+ 1 file changed, 110 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
+deleted file mode 100644
+index 4ea8f30..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
++++ /dev/null
+@@ -1,110 +0,0 @@
+-package org.apache.tika.parser.jdbc;
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-import java.io.File;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.sql.Connection;
+-import java.sql.ResultSet;
+-import java.sql.SQLException;
+-import java.sql.Statement;
+-import java.util.LinkedList;
+-import java.util.List;
+-import java.util.Set;
+-
+-import org.apache.commons.io.IOExceptionWithCause;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.ParseContext;
+-import org.sqlite.SQLiteConfig;
+-
+-/**
+- * This is the implementation of the db parser for SQLite.
+- * <p/>
+- * This parser is internal only; it should not be registered in the services
+- * file or configured in the TikaConfig xml file.
+- */
+-class SQLite3DBParser extends AbstractDBParser {
+-
+-    protected static final String SQLITE_CLASS_NAME = "org.sqlite.JDBC";
+-
+-    /**
+-     * @param context context
+-     * @return null (always)
+-     */
+-    @Override
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return null;
+-    }
+-
+-    @Override
+-    protected Connection getConnection(InputStream stream, Metadata metadata, ParseContext context) throws IOException {
+-        String connectionString = getConnectionString(stream, metadata, context);
+-
+-        Connection connection = null;
+-        try {
+-            Class.forName(getJDBCClassName());
+-        } catch (ClassNotFoundException e) {
+-            throw new IOExceptionWithCause(e);
+-        }
+-        try {
+-            SQLiteConfig config = new SQLiteConfig();
+-
+-            //good habit, but effectively meaningless here
+-            config.setReadOnly(true);
+-            connection = config.createConnection(connectionString);
+-
+-        } catch (SQLException e) {
+-            throw new IOException(e.getMessage());
+-        }
+-        return connection;
+-    }
+-
+-    @Override
+-    protected String getConnectionString(InputStream is, Metadata metadata, ParseContext context) throws IOException {
+-        File dbFile = TikaInputStream.get(is).getFile();
+-        return "jdbc:sqlite:" + dbFile.getAbsolutePath();
+-    }
+-
+-    @Override
+-    protected String getJDBCClassName() {
+-        return SQLITE_CLASS_NAME;
+-    }
+-
+-    @Override
+-    protected List<String> getTableNames(Connection connection, Metadata metadata,
+-                                         ParseContext context) throws SQLException {
+-        List<String> tableNames = new LinkedList<String>();
+-
+-        try (Statement st = connection.createStatement()) {
+-            String sql = "SELECT name FROM sqlite_master WHERE type='table'";
+-            ResultSet rs = st.executeQuery(sql);
+-
+-            while (rs.next()) {
+-                tableNames.add(rs.getString(1));
+-            }
+-        }
+-        return tableNames;
+-    }
+-
+-    @Override
+-    public JDBCTableReader getTableReader(Connection connection, String tableName, ParseContext context) {
+-        return new SQLite3TableReader(connection, tableName, context);
+-    }
+-}
diff --git a/debian/patches/ignore-ucar.nc2.patch b/debian/patches/ignore-ucar.nc2.patch
new file mode 100644
index 0000000..1bb0771
--- /dev/null
+++ b/debian/patches/ignore-ucar.nc2.patch
@@ -0,0 +1,137 @@
+From: Markus Koschany <apo at debian.org>
+Date: Tue, 1 Dec 2015 19:13:14 +0100
+Subject: ignore ucar.nc2
+
+---
+ .../org/apache/tika/parser/grib/GribParser.java    | 121 ---------------------
+ 1 file changed, 121 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/grib/GribParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/grib/GribParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/grib/GribParser.java
+deleted file mode 100644
+index 6f8756d..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/grib/GribParser.java
++++ /dev/null
+@@ -1,121 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.grib;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.io.File;
+-import java.util.Collections;
+-import java.util.Set;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-import ucar.nc2.Attribute;
+-import ucar.nc2.Dimension;
+-import ucar.nc2.NetcdfFile;
+-import ucar.nc2.Variable;
+-import ucar.nc2.dataset.NetcdfDataset;
+-
+-public class GribParser extends AbstractParser {
+-
+-    private static final long serialVersionUID = 7855458954474247655L;
+-
+-    public static final String GRIB_MIME_TYPE = "application/x-grib2";
+-
+-    private final Set<MediaType> SUPPORTED_TYPES =
+-            Collections.singleton(MediaType.application("x-grib2"));
+-
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    public void parse(InputStream stream, ContentHandler handler,
+-                      Metadata metadata, ParseContext context) throws IOException,
+-            SAXException, TikaException {
+-
+-        //Set MIME type as grib2
+-        metadata.set(Metadata.CONTENT_TYPE, GRIB_MIME_TYPE);
+-
+-        TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources());
+-        File gribFile = tis.getFile();
+-
+-        try {
+-            NetcdfFile ncFile = NetcdfDataset.openFile(gribFile.getAbsolutePath(), null);
+-
+-            // first parse out the set of global attributes
+-            for (Attribute attr : ncFile.getGlobalAttributes()) {
+-                Property property = resolveMetadataKey(attr.getFullName());
+-                if (attr.getDataType().isString()) {
+-                    metadata.add(property, attr.getStringValue());
+-                } else if (attr.getDataType().isNumeric()) {
+-                    int value = attr.getNumericValue().intValue();
+-                    metadata.add(property, String.valueOf(value));
+-                }
+-            }
+-
+-            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-
+-            xhtml.startDocument();
+-
+-            xhtml.newline();
+-            xhtml.startElement("ul");
+-            xhtml.characters("dimensions:");
+-            xhtml.newline();
+-
+-            for (Dimension dim : ncFile.getDimensions()){
+-                xhtml.element("li", dim.getFullName() + "=" + String.valueOf(dim.getLength()) + ";");
+-                xhtml.newline();
+-            }
+-
+-            xhtml.startElement("ul");
+-            xhtml.characters("variables:");
+-            xhtml.newline();
+-
+-            for (Variable var : ncFile.getVariables()){
+-                xhtml.element("p", String.valueOf(var.getDataType()) + var.getNameAndDimensions() + ";");
+-                for(Attribute element : var.getAttributes()){
+-                    xhtml.element("li", " :" + element + ";");
+-                    xhtml.newline();
+-                }
+-            }
+-            xhtml.endElement("ul");
+-            xhtml.endElement("ul");
+-            xhtml.endDocument();
+-
+-        } catch (IOException e) {
+-            throw new TikaException("NetCDF parse error", e);
+-        }
+-    }
+-
+-    private Property resolveMetadataKey(String localName) {
+-        if ("title".equals(localName)) {
+-            return TikaCoreProperties.TITLE;
+-        }
+-        return Property.internalText(localName);
+-    }
+-
+-}
+\ No newline at end of file
diff --git a/debian/patches/series b/debian/patches/series
index 0418cf5..44c83e4 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,3 +3,16 @@
 03-ignore-netcdf.patch
 05-osgi-compatibility.patch
 06-optional-parser-dependencies.patch
+ignore-org.apache.poi.hssf.extractor.patch
+ignore-org.json.XML.patch
+ignore-org.apache.ctakes.patch
+ignore-com.pff.patch
+ignore-sqlite-jdbc.patch
+ignore-com.drew.imaging.webp.patch
+ignore-package-org.apache.poi.xwpf.patch
+ignore-opennlp.tools.namefind.patch
+ignore-javax.ws.rs.core.patch
+ignore-ucar.nc2.patch
+ignore-com.github.junrar.exception.patch
+ignore-com.healthmarketscience.jackcess.patch
+ignore-org.apache.poi.hslf.usermodel.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git



More information about the pkg-java-commits mailing list