[tika] 10/23: Add refreshed patches again.
Markus Koschany apo-guest at moszumanska.debian.org
Mon Nov 30 20:27:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
apo-guest pushed a commit to branch master
in repository tika.
commit 8f685429e2f880595c101744256f3a4c51e4e8fa
Author: Markus Koschany <apo at debian.org>
Date: Mon Nov 30 16:08:35 2015 +0000
Add refreshed patches again.
---
 debian/patches/01-jar-packaging.patch             |  16 +-
 debian/patches/MP4Parser.patch                    | 340 ++++++++++++++++++++++
 debian/patches/netcdf.patch                       | 290 ++++++++++++++++++
 debian/patches/optional-parser-dependencies.patch | 303 +++++++++++++++++++
 debian/patches/osgi.patch                         |  31 ++
 debian/patches/series                             |   4 +
 6 files changed, 981 insertions(+), 3 deletions(-)
diff --git a/debian/patches/01-jar-packaging.patch b/debian/patches/01-jar-packaging.patch
index 3186ec0..38ccbaa 100644
--- a/debian/patches/01-jar-packaging.patch
+++ b/debian/patches/01-jar-packaging.patch
@@ -1,6 +1,14 @@
-Description: Change the Maven packaging from bundle to jar to avoid build issues with maven-debian-helper
-Author: Emmanuel Bourg <ebourg at apache.org>
-Forwarded: not-needed
+From: Debian Java Maintainers <pkg-java-maintainers at lists.alioth.debian.org>
+Date: Mon, 30 Nov 2015 15:50:06 +0000
+Subject: jar-packaging
+
+---
+ tika-core/pom.xml | 2 +-
+ tika-parsers/pom.xml | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tika-core/pom.xml b/tika-core/pom.xml
+index 1ed5538..7f80b87 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -30,7 +30,7 @@
@@ -12,6 +20,8 @@ Forwarded: not-needed
<name>Apache Tika core</name>
<url>http://tika.apache.org/</url>
+diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
+index 9557a3d..c0f673f 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -30,7 +30,7 @@
diff --git a/debian/patches/MP4Parser.patch b/debian/patches/MP4Parser.patch
new file mode 100644
index 0000000..3998d88
--- /dev/null
+++ b/debian/patches/MP4Parser.patch
@@ -0,0 +1,340 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:50:18 +0000
+Subject: MP4Parser
+
+---
+ .../java/org/apache/tika/parser/mp4/MP4Parser.java | 325 ---------------------
+ 1 file changed, 325 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+deleted file mode 100644
+index 20c8246..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
++++ /dev/null
+@@ -1,325 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.mp4;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.text.DecimalFormat;
+-import java.text.NumberFormat;
+-import java.util.Arrays;
+-import java.util.Collections;
+-import java.util.HashMap;
+-import java.util.List;
+-import java.util.Locale;
+-import java.util.Map;
+-import java.util.Set;
+-
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.metadata.XMP;
+-import org.apache.tika.metadata.XMPDM;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import com.coremedia.iso.IsoFile;
+-import com.coremedia.iso.boxes.Box;
+-import com.coremedia.iso.boxes.Container;
+-import com.coremedia.iso.boxes.FileTypeBox;
+-import com.coremedia.iso.boxes.MetaBox;
+-import com.coremedia.iso.boxes.MovieBox;
+-import com.coremedia.iso.boxes.MovieHeaderBox;
+-import com.coremedia.iso.boxes.SampleDescriptionBox;
+-import com.coremedia.iso.boxes.SampleTableBox;
+-import com.coremedia.iso.boxes.TrackBox;
+-import com.coremedia.iso.boxes.TrackHeaderBox;
+-import com.coremedia.iso.boxes.UserDataBox;
+-import com.coremedia.iso.boxes.apple.AppleItemListBox;
+-import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry;
+-import com.googlecode.mp4parser.boxes.apple.AppleAlbumBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleArtistBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleArtist2Box;
+-import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box;
+-import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox;
+-import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox;
+-
+-/**
+- * Parser for the MP4 media container format, as well as the older
+- * QuickTime format that MP4 is based on.
+- *
+- * This uses the MP4Parser project from http://code.google.com/p/mp4parser/
+- * to do the underlying parsing
+- */
+-public class MP4Parser extends AbstractParser {
+- /** Serial version UID */
+- private static final long serialVersionUID = 84011216792285L;
+- /** TODO Replace this with a 2dp Duration Property Converter */
+- private static final DecimalFormat DURATION_FORMAT =
+- (DecimalFormat)NumberFormat.getNumberInstance(Locale.ROOT);
+- static {
+- DURATION_FORMAT.applyPattern("0.0#");
+- }
+-
+- // Ensure this stays in Sync with the entries in tika-mimetypes.xml
+- private static final Map<MediaType,List<String>> typesMap = new HashMap<MediaType, List<String>>();
+- static {
+- // All types should be 4 bytes long, space padded as needed
+- typesMap.put(MediaType.audio("mp4"), Arrays.asList(
+- "M4A ", "M4B ", "F4A ", "F4B "));
+- typesMap.put(MediaType.video("3gpp"), Arrays.asList(
+- "3ge6", "3ge7", "3gg6", "3gp1", "3gp2", "3gp3", "3gp4", "3gp5", "3gp6", "3gs7"));
+- typesMap.put(MediaType.video("3gpp2"), Arrays.asList(
+- "3g2a", "3g2b", "3g2c"));
+- typesMap.put(MediaType.video("mp4"), Arrays.asList(
+- "mp41", "mp42"));
+- typesMap.put(MediaType.video("x-m4v"), Arrays.asList(
+- "M4V ", "M4VH", "M4VP"));
+-
+- typesMap.put(MediaType.video("quicktime"), Collections.<String>emptyList());
+- typesMap.put(MediaType.application("mp4"), Collections.<String>emptyList());
+- }
+-
+- private static final Set<MediaType> SUPPORTED_TYPES =
+- Collections.unmodifiableSet(typesMap.keySet());
+-
+- public Set<MediaType> getSupportedTypes(ParseContext context) {
+- return SUPPORTED_TYPES;
+- }
+-
+-
+- public void parse(
+- InputStream stream, ContentHandler handler,
+- Metadata metadata, ParseContext context)
+- throws IOException, SAXException, TikaException {
+- IsoFile isoFile;
+-
+- // The MP4Parser library accepts either a File, or a byte array
+- // As MP4 video files are typically large, always use a file to
+- // avoid OOMs that may occur with in-memory buffering
+- TemporaryResources tmp = new TemporaryResources();
+- TikaInputStream tstream = TikaInputStream.get(stream, tmp);
+- try {
+- isoFile = new IsoFile(new DirectFileReadDataSource(tstream.getFile()));
+- tmp.addResource(isoFile);
+-
+- // Grab the file type box
+- FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class);
+- if (fileType != null) {
+- // Identify the type
+- MediaType type = MediaType.application("mp4");
+- for (MediaType t : typesMap.keySet()) {
+- if (typesMap.get(t).contains(fileType.getMajorBrand())) {
+- type = t;
+- break;
+- }
+- }
+- metadata.set(Metadata.CONTENT_TYPE, type.toString());
+-
+- if (type.getType().equals("audio")) {
+- metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim());
+- }
+- } else {
+- // Some older QuickTime files lack the FileType
+- metadata.set(Metadata.CONTENT_TYPE, "video/quicktime");
+- }
+-
+-
+- // Get the main MOOV box
+- MovieBox moov = getOrNull(isoFile, MovieBox.class);
+- if (moov == null) {
+- // Bail out
+- return;
+- }
+-
+-
+- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+- xhtml.startDocument();
+-
+-
+- // Pull out some information from the header box
+- MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
+- if (mHeader != null) {
+- // Get the creation and modification dates
+- metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime());
+- metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());
+-
+- // Get the duration
+- double durationSeconds = ((double)mHeader.getDuration()) / mHeader.getTimescale();
+- metadata.set(XMPDM.DURATION, DURATION_FORMAT.format(durationSeconds));
+-
+- // The timescale is normally the sampling rate
+- metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)mHeader.getTimescale());
+- }
+-
+-
+- // Get some more information from the track header
+- // TODO Decide how to handle multiple tracks
+- List<TrackBox> tb = moov.getBoxes(TrackBox.class);
+- if (tb.size() > 0) {
+- TrackBox track = tb.get(0);
+-
+- TrackHeaderBox header = track.getTrackHeaderBox();
+- // Get the creation and modification dates
+- metadata.set(TikaCoreProperties.CREATED, header.getCreationTime());
+- metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime());
+-
+- // Get the video with and height
+- metadata.set(Metadata.IMAGE_WIDTH, (int)header.getWidth());
+- metadata.set(Metadata.IMAGE_LENGTH, (int)header.getHeight());
+-
+- // Get the sample information
+- SampleTableBox samples = track.getSampleTableBox();
+- SampleDescriptionBox sampleDesc = samples.getSampleDescriptionBox();
+- if (sampleDesc != null) {
+- // Look for the first Audio Sample, if present
+- AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class);
+- if (sample != null) {
+- XMPDM.ChannelTypePropertyConverter.convertAndSet(metadata, sample.getChannelCount());
+- //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize()); // TODO Num -> Type mapping
+- metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)sample.getSampleRate());
+- //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket());
+- //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample());
+- }
+- }
+- }
+-
+- // Get metadata from the User Data Box
+- UserDataBox userData = getOrNull(moov, UserDataBox.class);
+- if (userData != null) {
+- MetaBox meta = getOrNull(userData, MetaBox.class);
+-
+- // Check for iTunes Metadata
+- // See http://atomicparsley.sourceforge.net/mpeg-4files.html and
+- // http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these
+- AppleItemListBox apple = getOrNull(meta, AppleItemListBox.class);
+- if (apple != null) {
+- // Title
+- AppleNameBox title = getOrNull(apple, AppleNameBox.class);
+- addMetadata(TikaCoreProperties.TITLE, metadata, title);
+-
+- // Artist
+- AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class);
+- addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
+- addMetadata(XMPDM.ARTIST, metadata, artist);
+-
+- // Album Artist
+- AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
+- addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);
+-
+- // Album
+- AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
+- addMetadata(XMPDM.ALBUM, metadata, album);
+-
+- // Composer
+- AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class);
+- addMetadata(XMPDM.COMPOSER, metadata, composer);
+-
+- // Genre
+- AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class);
+- addMetadata(XMPDM.GENRE, metadata, genre);
+-
+- // Year
+- AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class);
+- if (year != null) {
+- metadata.set(XMPDM.RELEASE_DATE, year.getValue());
+- }
+-
+- // Track number
+- AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class);
+- if (trackNum != null) {
+- metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA());
+- //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
+- }
+-
+- // Disc number
+- AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
+- if (discNum != null) {
+- metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
+- }
+-
+- // Compilation
+- AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
+- if (compilation != null) {
+- metadata.set(XMPDM.COMPILATION, (int)compilation.getValue());
+- }
+-
+- // Comment
+- AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
+- addMetadata(XMPDM.LOG_COMMENT, metadata, comment);
+-
+- // Encoder
+- AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
+- if (encoder != null) {
+- metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
+- }
+-
+-
+- // As text
+- for (Box box : apple.getBoxes()) {
+- if (box instanceof Utf8AppleDataBox) {
+- xhtml.element("p", ((Utf8AppleDataBox)box).getValue());
+- }
+- }
+- }
+-
+- // TODO Check for other kinds too
+- }
+-
+- // All done
+- xhtml.endDocument();
+-
+- } finally {
+- tmp.dispose();
+- }
+-
+- }
+-
+- private static void addMetadata(String key, Metadata m, Utf8AppleDataBox metadata) {
+- if (metadata != null) {
+- m.add(key, metadata.getValue());
+- }
+- }
+- private static void addMetadata(Property prop, Metadata m, Utf8AppleDataBox metadata) {
+- if (metadata != null) {
+- m.set(prop, metadata.getValue());
+- }
+- }
+-
+- private static <T extends Box> T getOrNull(Container box, Class<T> clazz) {
+- if (box == null) return null;
+-
+- List<T> boxes = box.getBoxes(clazz);
+- if (boxes.size() == 0) {
+- return null;
+- }
+- return boxes.get(0);
+- }
+-}
diff --git a/debian/patches/netcdf.patch b/debian/patches/netcdf.patch
new file mode 100644
index 0000000..2fe84d6
--- /dev/null
+++ b/debian/patches/netcdf.patch
@@ -0,0 +1,290 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:53:57 +0000
+Subject: netcdf
+
+---
+ .../java/org/apache/tika/parser/hdf/HDFParser.java | 122 -----------------
+ .../apache/tika/parser/netcdf/NetCDFParser.java | 144 ---------------------
+ 2 files changed, 266 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
+deleted file mode 100644
+index 821493b..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
++++ /dev/null
+@@ -1,122 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.hdf;
+-
+-//JDK imports
+-import java.io.ByteArrayOutputStream;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Set;
+-
+-import org.apache.commons.io.IOUtils;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.netcdf.NetCDFParser;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import ucar.nc2.Attribute;
+-import ucar.nc2.Group;
+-import ucar.nc2.NetcdfFile;
+-
+-/**
+- *
+- * Since the {@link NetCDFParser} depends on the <a
+- * href="http://www.unidata.ucar.edu/software/netcdf-java" >NetCDF-Java</a> API,
+- * we are able to use it to parse HDF files as well. See <a href=
+- * "http://www.unidata.ucar.edu/software/netcdf-java/formats/FileTypes.html"
+- * >this link</a> for more information.
+- */
+-public class HDFParser extends AbstractParser {
+-
+- /** Serial version UID */
+- private static final long serialVersionUID = 1091208208003437549L;
+-
+- private static final Set<MediaType> SUPPORTED_TYPES =
+- Collections.singleton(MediaType.application("x-hdf"));
+-
+- /*
+- * (non-Javadoc)
+- *
+- * @see
+- * org.apache.tika.parser.netcdf.NetCDFParser#getSupportedTypes(org.apache
+- * .tika.parser.ParseContext)
+- */
+- public Set<MediaType> getSupportedTypes(ParseContext context) {
+- return SUPPORTED_TYPES;
+- }
+-
+- /*
+- * (non-Javadoc)
+- *
+- * @see
+- * org.apache.tika.parser.netcdf.NetCDFParser#parse(java.io.InputStream,
+- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+- * org.apache.tika.parser.ParseContext)
+- */
+- public void parse(InputStream stream, ContentHandler handler,
+- Metadata metadata, ParseContext context) throws IOException,
+- SAXException, TikaException {
+- ByteArrayOutputStream os = new ByteArrayOutputStream();
+- IOUtils.copy(stream, os);
+-
+- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+- if (name == null) {
+- name = "";
+- }
+- try {
+- NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray());
+- unravelStringMet(ncFile, null, metadata);
+- } catch (IOException e) {
+- throw new TikaException("HDF parse error", e);
+- }
+-
+- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+- xhtml.startDocument();
+- xhtml.endDocument();
+- }
+-
+- protected void unravelStringMet(NetcdfFile ncFile, Group group, Metadata met) {
+- if (group == null) {
+- group = ncFile.getRootGroup();
+- }
+-
+- // get file type
+- met.set("File-Type-Description", ncFile.getFileTypeDescription());
+- // unravel its string attrs
+- for (Attribute attribute : group.getAttributes()) {
+- if (attribute.isString()) {
+- met.add(attribute.getFullName(), attribute.getStringValue());
+- } else {
+- // try and cast its value to a string
+- met.add(attribute.getFullName(), String.valueOf(attribute
+- .getNumericValue()));
+- }
+- }
+-
+- for (Group g : group.getGroups()) {
+- unravelStringMet(ncFile, g, met);
+- }
+- }
+-
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
+deleted file mode 100644
+index 57254f8..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
++++ /dev/null
+@@ -1,144 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements. See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License. You may obtain a copy of the License at
+- *
+- * http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.netcdf;
+-
+-//JDK imports
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Set;
+-import java.util.List;
+-
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.Parser;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import ucar.nc2.Attribute;
+-import ucar.nc2.NetcdfFile;
+-import ucar.nc2.Variable;
+-import ucar.nc2.Dimension;
+-
+-/**
+- * A {@link Parser} for <a
+- * href="http://www.unidata.ucar.edu/software/netcdf/index.html">NetCDF</a>
+- * files using the UCAR, MIT-licensed <a
+- * href="http://www.unidata.ucar.edu/software/netcdf-java/">NetCDF for Java</a>
+- * API.
+- */
+-public class NetCDFParser extends AbstractParser {
+-
+- /**
+- * Serial version UID
+- */
+- private static final long serialVersionUID = -5940938274907708665L;
+-
+- private final Set<MediaType> SUPPORTED_TYPES =
+- Collections.singleton(MediaType.application("x-netcdf"));
+-
+- /*
+- * (non-Javadoc)
+- *
+- * @see
+- * org.apache.tika.parser.Parser#getSupportedTypes(org.apache.tika.parser
+- * .ParseContext)
+- */
+- public Set<MediaType> getSupportedTypes(ParseContext context) {
+- return SUPPORTED_TYPES;
+- }
+-
+- /*
+- * (non-Javadoc)
+- *
+- * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+- * org.apache.tika.parser.ParseContext)
+- */
+- public void parse(InputStream stream, ContentHandler handler,
+- Metadata metadata, ParseContext context) throws IOException,
+- SAXException, TikaException {
+-
+- TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources());
+- try {
+- NetcdfFile ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
+- metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
+- // first parse out the set of global attributes
+- for (Attribute attr : ncFile.getGlobalAttributes()) {
+- Property property = resolveMetadataKey(attr.getFullName());
+- if (attr.getDataType().isString()) {
+- metadata.add(property, attr.getStringValue());
+- } else if (attr.getDataType().isNumeric()) {
+- int value = attr.getNumericValue().intValue();
+- metadata.add(property, String.valueOf(value));
+- }
+- }
+-
+-
+- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+- xhtml.startDocument();
+- xhtml.newline();
+- xhtml.element("h1", "dimensions");
+- xhtml.startElement("ul");
+- xhtml.newline();
+- for (Dimension dim : ncFile.getDimensions()) {
+- xhtml.element("li", dim.getFullName() + " = " + dim.getLength());
+- }
+- xhtml.endElement("ul");
+-
+- xhtml.element("h1", "variables");
+- xhtml.startElement("ul");
+- xhtml.newline();
+- for (Variable var : ncFile.getVariables()) {
+- xhtml.startElement("li");
+- xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions());
+- xhtml.newline();
+- List<Attribute> attributes = var.getAttributes();
+- if (!attributes.isEmpty()) {
+- xhtml.startElement("ul");
+- for (Attribute element : attributes) {
+- xhtml.element("li", element.toString());
+- }
+- xhtml.endElement("ul");
+- }
+- xhtml.endElement("li");
+- }
+- xhtml.endElement("ul");
+-
+- xhtml.endDocument();
+-
+- } catch (IOException e) {
+- throw new TikaException("NetCDF parse error", e);
+- }
+- }
+-
+- private Property resolveMetadataKey(String localName) {
+- if ("title".equals(localName)) {
+- return TikaCoreProperties.TITLE;
+- }
+- return Property.internalText(localName);
+- }
+-}
+\ No newline at end of file
diff --git a/debian/patches/optional-parser-dependencies.patch b/debian/patches/optional-parser-dependencies.patch
new file mode 100644
index 0000000..5d933bc
--- /dev/null
+++ b/debian/patches/optional-parser-dependencies.patch
@@ -0,0 +1,303 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 16:08:14 +0000
+Subject: optional parser dependencies
+
+---
+ tika-parsers/pom.xml | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
+index c0f673f..6872101 100644
+--- a/tika-parsers/pom.xml
++++ b/tika-parsers/pom.xml
+@@ -76,16 +76,19 @@
+ <groupId>org.gagravarr</groupId>
+ <artifactId>vorbis-java-tika</artifactId>
+ <version>${vorbis.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.healthmarketscience.jackcess</groupId>
+ <artifactId>jackcess</artifactId>
+ <version>2.1.2</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.healthmarketscience.jackcess</groupId>
+ <artifactId>jackcess-encrypt</artifactId>
+ <version>2.1.1</version>
++ <optional>true</optional>
+ </dependency>
+
+ <!-- Optional OSGi dependencies, used only when running within OSGi -->
+@@ -93,6 +96,7 @@
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ <scope>provided</scope>
++ <optional>true</optional>
+ </dependency>
+
+ <!-- Upstream parser libraries -->
+@@ -100,37 +104,44 @@
+ <groupId>net.sourceforge.jmatio</groupId>
+ <artifactId>jmatio</artifactId>
+ <version>1.0</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.james</groupId>
+ <artifactId>apache-mime4j-core</artifactId>
+ <version>${mime4j.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.james</groupId>
+ <artifactId>apache-mime4j-dom</artifactId>
+ <version>${mime4j.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>${commons.compress.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.tukaani</groupId>
+ <artifactId>xz</artifactId>
+ <version>${tukaani.version}</version>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${codec.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.pdfbox</groupId>
+ <artifactId>pdfbox</artifactId>
+ <version>${pdfbox.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <!-- TIKA-370: PDFBox declares the Bouncy Castle dependencies
+ as optional, but we prefer to have them always to avoid
+@@ -139,26 +150,31 @@
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcmail-jdk15on</artifactId>
+ <version>1.52</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15on</artifactId>
+ <version>1.52</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>${poi.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ <version>${poi.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>${poi.version}</version>
++ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>stax</groupId>
+@@ -174,61 +190,73 @@
+ <groupId>org.ccil.cowan.tagsoup</groupId>
+ <artifactId>tagsoup</artifactId>
+ <version>1.2.1</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.ow2.asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>5.0.4</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.googlecode.mp4parser</groupId>
+ <artifactId>isoparser</artifactId>
+ <version>1.0.2</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.drewnoakes</groupId>
+ <artifactId>metadata-extractor</artifactId>
+ <version>2.8.0</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>de.l3s.boilerpipe</groupId>
+ <artifactId>boilerpipe</artifactId>
+ <version>1.1.0</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>rome</groupId>
+ <artifactId>rome</artifactId>
+ <version>1.0</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.gagravarr</groupId>
+ <artifactId>vorbis-java-core</artifactId>
+ <version>${vorbis.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.googlecode.juniversalchardet</groupId>
+ <artifactId>juniversalchardet</artifactId>
+ <version>1.0.3</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.codelibs</groupId>
+ <artifactId>jhighlight</artifactId>
+ <version>1.0.2</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.pff</groupId>
+ <artifactId>java-libpst</artifactId>
+ <version>0.8.1</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>com.github.junrar</groupId>
+ <artifactId>junrar</artifactId>
+ <version>0.7</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.cxf</groupId>
+ <artifactId>cxf-rt-rs-client</artifactId>
+ <version>${cxf.version}</version>
++ <optional>true</optional>
+ </dependency>
+
+
+@@ -238,30 +266,35 @@
+ <artifactId>sqlite-jdbc</artifactId>
+ <version>3.8.10.1</version>
+ <scope>provided</scope>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-tools</artifactId>
+ <version>1.5.3</version>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons.io.version}</version>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-exec</artifactId>
+ <version>1.3</version>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>com.googlecode.json-simple</groupId>
+ <artifactId>json-simple</artifactId>
+ <version>1.1.1</version>
++ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>junit</groupId>
+@@ -274,6 +307,7 @@
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20140107</version>
++ <optional>true</optional>
+ </dependency>
+
+
+@@ -299,16 +333,19 @@
+ <groupId>edu.ucar</groupId>
+ <artifactId>netcdf4</artifactId>
+ <version>${netcdf-java.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>edu.ucar</groupId>
+ <artifactId>grib</artifactId>
+ <version>${netcdf-java.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>edu.ucar</groupId>
+ <artifactId>cdm</artifactId>
+ <version>${netcdf-java.version}</version>
++ <optional>true</optional>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+@@ -320,33 +357,39 @@
+ <groupId>edu.ucar</groupId>
+ <artifactId>httpservices</artifactId>
+ <version>${netcdf-java.version}</version>
++ <optional>true</optional>
+ </dependency>
+ <!-- Apache Commons CSV -->
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-csv</artifactId>
+ <version>1.0</version>
++ <optional>true</optional>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.sis.core</groupId>
+ <artifactId>sis-utility</artifactId>
+ <version>0.5</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.sis.storage</groupId>
+ <artifactId>sis-netcdf</artifactId>
+ <version>0.5</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.sis.core</groupId>
+ <artifactId>sis-metadata</artifactId>
+ <version>0.5</version>
++ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.opengis</groupId>
+ <artifactId>geoapi</artifactId>
+ <version>3.0.0</version>
++ <optional>true</optional>
+ </dependency>
+ <!-- Apache cTAKES -->
+ <dependency>
+@@ -354,6 +397,7 @@
+ <artifactId>ctakes-core</artifactId>
+ <version>3.2.2</version>
+ <scope>provided</scope>
++ <optional>true</optional>
+ </dependency>
+ </dependencies>
+
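[Note: marking these tika-parsers dependencies <optional>true</optional> keeps them out of the transitive dependency tree, so projects that depend on tika-parsers only get the parser libraries they declare themselves. A minimal sketch of how a consumer could probe at runtime whether one such optional backend is actually on the classpath; PDFBox is only an illustrative example here, and the class name OptionalBackendCheck is hypothetical:

    // Illustrative probe for one optional parser backend (PDFBox here);
    // substitute whichever library your application relies on.
    public final class OptionalBackendCheck {

        public static boolean pdfBoxPresent() {
            try {
                Class.forName("org.apache.pdfbox.pdmodel.PDDocument");
                return true;   // backend present, PDF parsing can be used
            } catch (ClassNotFoundException e) {
                return false;  // not pulled in transitively, declare it explicitly
            }
        }

        public static void main(String[] args) {
            System.out.println("PDFBox available: " + pdfBoxPresent());
        }
    }
]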
diff --git a/debian/patches/osgi.patch b/debian/patches/osgi.patch
new file mode 100644
index 0000000..15a86d9
--- /dev/null
+++ b/debian/patches/osgi.patch
@@ -0,0 +1,31 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:55:24 +0000
+Subject: osgi
+
+---
+ .../src/main/java/org/apache/tika/parser/internal/Activator.java | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
+index a884d3a..f3324b8 100644
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
++++ b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
+@@ -35,14 +35,14 @@ public class Activator implements BundleActivator {
+ @Override
+ public void start(BundleContext context) throws Exception {
+ detectorService = context.registerService(
+- Detector.class.getName(),
++ Detector.class,
+ new DefaultDetector(Activator.class.getClassLoader()),
+- new Properties());
++ new java.util.Hashtable<String,String>());
+ Parser parser = new DefaultParser(Activator.class.getClassLoader());
+ parserService = context.registerService(
+- Parser.class.getName(),
++ Parser.class,
+ parser,
+- new Properties());
++ new java.util.Hashtable<String,String>());
+ }
+
+ @Override
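[Note: the osgi.patch above replaces the String-based registerService() calls, which passed a java.util.Properties, with the typed overload that takes the service Class and a Dictionary<String,?>; Properties is a Dictionary<Object,Object>, so it no longer satisfies the generified OSGi signature, while Hashtable<String,String> does. A minimal sketch of an activator using that typed overload, assuming the standard OSGi BundleContext API and Tika's DefaultDetector; the class name DetectorActivator is only illustrative:

    import java.util.Hashtable;

    import org.apache.tika.detect.DefaultDetector;
    import org.apache.tika.detect.Detector;
    import org.osgi.framework.BundleActivator;
    import org.osgi.framework.BundleContext;
    import org.osgi.framework.ServiceRegistration;

    // Illustrative activator: registers a Detector service via the typed
    // registerService(Class, S, Dictionary) overload used by the patch.
    public class DetectorActivator implements BundleActivator {

        private ServiceRegistration<Detector> detectorService;

        @Override
        public void start(BundleContext context) throws Exception {
            detectorService = context.registerService(
                    Detector.class,
                    new DefaultDetector(DetectorActivator.class.getClassLoader()),
                    new Hashtable<String, String>());
        }

        @Override
        public void stop(BundleContext context) throws Exception {
            if (detectorService != null) {
                detectorService.unregister();
            }
        }
    }
]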
diff --git a/debian/patches/series b/debian/patches/series
index da693f0..9c90618 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,5 @@
01-jar-packaging.patch
+MP4Parser.patch
+netcdf.patch
+osgi.patch
+optional-parser-dependencies.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git