[tika] 01/05: Enabled JHighlight support
Emmanuel Bourg
ebourg-guest at moszumanska.debian.org
Fri Jun 24 14:26:45 UTC 2016
This is an automated email from the git hooks/post-receive script.
ebourg-guest pushed a commit to branch master
in repository tika.
commit 77c6f9b681a0b288253f99e4b47548f025c75b52
Author: Emmanuel Bourg <ebourg at apache.org>
Date: Fri Jun 24 16:15:23 2016 +0200
Enabled JHighlight support
---
debian/changelog | 6 ++
debian/control | 1 +
debian/maven.ignoreRules | 1 -
debian/patches/04-ignore-jhighlight.patch | 140 ------------------------------
debian/patches/series | 1 -
5 files changed, 7 insertions(+), 142 deletions(-)
diff --git a/debian/changelog b/debian/changelog
index c48786a..7b8817e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+tika (1.5-5) UNRELEASED; urgency=medium
+
+ * Enabled JHighlight support
+
+ -- Emmanuel Bourg <ebourg at apache.org> Fri, 24 Jun 2016 16:05:32 +0200
+
tika (1.5-4) unstable; urgency=medium
* Team upload.
diff --git a/debian/control b/debian/control
index 746d589..0a14f81 100644
--- a/debian/control
+++ b/debian/control
@@ -12,6 +12,7 @@ Build-Depends-Indep: bnd (>= 2.1.0),
libboilerpipe-java,
libcommons-compress-java,
libjempbox-java,
+ libjhighlight-java,
libjuniversalchardet-java,
libmaven-bundle-plugin-java,
libmetadata-extractor-java (>= 2.7.2-1~),
diff --git a/debian/maven.ignoreRules b/debian/maven.ignoreRules
index 4eca5ed..82c8f18 100644
--- a/debian/maven.ignoreRules
+++ b/debian/maven.ignoreRules
@@ -1,6 +1,5 @@
com.googlecode.mp4parser isoparser * * * *
-com.uwyn jhighlight * * * *
edu.ucar netcdf * * * *
junit junit * * * *
org.apache.felix maven-scr-plugin * * * *
diff --git a/debian/patches/04-ignore-jhighlight.patch b/debian/patches/04-ignore-jhighlight.patch
deleted file mode 100644
index cee0b85..0000000
--- a/debian/patches/04-ignore-jhighlight.patch
+++ /dev/null
@@ -1,140 +0,0 @@
-Description: Remove the classes using the jhighlight library which isn't in Debian yet
-Author: Emmanuel Bourg <ebourg at apache.org>
-Forwarded: not-needed
-
---- a/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
-+++ /dev/null
-@@ -1,133 +0,0 @@
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements. See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License. You may obtain a copy of the License at
-- *
-- * http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--package org.apache.tika.parser.code;
--
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.CPP;
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.GROOVY;
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.JAVA;
--
--import java.io.IOException;
--import java.io.InputStream;
--import java.nio.charset.Charset;
--import java.util.HashMap;
--import java.util.Map;
--import java.util.Set;
--import java.util.regex.Matcher;
--import java.util.regex.Pattern;
--
--import org.apache.tika.config.ServiceLoader;
--import org.apache.tika.detect.AutoDetectReader;
--import org.apache.tika.exception.TikaException;
--import org.apache.tika.io.CloseShieldInputStream;
--import org.apache.tika.metadata.Metadata;
--import org.apache.tika.metadata.TikaCoreProperties;
--import org.apache.tika.mime.MediaType;
--import org.apache.tika.parser.ParseContext;
--import org.apache.tika.parser.Parser;
--import org.xml.sax.ContentHandler;
--import org.xml.sax.SAXException;
--
--import com.uwyn.jhighlight.renderer.Renderer;
--import com.uwyn.jhighlight.renderer.XhtmlRendererFactory;
--/**
-- * Generic Source code parser for Java, Groovy, C++
-- *
-- * @author Hong-Thai.Nguyen
-- * @since 1.6
-- */
--public class SourceCodeParser implements Parser {
--
-- private static final long serialVersionUID = -4543476498190054160L;
--
-- private static final Pattern authorPattern = Pattern.compile("(?im)@author (.*) *$");
--
-- private static final Map<MediaType, String> TYPES_TO_RENDERER = new HashMap<MediaType, String>() {
-- private static final long serialVersionUID = -741976157563751152L;
-- {
-- put(MediaType.text("x-c++src"), CPP);
-- put(MediaType.text("x-java-source"), JAVA);
-- put(MediaType.text("x-groovy"), GROOVY);
-- }
-- };
--
-- private static final ServiceLoader LOADER = new ServiceLoader(SourceCodeParser.class.getClassLoader());
--
-- @Override
-- public Set<MediaType> getSupportedTypes(ParseContext context) {
-- return TYPES_TO_RENDERER.keySet();
-- }
--
-- @Override
-- public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
-- throws IOException, SAXException, TikaException {
--
-- AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER));
--
-- try {
-- Charset charset = reader.getCharset();
-- String mediaType = metadata.get(Metadata.CONTENT_TYPE);
-- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
-- if (mediaType != null && name != null) {
-- MediaType type = MediaType.parse(mediaType);
-- metadata.set(Metadata.CONTENT_TYPE, type.toString());
-- metadata.set(Metadata.CONTENT_ENCODING, charset.name());
--
-- StringBuilder out = new StringBuilder();
-- String line;
-- int nbLines = 0;
-- while ((line = reader.readLine()) != null) {
-- out.append(line);
-- String author = parserAuthor(line);
-- if (author != null) {
-- metadata.add(TikaCoreProperties.CREATOR, author);
-- }
-- nbLines ++;
-- }
-- metadata.set("LoC", String.valueOf(nbLines));
--
-- Renderer renderer = getRenderer(type.toString());
-- String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false);
-- char[] charArray = codeAsHtml.toCharArray();
-- handler.startDocument();
-- handler.characters(charArray, 0, charArray.length);
-- handler.endDocument();
-- }
-- } finally {
-- reader.close();
-- }
--
-- }
--
-- private Renderer getRenderer(String mimeType) {
-- MediaType mt = MediaType.parse(mimeType);
-- String type = TYPES_TO_RENDERER.get(mt);
-- if (type == null) {
-- throw new RuntimeException("unparseable content type " + mimeType);
-- }
-- return XhtmlRendererFactory.getRenderer(type);
-- }
--
--
-- private String parserAuthor(String line) {
-- Matcher m = authorPattern.matcher(line);
-- if (m.find()) {
-- return m.group(1).trim();
-- }
--
-- return null;
-- }
--}
diff --git a/debian/patches/series b/debian/patches/series
index 15fcf2e..707ecd7 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,7 +1,6 @@
01-jar-packaging.patch
02-ignore-mp4parser.patch
03-ignore-netcdf.patch
-04-ignore-jhighlight.patch
05-osgi-compatibility.patch
06-optional-parser-dependencies.patch
07-metadata-extractor-2.7-compatibility.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git
More information about the pkg-java-commits mailing list