[tika] 01/05: Enabled JHighlight support

Emmanuel Bourg ebourg-guest at moszumanska.debian.org
Fri Jun 24 14:26:45 UTC 2016


This is an automated email from the git hooks/post-receive script.

ebourg-guest pushed a commit to branch master
in repository tika.

commit 77c6f9b681a0b288253f99e4b47548f025c75b52
Author: Emmanuel Bourg <ebourg at apache.org>
Date:   Fri Jun 24 16:15:23 2016 +0200

    Enabled JHighlight support
---
 debian/changelog                          |   6 ++
 debian/control                            |   1 +
 debian/maven.ignoreRules                  |   1 -
 debian/patches/04-ignore-jhighlight.patch | 140 ------------------------------
 debian/patches/series                     |   1 -
 5 files changed, 7 insertions(+), 142 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index c48786a..7b8817e 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+tika (1.5-5) UNRELEASED; urgency=medium
+
+  * Enabled JHighlight support
+
+ -- Emmanuel Bourg <ebourg at apache.org>  Fri, 24 Jun 2016 16:05:32 +0200
+
 tika (1.5-4) unstable; urgency=medium
 
   * Team upload.
diff --git a/debian/control b/debian/control
index 746d589..0a14f81 100644
--- a/debian/control
+++ b/debian/control
@@ -12,6 +12,7 @@ Build-Depends-Indep: bnd (>= 2.1.0),
                      libboilerpipe-java,
                      libcommons-compress-java,
                      libjempbox-java,
+                     libjhighlight-java,
                      libjuniversalchardet-java,
                      libmaven-bundle-plugin-java,
                      libmetadata-extractor-java (>= 2.7.2-1~),
diff --git a/debian/maven.ignoreRules b/debian/maven.ignoreRules
index 4eca5ed..82c8f18 100644
--- a/debian/maven.ignoreRules
+++ b/debian/maven.ignoreRules
@@ -1,6 +1,5 @@
 
 com.googlecode.mp4parser isoparser * * * *
-com.uwyn jhighlight * * * *
 edu.ucar netcdf * * * *
 junit junit * * * *
 org.apache.felix maven-scr-plugin * * * *
diff --git a/debian/patches/04-ignore-jhighlight.patch b/debian/patches/04-ignore-jhighlight.patch
deleted file mode 100644
index cee0b85..0000000
--- a/debian/patches/04-ignore-jhighlight.patch
+++ /dev/null
@@ -1,140 +0,0 @@
-Description: Remove the classes using the jhighlight library which isn't in Debian yet
-Author: Emmanuel Bourg <ebourg at apache.org>
-Forwarded: not-needed
-
---- a/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
-+++ /dev/null
-@@ -1,133 +0,0 @@
--/*
-- * Licensed to the Apache Software Foundation (ASF) under one or more
-- * contributor license agreements.  See the NOTICE file distributed with
-- * this work for additional information regarding copyright ownership.
-- * The ASF licenses this file to You under the Apache License, Version 2.0
-- * (the "License"); you may not use this file except in compliance with
-- * the License.  You may obtain a copy of the License at
-- *
-- *     http://www.apache.org/licenses/LICENSE-2.0
-- *
-- * Unless required by applicable law or agreed to in writing, software
-- * distributed under the License is distributed on an "AS IS" BASIS,
-- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- * See the License for the specific language governing permissions and
-- * limitations under the License.
-- */
--package org.apache.tika.parser.code;
--
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.CPP;
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.GROOVY;
--import static com.uwyn.jhighlight.renderer.XhtmlRendererFactory.JAVA;
--
--import java.io.IOException;
--import java.io.InputStream;
--import java.nio.charset.Charset;
--import java.util.HashMap;
--import java.util.Map;
--import java.util.Set;
--import java.util.regex.Matcher;
--import java.util.regex.Pattern;
--
--import org.apache.tika.config.ServiceLoader;
--import org.apache.tika.detect.AutoDetectReader;
--import org.apache.tika.exception.TikaException;
--import org.apache.tika.io.CloseShieldInputStream;
--import org.apache.tika.metadata.Metadata;
--import org.apache.tika.metadata.TikaCoreProperties;
--import org.apache.tika.mime.MediaType;
--import org.apache.tika.parser.ParseContext;
--import org.apache.tika.parser.Parser;
--import org.xml.sax.ContentHandler;
--import org.xml.sax.SAXException;
--
--import com.uwyn.jhighlight.renderer.Renderer;
--import com.uwyn.jhighlight.renderer.XhtmlRendererFactory;
--/**
-- * Generic Source code parser for Java, Groovy, C++
-- *
-- * @author Hong-Thai.Nguyen
-- * @since 1.6
-- */
--public class SourceCodeParser implements Parser {
--
--  private static final long serialVersionUID = -4543476498190054160L;
--
--  private static final Pattern authorPattern = Pattern.compile("(?im)@author (.*) *$");
--
--  private static final Map<MediaType, String> TYPES_TO_RENDERER = new HashMap<MediaType, String>() {
--    private static final long serialVersionUID = -741976157563751152L;
--    {
--      put(MediaType.text("x-c++src"), CPP);
--      put(MediaType.text("x-java-source"), JAVA);
--      put(MediaType.text("x-groovy"), GROOVY);
--    }
--  };
--
--  private static final ServiceLoader LOADER = new ServiceLoader(SourceCodeParser.class.getClassLoader());
--
--  @Override
--  public Set<MediaType> getSupportedTypes(ParseContext context) {
--    return TYPES_TO_RENDERER.keySet();
--  }
--
--  @Override
--  public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
--      throws IOException, SAXException, TikaException {
--
--    AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, context.get(ServiceLoader.class, LOADER));
--
--    try {
--      Charset charset = reader.getCharset();
--      String mediaType = metadata.get(Metadata.CONTENT_TYPE);
--      String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
--      if (mediaType != null && name != null) {
--        MediaType type = MediaType.parse(mediaType);
--        metadata.set(Metadata.CONTENT_TYPE, type.toString());
--        metadata.set(Metadata.CONTENT_ENCODING, charset.name());
--
--        StringBuilder out = new StringBuilder();
--        String line;
--        int nbLines =  0;
--        while ((line = reader.readLine()) != null) {
--            out.append(line);
--            String author = parserAuthor(line);
--            if (author != null) {
--              metadata.add(TikaCoreProperties.CREATOR, author);
--            }
--            nbLines ++;
--        }
--        metadata.set("LoC", String.valueOf(nbLines));
--
--        Renderer renderer = getRenderer(type.toString());
--        String codeAsHtml = renderer.highlight(name, out.toString(), charset.name(), false);
--        char[] charArray = codeAsHtml.toCharArray();
--        handler.startDocument();
--        handler.characters(charArray, 0, charArray.length);
--        handler.endDocument();
--      }
--    } finally {
--      reader.close();
--    }
--
--  }
--
--  private Renderer getRenderer(String mimeType) {
--    MediaType mt = MediaType.parse(mimeType);
--    String type = TYPES_TO_RENDERER.get(mt);
--    if (type == null) {
--      throw new RuntimeException("unparseable content type " + mimeType);
--    }
--    return XhtmlRendererFactory.getRenderer(type);
--  }
--
--
--  private String parserAuthor(String line) {
--    Matcher m = authorPattern.matcher(line);
--    if (m.find()) {
--      return m.group(1).trim();
--    }
--
--    return null;
--  }
--}
diff --git a/debian/patches/series b/debian/patches/series
index 15fcf2e..707ecd7 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,7 +1,6 @@
 01-jar-packaging.patch
 02-ignore-mp4parser.patch
 03-ignore-netcdf.patch
-04-ignore-jhighlight.patch
 05-osgi-compatibility.patch
 06-optional-parser-dependencies.patch
 07-metadata-extractor-2.7-compatibility.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git



More information about the pkg-java-commits mailing list