[tika] 01/23: Merge tag 'upstream/1.11_rc1'
Markus Koschany
apo-guest at moszumanska.debian.org
Mon Nov 30 20:27:40 UTC 2015
This is an automated email from the git hooks/post-receive script.
apo-guest pushed a commit to branch master
in repository tika.
commit 5555afe598590622a0b2aa5e4275ad9dc785e36a
Merge: 9bd7743 c3b0fed
Author: Markus Koschany <apo at debian.org>
Date: Mon Nov 30 15:20:22 2015 +0000
Merge tag 'upstream/1.11_rc1'
Upstream version 1.11~rc1
# gpg: Signature made Mon Nov 30 15:20:18 2015 UTC using RSA key ID 513B51E4
# gpg: Good signature from "Markus Koschany <apo at gambaru.de>" [ultimate]
# gpg: aka "Markus Koschany <markus at koschany.net>" [ultimate]
# gpg: aka "Markus Koschany <apo at debian.org>" [ultimate]
.gitignore | 3 +
CHANGES.txt | 456 ++
KEYS | 111 +-
LICENSE.txt | 48 +
NOTICE.txt | 6 +-
README.md | 85 +
README.txt | 102 -
pom.xml | 112 +-
src/site/apt/detection.apt | 152 -
src/site/apt/formats.apt | 145 -
src/site/apt/gettingstarted.apt | 208 -
src/site/apt/index.apt | 31 -
src/site/apt/parser.apt | 245 -
src/site/apt/parser_guide.apt | 135 -
src/site/resources/css/site.css | 324 --
src/site/resources/tika.png | Bin 10203 -> 0 bytes
src/site/resources/tika.svg | 5318 --------------------
src/site/resources/tikaNoText.svg | 5305 -------------------
src/site/resources/tikaNoText16.png | Bin 641 -> 0 bytes
src/site/resources/tikaNoText32.png | Bin 1768 -> 0 bytes
src/site/resources/tikaNoText64.png | Bin 5552 -> 0 bytes
src/site/site.vm | 283 --
src/site/site.xml | 47 -
tika-app/pom.xml | 90 +-
.../src/main/appended-resources/META-INF/LICENSE | 5 +
.../batch/DigestingAutoDetectParserFactory.java | 36 +-
.../batch/builders/AppParserFactoryBuilder.java | 76 +
.../apache/tika/cli/BatchCommandLineBuilder.java | 209 +
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 526 +-
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 140 +-
.../src/main/resources/log4j.properties | 12 +-
.../main/resources/log4j_batch_process.properties | 12 +-
.../src/main/resources/tika-app-batch-config.xml | 136 +
.../tika/cli/TikaCLIBatchCommandLineTest.java | 207 +
.../tika/cli/TikaCLIBatchIntegrationTest.java | 174 +
.../test/java/org/apache/tika/cli/TikaCLITest.java | 213 +-
{tika-core => tika-batch}/pom.xml | 129 +-
.../apache/tika/batch/AutoDetectParserFactory.java | 18 +-
.../org/apache/tika/batch/BatchNoRestartError.java | 20 +-
.../java/org/apache/tika/batch/BatchProcess.java | 597 +++
.../apache/tika/batch/BatchProcessDriverCLI.java | 403 ++
.../org/apache/tika/batch/ConsumersManager.java | 80 +
.../tika/batch/FileConsumerFutureResult.java | 28 +-
.../java/org/apache/tika/batch/FileResource.java | 68 +
.../apache/tika/batch/FileResourceConsumer.java | 429 ++
.../org/apache/tika/batch/FileResourceCrawler.java | 270 +
.../batch/FileResourceCrawlerFutureResult.java | 28 +-
.../java/org/apache/tika/batch/FileStarted.java | 113 +
.../tika/batch/IFileProcessorFutureResult.java | 13 +-
.../java/org/apache/tika/batch/Interrupter.java | 59 +
.../apache/tika/batch/InterrupterFutureResult.java | 13 +-
.../org/apache/tika/batch/OutputStreamFactory.java | 16 +-
.../tika/batch/ParallelFileProcessingResult.java | 109 +
.../java/org/apache/tika/batch/ParserFactory.java | 25 +-
.../org/apache/tika/batch/PoisonFileResource.java | 37 +-
.../java/org/apache/tika/batch/StatusReporter.java | 227 +
.../tika/batch/StatusReporterFutureResult.java | 12 +-
.../batch/builders/AbstractConsumersBuilder.java | 28 +-
.../tika/batch/builders/BatchProcessBuilder.java | 295 ++
.../batch/builders/CommandLineParserBuilder.java | 143 +
.../DefaultContentHandlerFactoryBuilder.java | 58 +
.../builders/IContentHandlerFactoryBuilder.java | 18 +-
.../tika/batch/builders/ICrawlerBuilder.java | 20 +-
.../tika/batch/builders/IParserFactoryBuilder.java | 15 +-
.../tika/batch/builders/InterrupterBuilder.java | 19 +-
.../builders/ObjectFromDOMAndQueueBuilder.java | 21 +-
.../tika/batch/builders/ObjectFromDOMBuilder.java | 18 +-
.../tika/batch/builders/ParserFactoryBuilder.java | 49 +
.../tika/batch/builders/ReporterBuilder.java | 19 +-
.../batch/builders/SimpleLogReporterBuilder.java | 43 +
.../tika/batch/builders/StatusReporterBuilder.java | 23 +-
.../apache/tika/batch/fs/AbstractFSConsumer.java | 78 +
.../apache/tika/batch/fs/BasicTikaFSConsumer.java | 126 +
.../apache/tika/batch/fs/FSBatchProcessCLI.java | 160 +
.../apache/tika/batch/fs/FSConsumersManager.java | 30 +-
.../apache/tika/batch/fs/FSDirectoryCrawler.java | 165 +
.../apache/tika/batch/fs/FSDocumentSelector.java | 83 +
.../org/apache/tika/batch/fs/FSFileResource.java | 130 +
.../org/apache/tika/batch/fs/FSListCrawler.java | 118 +
.../tika/batch/fs/FSOutputStreamFactory.java | 114 +
.../org/apache/tika/batch/fs/FSProperties.java | 22 +-
.../main/java/org/apache/tika/batch/fs/FSUtil.java | 211 +
.../batch/fs/RecursiveParserWrapperFSConsumer.java | 159 +
.../fs/builders/BasicTikaFSConsumersBuilder.java | 207 +
.../tika/batch/fs/builders/FSCrawlerBuilder.java | 141 +
.../batch/fs/strawman/StrawManTikaAppDriver.java | 254 +
.../java/org/apache/tika/util/ClassLoaderUtil.java | 41 +-
.../org/apache/tika/util/DurationFormatUtils.java | 66 +
.../main/java/org/apache/tika/util/PropsUtil.java | 149 +
.../main/java/org/apache/tika/util/XMLDOMUtil.java | 109 +
tika-batch/src/main/java/overview.html | 41 +
.../tika/batch/fs/default-tika-batch-config.xml | 127 +
.../tika/batch/CommandLineParserBuilderTest.java | 34 +-
.../RecursiveParserWrapperFSConsumerTest.java | 149 +
.../org/apache/tika/batch/fs/BatchDriverTest.java | 210 +
.../org/apache/tika/batch/fs/BatchProcessTest.java | 369 ++
.../org/apache/tika/batch/fs/FSBatchTestBase.java | 301 ++
.../apache/tika/batch/fs/FSFileResourceTest.java | 49 +
.../java/org/apache/tika/batch/fs/FSUtilTest.java | 41 +-
.../apache/tika/batch/fs/HandlerBuilderTest.java | 120 +
.../tika/batch/fs/OutputStreamFactoryTest.java | 101 +
.../apache/tika/batch/fs/StringStreamGobbler.java | 64 +
.../tika/batch/fs/strawman/StrawmanTest.java | 17 +-
.../tika/batch/mock/MockConsumersBuilder.java | 38 +
.../tika/batch/mock/MockConsumersManager.java | 77 +
.../apache/tika/parser/mock/MockParserFactory.java | 21 +-
tika-bundle/pom.xml | 289 +-
.../test/java/org/apache/tika/bundle/BundleIT.java | 202 +-
tika-core/pom.xml | 39 +-
tika-core/src/main/java/org/apache/tika/Tika.java | 201 +-
.../ConfigurableThreadPoolExecutor.java} | 61 +-
.../tika/concurrent/SimpleThreadPoolExecutor.java | 76 +-
.../org/apache/tika/config/LoadErrorHandler.java | 13 +-
.../java/org/apache/tika/config/ServiceLoader.java | 72 +-
.../java/org/apache/tika/config/TikaConfig.java | 722 ++-
.../org/apache/tika/detect/CompositeDetector.java | 32 +-
.../org/apache/tika/detect/DefaultDetector.java | 33 +-
.../apache/tika/detect/DefaultProbDetector.java | 80 +
.../java/org/apache/tika/detect/MagicDetector.java | 14 +-
.../apache/tika/detect/NNExampleModelDetector.java | 160 +
.../org/apache/tika/detect/NNTrainedModel.java | 103 +
.../apache/tika/detect/NNTrainedModelBuilder.java | 76 +
.../java/org/apache/tika/detect/NameDetector.java | 4 +-
.../TrainedModel.java} | 15 +-
.../apache/tika/detect/TrainedModelDetector.java | 176 +
.../org/apache/tika/detect/XmlRootExtractor.java | 10 +-
.../org/apache/tika/embedder/ExternalEmbedder.java | 4 +-
.../tika/exception/AccessPermissionException.java | 31 +-
.../tika/extractor/ParserContainerExtractor.java | 5 +-
.../ParsingEmbeddedDocumentExtractor.java | 9 +-
.../main/java/org/apache/tika/fork/ForkClient.java | 25 +-
.../main/java/org/apache/tika/fork/ForkParser.java | 48 +-
.../main/java/org/apache/tika/io/EndianUtils.java | 21 +
.../java/org/apache/tika/io/FilenameUtils.java | 42 +-
.../src/main/java/org/apache/tika/io/IOUtils.java | 28 +-
.../org/apache/tika/io/LookaheadInputStream.java | 5 +-
.../java/org/apache/tika/io/TaggedInputStream.java | 2 +-
.../org/apache/tika/io/TemporaryResources.java | 84 +-
.../java/org/apache/tika/io/TikaInputStream.java | 149 +-
.../apache/tika/language/LanguageIdentifier.java | 13 +-
.../org/apache/tika/language/LanguageProfile.java | 159 +
.../tika/language/LanguageProfilerBuilder.java | 9 +-
.../tika/language/translate/DefaultTranslator.java | 119 +
.../tika/language/translate/EmptyTranslator.java | 26 +-
.../apache/tika/language/translate/Translator.java | 71 +
.../apache/tika/metadata/AccessPermissions.java | 71 +
.../{TikaMetadataKeys.java => Database.java} | 20 +-
.../main/java/org/apache/tika/metadata/IPTC.java | 2 +-
.../java/org/apache/tika/metadata/Metadata.java | 25 +-
.../tika/metadata/OfficeOpenXMLExtended.java | 5 +-
.../java/org/apache/tika/metadata/PagedText.java | 2 +-
.../java/org/apache/tika/metadata/Photoshop.java | 8 +
.../java/org/apache/tika/metadata/RTFMetadata.java | 46 +
.../main/java/org/apache/tika/metadata/TIFF.java | 2 +-
.../apache/tika/metadata/TikaCoreProperties.java | 48 +
.../org/apache/tika/metadata/TikaMetadataKeys.java | 3 +
.../main/java/org/apache/tika/metadata/XMPDM.java | 17 +-
.../org/apache/tika/mime/MediaTypeRegistry.java | 33 +-
.../main/java/org/apache/tika/mime/MimeType.java | 3 +-
.../main/java/org/apache/tika/mime/MimeTypes.java | 153 +-
.../java/org/apache/tika/mime/MimeTypesReader.java | 2 +-
.../mime/ProbabilisticMimeDetectionSelector.java | 539 ++
.../org/apache/tika/parser/AutoDetectParser.java | 4 +-
.../org/apache/tika/parser/CompositeParser.java | 70 +-
.../java/org/apache/tika/parser/DefaultParser.java | 41 +-
.../org/apache/tika/parser/DigestingParser.java | 76 +
.../java/org/apache/tika/parser/EmptyParser.java | 2 -
.../java/org/apache/tika/parser/ErrorParser.java | 4 +-
.../java/org/apache/tika/parser/NetworkParser.java | 10 +-
.../org/apache/tika/parser/ParserDecorator.java | 101 +-
.../java/org/apache/tika/parser/ParsingReader.java | 14 +
.../apache/tika/parser/RecursiveParserWrapper.java | 357 ++
.../tika/parser/external/ExternalParser.java | 83 +-
.../external/ExternalParsersConfigReader.java | 5 +-
.../parser/external/ExternalParsersFactory.java | 5 +-
.../tika/sax/BasicContentHandlerFactory.java | 156 +
.../java/org/apache/tika/sax/CleanPhoneText.java | 286 ++
.../ContentHandlerFactory.java} | 19 +-
.../org/apache/tika/sax/DIFContentHandler.java | 152 +
.../tika/sax/PhoneExtractingContentHandler.java | 111 +
.../org/apache/tika/sax/ToTextContentHandler.java | 3 +-
.../apache/tika/sax/WriteOutContentHandler.java | 3 +-
.../org/apache/tika/sax/XHTMLContentHandler.java | 2 +-
.../org/apache/tika/utils/ConcurrentUtils.java | 57 +
.../main/java/org/apache/tika/utils/DateUtils.java | 36 +-
.../java/org/apache/tika/utils/ExceptionUtils.java | 90 +
.../org/apache/tika/utils/ServiceLoaderUtils.java | 48 +
.../org/apache/tika/detect/tika-example.nnmodel | 2 +
.../main/resources/org/apache/tika/language/fa.ngp | 1001 ++++
.../apache/tika/language/tika.language.properties | 3 +-
.../org/apache/tika/mime/tika-mimetypes.xml | 1004 +++-
.../java/org/apache/tika/TikaDetectionTest.java | 18 +-
.../src/test/java/org/apache/tika/TikaTest.java | 86 +-
.../org/apache/tika/TypeDetectionBenchmark.java | 11 +-
.../apache/tika/config/AbstractTikaConfigTest.java | 50 +
.../java/org/apache/tika/config/DummyExecutor.java | 64 +-
.../java/org/apache/tika/config/DummyParser.java | 28 +-
.../org/apache/tika/config/TikaConfigTest.java | 180 +-
.../org/apache/tika/detect/MagicDetectorTest.java | 35 +-
.../tika/detect/MimeDetectionWithNNTest.java | 140 +
.../org/apache/tika/detect/TextDetectorTest.java | 5 +-
.../java/org/apache/tika/io/EndianUtilsTest.java | 36 +-
.../java/org/apache/tika/io/FilenameUtilsTest.java | 27 +-
.../java/org/apache/tika/io/TailStreamTest.java | 15 +-
.../org/apache/tika/io/TemporaryResourcesTest.java | 34 +-
.../org/apache/tika/io/TikaInputStreamTest.java | 65 +-
.../tika/language/LanguageIdentifierTest.java | 61 +-
.../tika/language/LanguageProfilerBuilderTest.java | 24 +-
.../org/apache/tika/metadata/TestMetadata.java | 9 +-
.../org/apache/tika/mime/MimeDetectionTest.java | 42 +-
.../org/apache/tika/mime/MimeTypesReaderTest.java | 88 +-
...st.java => ProbabilisticMimeDetectionTest.java} | 135 +-
...=> ProbabilisticMimeDetectionTestWithTika.java} | 165 +-
.../apache/tika/parser/CompositeParserTest.java | 1 +
.../java/org/apache/tika/parser/DummyParser.java | 10 +-
.../apache/tika/parser/ParserDecoratorTest.java | 120 +
.../org/apache/tika/parser/mock/MockParser.java | 359 ++
.../tika/sax/BasicContentHandlerFactoryTest.java | 341 ++
.../apache/tika/sax/BodyContentHandlerTest.java | 3 +-
.../apache/tika/sax/XHTMLContentHandlerTest.java | 20 +
.../org/apache/tika/utils/ConcurrentUtilsTest.java | 63 +
tika-dotnet/pom.xml | 55 +-
tika-example/pom.xml | 134 +
.../apache/tika/example/AdvancedTypeDetector.java | 56 +
.../apache/tika/example/ContentHandlerExample.java | 137 +
.../org/apache/tika/example/CustomMimeInfo.java | 49 +
.../org/apache/tika/example/DescribeMetadata.java | 18 +-
.../org/apache/tika/example/DirListParser.java | 143 +
.../apache/tika/example/DisplayMetInstance.java | 38 +-
.../apache/tika/example/DumpTikaConfigExample.java | 314 ++
.../example/EncryptedPrescriptionDetector.java | 59 +
.../tika/example/EncryptedPrescriptionParser.java | 51 +-
.../apache/tika/example/ExtractEmbeddedFiles.java | 106 +
.../tika/example/GrabPhoneNumbersExample.java | 103 +
.../org/apache/tika/example/ImportContextImpl.java | 235 +
.../tika/example/InterruptableParsingExample.java | 92 +
.../java/org/apache/tika/example/Language.java | 58 +
.../tika/example/LanguageDetectingParser.java | 48 +-
.../tika/example/LanguageIdentifierExample.java | 19 +-
.../tika/example/LazyTextExtractorField.java | 210 +
.../org/apache/tika/example/LuceneIndexer.java | 37 +-
.../apache/tika/example/LuceneIndexerExtended.java | 65 +
.../org/apache/tika/example/MediaTypeExample.java | 58 +
.../tika/example/MetadataAwareLuceneIndexer.java | 88 +
.../java/org/apache/tika/example/MyFirstTika.java | 116 +
.../org/apache/tika/example/ParsingExample.java | 217 +
.../java/org/apache/tika/example/Pharmacy.java | 19 +-
.../apache/tika/example/PrescriptionParser.java | 49 +-
.../java/org/apache/tika/example/RecentFiles.java | 145 +
.../org/apache/tika/example/RollbackSoftware.java | 137 +
.../apache/tika/example/SimpleTextExtractor.java | 24 +-
.../apache/tika/example/SimpleTypeDetector.java | 21 +-
.../org/apache/tika/example/SpringExample.java | 34 +-
.../org/apache/tika/example/TIAParsingExample.java | 201 +
.../org/apache/tika/example/TranslatorExample.java | 26 +-
.../apache/tika/example/TrecDocumentGenerator.java | 107 +
.../java/org/apache/tika/example/ZipListFiles.java | 40 +-
.../resources/org/apache/tika/example/spring.xml | 36 +
.../resources/org/apache/tika/example/test.doc | Bin 0 -> 9216 bytes
.../resources/org/apache/tika/example/test2.doc | Bin 0 -> 10752 bytes
.../tika/example/test_recursive_embedded.docx | Bin 0 -> 27082 bytes
.../tika/example/AdvancedTypeDetectorTest.java | 20 +-
.../tika/example/ContentHandlerExampleTest.java | 105 +
.../tika/example/DumpTikaConfigExampleTest.java | 90 +
.../tika/example/ExtractEmbeddedFilesTest.java | 62 +
.../example/LanguageIdentifierExampleTest.java | 25 +-
.../tika/example/SimpleTextExtractorTest.java | 48 +
.../tika/example/SimpleTypeDetectorTest.java | 43 +
.../apache/tika/example/TestParsingExample.java | 102 +
.../apache/tika/example/TranslatorExampleTest.java | 36 +-
tika-java7/pom.xml | 43 +-
.../filetypedetector/TikaFileTypeDetector.java | 3 +-
tika-parent/pom.xml | 141 +-
tika-parsers/pom.xml | 246 +-
.../src/main/appended-resources/META-INF/LICENSE | 57 +
.../apache/tika/parser/asm/XHTMLClassVisitor.java | 2 +-
.../org/apache/tika/parser/audio/MidiParser.java | 4 +-
.../java/org/apache/tika/parser/chm/ChmParser.java | 39 +-
.../chm/accessor/ChmDirectoryListingSet.java | 283 +-
.../tika/parser/chm/accessor/ChmItsfHeader.java | 21 +-
.../tika/parser/chm/accessor/ChmItspHeader.java | 44 +-
.../parser/chm/accessor/ChmLzxcControlData.java | 20 +-
.../parser/chm/accessor/ChmLzxcResetTable.java | 15 +-
.../tika/parser/chm/accessor/ChmPmgiHeader.java | 36 +-
.../tika/parser/chm/accessor/ChmPmglHeader.java | 67 +-
.../parser/chm/accessor/DirectoryListingEntry.java | 5 +-
.../apache/tika/parser/chm/core/ChmCommons.java | 23 +-
.../apache/tika/parser/chm/core/ChmConstants.java | 4 +-
.../apache/tika/parser/chm/core/ChmExtractor.java | 54 +-
.../apache/tika/parser/chm/lzx/ChmBlockInfo.java | 10 +-
.../apache/tika/parser/chm/lzx/ChmLzxBlock.java | 223 +-
.../apache/tika/parser/chm/lzx/ChmLzxState.java | 30 +-
.../org/apache/tika/parser/chm/lzx/ChmSection.java | 31 +-
.../apache/tika/parser/code/SourceCodeParser.java | 37 +-
.../org/apache/tika/parser/crypto/Pkcs7Parser.java | 20 +-
.../CTAKESAnnotationProperty.java} | 39 +-
.../apache/tika/parser/ctakes/CTAKESConfig.java | 336 ++
.../tika/parser/ctakes/CTAKESContentHandler.java | 176 +
.../apache/tika/parser/ctakes/CTAKESParser.java | 92 +
.../CTAKESSerializer.java} | 31 +-
.../org/apache/tika/parser/ctakes/CTAKESUtils.java | 265 +
.../apache/tika/parser/dif/DIFContentHandler.java | 152 +
.../java/org/apache/tika/parser/dif/DIFParser.java | 86 +
.../apache/tika/parser/envi/EnviHeaderParser.java | 84 +
.../apache/tika/parser/epub/EpubContentParser.java | 2 +-
.../org/apache/tika/parser/epub/EpubParser.java | 6 +-
.../tika/parser/executable/MachineMetadata.java | 36 +-
.../org/apache/tika/parser/feed/FeedParser.java | 4 +-
.../tika/parser/font/AdobeFontMetricParser.java | 33 +-
.../apache/tika/parser/font/TrueTypeParser.java | 58 +-
.../org/apache/tika/parser/gdal/GDALParser.java | 415 ++
.../apache/tika/parser/geo/topic/GeoParser.java | 155 +
.../tika/parser/geo/topic/GeoParserConfig.java | 54 +
.../org/apache/tika/parser/geo/topic/GeoTag.java | 65 +
.../tika/parser/geo/topic/NameEntityExtractor.java | 127 +
.../geoinfo/GeographicInformationParser.java | 391 ++
.../NetCDFParser.java => grib/GribParser.java} | 104 +-
.../java/org/apache/tika/parser/hdf/HDFParser.java | 8 +-
.../tika/parser/html/BoilerpipeContentHandler.java | 189 +-
.../apache/tika/parser/html/DefaultHtmlMapper.java | 30 +-
.../tika/parser/html/HtmlEncodingDetector.java | 45 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 31 +-
.../org/apache/tika/parser/html/HtmlMapper.java | 14 +-
.../org/apache/tika/parser/html/HtmlParser.java | 80 +-
.../tika/parser/html/IdentityHtmlMapper.java | 2 +-
.../tika/parser/html/XHTMLDowngradeHandler.java | 3 +-
.../org/apache/tika/parser/image/BPGParser.java | 177 +
.../tika/parser/image/ImageMetadataExtractor.java | 367 +-
.../org/apache/tika/parser/image/ImageParser.java | 179 +-
.../apache/tika/parser/image/MetadataFields.java | 22 +-
.../org/apache/tika/parser/image/PSDParser.java | 194 +-
.../org/apache/tika/parser/image/TiffParser.java | 6 +-
.../image/{TiffParser.java => WebPParser.java} | 14 +-
.../tika/parser/image/xmp/JempboxExtractor.java | 14 +-
.../tika/parser/image/xmp/XMPPacketScanner.java | 79 +-
.../org/apache/tika/parser/internal/Activator.java | 5 +-
.../apache/tika/parser/iptc/IptcAnpaParser.java | 41 +-
.../org/apache/tika/parser/isatab/ISATabUtils.java | 209 +
.../apache/tika/parser/isatab/ISArchiveParser.java | 136 +
.../tika/parser/iwork/AutoPageNumberUtils.java | 6 +-
.../tika/parser/iwork/IWorkPackageParser.java | 13 +-
.../tika/parser/iwork/PagesContentHandler.java | 1 -
.../apache/tika/parser/jdbc/AbstractDBParser.java | 189 +
.../apache/tika/parser/jdbc/JDBCTableReader.java | 302 ++
.../apache/tika/parser/jdbc/SQLite3DBParser.java | 110 +
.../org/apache/tika/parser/jdbc/SQLite3Parser.java | 80 +
.../tika/parser/jdbc/SQLite3TableReader.java | 109 +
.../tika/parser/journal/GrobidRESTParser.java | 112 +
.../TiffParser.java => journal/JournalParser.java} | 57 +-
.../org/apache/tika/parser/journal/TEIParser.java | 893 ++++
.../org/apache/tika/parser/jpeg/JpegParser.java | 6 +-
.../tika/parser/mail/MailContentHandler.java | 84 +-
.../org/apache/tika/parser/mail/RFC822Parser.java | 18 +-
.../java/org/apache/tika/parser/mat/MatParser.java | 133 +
.../org/apache/tika/parser/mbox/MboxParser.java | 213 +-
.../apache/tika/parser/mbox/OutlookPSTParser.java | 203 +
.../tika/parser/microsoft/AbstractListManager.java | 269 +
.../parser/microsoft/AbstractPOIFSExtractor.java | 170 +-
.../tika/parser/microsoft/ExcelExtractor.java | 403 +-
.../tika/parser/microsoft/HSLFExtractor.java | 511 +-
.../tika/parser/microsoft/JackcessExtractor.java | 345 ++
.../tika/parser/microsoft/JackcessParser.java | 129 +
.../apache/tika/parser/microsoft/ListManager.java | 190 +
.../apache/tika/parser/microsoft/OfficeParser.java | 272 +-
.../tika/parser/microsoft/OldExcelParser.java | 97 +
.../tika/parser/microsoft/OutlookExtractor.java | 480 +-
.../parser/microsoft/POIFSContainerDetector.java | 354 +-
.../tika/parser/microsoft/SummaryExtractor.java | 63 +-
.../apache/tika/parser/microsoft/TNEFParser.java | 142 +-
.../tika/parser/microsoft/WordExtractor.java | 979 ++--
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 122 +-
.../parser/microsoft/ooxml/MetadataExtractor.java | 225 +-
.../parser/microsoft/ooxml/OOXMLExtractor.java | 4 +-
.../microsoft/ooxml/OOXMLExtractorFactory.java | 39 +-
.../tika/parser/microsoft/ooxml/OOXMLParser.java | 61 +-
.../ooxml/POIXMLTextExtractorDecorator.java | 2 +-
.../ooxml/XSLFPowerPointExtractorDecorator.java | 176 +-
.../ooxml/XSSFExcelExtractorDecorator.java | 538 +-
.../parser/microsoft/ooxml/XWPFListManager.java | 165 +
.../ooxml/XWPFWordExtractorDecorator.java | 611 +--
.../tika/parser/mp3/CompositeTagHandler.java | 26 +
.../java/org/apache/tika/parser/mp3/ID3Tags.java | 19 +-
.../org/apache/tika/parser/mp3/ID3v1Handler.java | 35 +-
.../org/apache/tika/parser/mp3/ID3v22Handler.java | 23 +-
.../org/apache/tika/parser/mp3/ID3v23Handler.java | 22 +-
.../org/apache/tika/parser/mp3/ID3v24Handler.java | 22 +-
.../org/apache/tika/parser/mp3/ID3v2Frame.java | 11 +-
.../org/apache/tika/parser/mp3/LyricsHandler.java | 7 +-
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 19 +-
.../tika/parser/mp4/DirectFileReadDataSource.java | 100 +
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 389 +-
.../apache/tika/parser/netcdf/NetCDFParser.java | 75 +-
.../apache/tika/parser/ocr/TesseractOCRConfig.java | 256 +
.../apache/tika/parser/ocr/TesseractOCRParser.java | 336 ++
.../parser/odf/NSNormalizerContentHandler.java | 9 +-
.../tika/parser/odf/OpenDocumentContentParser.java | 504 +-
.../tika/parser/odf/OpenDocumentMetaParser.java | 94 +-
.../apache/tika/parser/odf/OpenDocumentParser.java | 188 +-
.../org/apache/tika/parser/pdf/AccessChecker.java | 81 +
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 491 +-
.../tika/parser/pdf/PDFEncodedStringDecoder.java | 117 +
.../java/org/apache/tika/parser/pdf/PDFParser.java | 500 +-
.../apache/tika/parser/pdf/PDFParserConfig.java | 316 +-
.../apache/tika/parser/pkg/CompressorParser.java | 33 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 160 +-
.../java/org/apache/tika/parser/pkg/RarParser.java | 110 +
.../tika/parser/pkg/ZipContainerDetector.java | 87 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 4 +-
.../org/apache/tika/parser/rtf/GroupState.java | 17 +-
.../org/apache/tika/parser/rtf/ListDescriptor.java | 3 +-
.../apache/tika/parser/rtf/RTFEmbObjHandler.java | 287 ++
.../apache/tika/parser/rtf/RTFObjDataParser.java | 315 ++
.../java/org/apache/tika/parser/rtf/RTFParser.java | 38 +-
.../org/apache/tika/parser/rtf/TextExtractor.java | 422 +-
.../org/apache/tika/parser/strings/FileConfig.java | 77 +
.../tika/parser/strings/Latin1StringsParser.java | 322 ++
.../apache/tika/parser/strings/StringsConfig.java | 187 +
.../tika/parser/strings/StringsEncoding.java | 45 +
.../apache/tika/parser/strings/StringsParser.java | 335 ++
.../apache/tika/parser/txt/CharsetDetector.java | 452 +-
.../org/apache/tika/parser/txt/CharsetMatch.java | 181 +-
.../apache/tika/parser/txt/CharsetRecog_2022.java | 147 +-
.../apache/tika/parser/txt/CharsetRecog_UTF8.java | 56 +-
.../tika/parser/txt/CharsetRecog_Unicode.java | 127 +-
.../apache/tika/parser/txt/CharsetRecog_mbcs.java | 916 ++--
.../apache/tika/parser/txt/CharsetRecog_sbcs.java | 1749 +++----
.../apache/tika/parser/txt/CharsetRecognizer.java | 31 +-
.../tika/parser/txt/Icu4jEncodingDetector.java | 6 +-
.../java/org/apache/tika/parser/txt/TXTParser.java | 21 +-
.../tika/parser/txt/UniversalEncodingDetector.java | 2 -
.../apache/tika/parser/utils/CommonsDigester.java | 299 ++
.../org/apache/tika/parser/video/FLVParser.java | 4 +-
.../java/org/apache/tika/parser/xml/XMLParser.java | 8 +-
.../services/org.apache.tika.parser.Parser | 17 +
.../tika/parser/ctakes/CTAKESConfig.properties | 10 +-
.../tika/parser/external/tika-external-parsers.xml | 29 +-
.../GrobidExtractor.properties} | 7 +-
.../TesseractOCRConfig.properties} | 12 +-
.../apache/tika/parser/pdf/PDFParser.properties | 4 +
.../src/test/java/org/apache/tika/TestParsers.java | 19 +-
.../apache/tika/config/TikaDetectorConfigTest.java | 144 +
.../apache/tika/config/TikaParserConfigTest.java | 157 +
.../tika/config/TikaTranslatorConfigTest.java | 72 +
.../tika/detect/TestContainerAwareDetector.java | 104 +-
.../apache/tika/embedder/ExternalEmbedderTest.java | 8 +-
.../java/org/apache/tika/mime/TestMimeTypes.java | 295 +-
.../apache/tika/parser/AutoDetectParserTest.java | 165 +-
.../apache/tika/parser/DigestingParserTest.java | 136 +
.../org/apache/tika/parser/ParsingReaderTest.java | 16 +-
.../tika/parser/RecursiveParserWrapperTest.java | 312 ++
.../apache/tika/parser/audio/MidiParserTest.java | 4 +-
.../apache/tika/parser/chm/TestChmBlockInfo.java | 9 +-
.../apache/tika/parser/chm/TestChmExtraction.java | 122 +-
.../apache/tika/parser/chm/TestChmExtractor.java | 13 +-
.../apache/tika/parser/chm/TestChmItspHeader.java | 3 +-
.../apache/tika/parser/chm/TestChmLzxState.java | 3 +-
.../tika/parser/chm/TestChmLzxcControlData.java | 7 +-
.../tika/parser/chm/TestChmLzxcResetTable.java | 3 +-
.../org/apache/tika/parser/chm/TestParameters.java | 9 +-
.../org/apache/tika/parser/chm/TestPmglHeader.java | 3 +-
.../tika/parser/code/SourceCodeParserTest.java | 101 +
.../apache/tika/parser/crypto/Pkcs7ParserTest.java | 9 +-
.../DIFParserTest.java} | 40 +-
.../org/apache/tika/parser/dwg/DWGParserTest.java | 27 +-
.../tika/parser/envi/EnviHeaderParserTest.java | 60 +
.../apache/tika/parser/epub/EpubParserTest.java | 17 +-
.../parser/executable/ExecutableParserTest.java | 56 +-
.../apache/tika/parser/feed/FeedParserTest.java | 31 +-
.../parser/font/AdobeFontMetricParserTest.java | 71 -
.../apache/tika/parser/font/FontParsersTest.java | 113 +
.../parser/fork/ForkParserIntegrationTest.java | 26 +-
.../apache/tika/parser/gdal/TestGDALParser.java | 181 +
.../tika/parser/geo/topic/GeoParserTest.java | 91 +
.../geoinfo/GeographicInformationParserTest.java | 62 +
.../GribParserTest.java} | 33 +-
.../org/apache/tika/parser/hdf/HDFParserTest.java | 18 +-
.../apache/tika/parser/html/HtmlParserTest.java | 495 +-
.../tika/parser/ibooks/iBooksParserTest.java | 17 +-
.../apache/tika/parser/image/BPGParserTest.java | 133 +
.../parser/image/ImageMetadataExtractorTest.java | 64 +-
.../apache/tika/parser/image/ImageParserTest.java | 16 +-
.../apache/tika/parser/image/PSDParserTest.java | 12 +-
.../apache/tika/parser/image/TiffParserTest.java | 27 +-
.../apache/tika/parser/image/WebPParserTest.java | 72 +
.../parser/image/xmp/JempboxExtractorTest.java | 25 +-
.../tika/parser/isatab/ISArchiveParserTest.java | 60 +
.../apache/tika/parser/iwork/IWorkParserTest.java | 177 +-
.../apache/tika/parser/jdbc/SQLite3ParserTest.java | 356 ++
.../JournalParserTest.java} | 42 +-
.../apache/tika/parser/jpeg/JpegParserTest.java | 106 +-
.../apache/tika/parser/mail/RFC822ParserTest.java | 228 +-
.../org/apache/tika/parser/mat/MatParserTest.java | 80 +
.../apache/tika/parser/mbox/MboxParserTest.java | 203 +-
.../tika/parser/mbox/OutlookPSTParserTest.java | 110 +
.../AbstractPOIContainerExtractionTest.java | 45 +-
.../tika/parser/microsoft/ExcelParserTest.java | 453 +-
.../tika/parser/microsoft/JackcessParserTest.java | 194 +
...tectedParserTest.java => OfficeParserTest.java} | 32 +-
.../tika/parser/microsoft/OldExcelParserTest.java | 114 +
.../tika/parser/microsoft/OutlookParserTest.java | 137 +-
.../microsoft/POIContainerExtractionTest.java | 502 +-
.../parser/microsoft/PowerPointParserTest.java | 207 +-
.../tika/parser/microsoft/ProjectParserTest.java | 97 +-
.../tika/parser/microsoft/PublisherParserTest.java | 19 +-
.../tika/parser/microsoft/TNEFParserTest.java | 112 +-
.../tika/parser/microsoft/VisioParserTest.java | 17 +-
.../tika/parser/microsoft/WordParserTest.java | 328 +-
.../parser/microsoft/WriteProtectedParserTest.java | 10 +-
.../ooxml/OOXMLContainerExtractionTest.java | 396 +-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 1116 ++--
.../apache/tika/parser/mock/MockParserTest.java | 247 +
.../org/apache/tika/parser/mp3/Mp3ParserTest.java | 201 +-
.../org/apache/tika/parser/mp3/MpegStreamTest.java | 3 +-
.../org/apache/tika/parser/mp4/MP4ParserTest.java | 31 +-
.../tika/parser/netcdf/NetCDFParserTest.java | 30 +-
.../tika/parser/ocr/TesseractOCRConfigTest.java | 93 +
.../tika/parser/ocr/TesseractOCRParserTest.java | 206 +
.../org/apache/tika/parser/odf/ODFParserTest.java | 495 +-
.../apache/tika/parser/pdf/AccessCheckerTest.java | 137 +
.../org/apache/tika/parser/pdf/PDFParserTest.java | 1108 +++-
.../apache/tika/parser/pkg/AbstractPkgTest.java | 7 +
.../org/apache/tika/parser/pkg/ArParserTest.java | 158 +-
.../apache/tika/parser/pkg/Bzip2ParserTest.java | 61 +-
...zip2ParserTest.java => CompressParserTest.java} | 60 +-
.../org/apache/tika/parser/pkg/GzipParserTest.java | 73 +-
.../pkg/{TarParserTest.java => RarParserTest.java} | 87 +-
.../apache/tika/parser/pkg/Seven7ParserTest.java | 219 +
.../org/apache/tika/parser/pkg/TarParserTest.java | 63 +-
.../org/apache/tika/parser/pkg/ZipParserTest.java | 109 +-
.../org/apache/tika/parser/pkg/ZlibParserTest.java | 77 +
.../org/apache/tika/parser/prt/PRTParserTest.java | 28 +-
.../org/apache/tika/parser/rtf/RTFParserTest.java | 286 +-
.../parser/solidworks/SolidworksParserTest.java | 47 +-
.../apache/tika/parser/strings/FileConfigTest.java | 28 +
.../parser/strings/Latin1StringsParserTest.java | 69 +
.../tika/parser/strings/StringsConfigTest.java | 61 +
.../tika/parser/strings/StringsParserTest.java | 74 +
.../tika/parser/txt/CharsetDetectorTest.java | 53 +-
.../org/apache/tika/parser/txt/TXTParserTest.java | 74 +-
.../apache/tika/parser/xml/DcXMLParserTest.java | 30 +-
.../EmptyAndDuplicateElementsXMLParserTest.java | 42 +-
.../tika/parser/xml/FictionBookParserTest.java | 25 +-
.../sax/PhoneExtractingContentHandlerTest.java | 58 +
.../apache/tika/utils/ServiceLoaderUtilsTest.java | 57 +
tika-serialization/pom.xml | 100 +
.../tika/metadata/serialization/JsonMetadata.java | 87 +
.../metadata/serialization/JsonMetadataBase.java | 52 +
.../serialization/JsonMetadataDeserializer.java | 75 +
.../metadata/serialization/JsonMetadataList.java | 96 +
.../serialization/JsonMetadataSerializer.java | 97 +
.../serialization/PrettyMetadataKeyComparator.java | 44 +
.../serialization/JsonMetadataListTest.java | 123 +
.../metadata/serialization/JsonMetadataTest.java | 132 +
tika-server/Dockerfile | 37 +
tika-server/README | 35 -
tika-server/README.md | 45 +
tika-server/pom.xml | 196 +-
.../java/org/apache/tika/server/HTMLHelper.java | 64 +
.../java/org/apache/tika/server/MetadataEP.java | 164 -
.../server/{TikaVersion.java => MetadataList.java} | 25 +-
.../org/apache/tika/server/MetadataResource.java | 93 -
.../apache/tika/server/RichTextContentHandler.java | 34 +-
.../org/apache/tika/server/TikaLoggingFilter.java | 51 +
.../java/org/apache/tika/server/TikaResource.java | 343 --
.../java/org/apache/tika/server/TikaServerCli.java | 256 +-
...onMapper.java => TikaServerParseException.java} | 27 +-
.../server/TikaServerParseExceptionMapper.java | 90 +
.../org/apache/tika/server/UnpackerResource.java | 258 -
.../java/org/apache/tika/server/ZipWriter.java | 85 -
.../tika/server/resource/DetectorResource.java | 64 +
.../tika/server/resource/LanguageResource.java | 75 +
.../tika/server/resource/MetadataResource.java | 134 +
.../server/resource/RecursiveMetadataResource.java | 146 +
.../apache/tika/server/resource/TikaDetectors.java | 123 +
.../apache/tika/server/resource/TikaMimeTypes.java | 173 +
.../apache/tika/server/resource/TikaParsers.java | 242 +
.../apache/tika/server/resource/TikaResource.java | 426 ++
.../tika/server/{ => resource}/TikaVersion.java | 10 +-
.../apache/tika/server/resource/TikaWelcome.java | 232 +
.../tika/server/resource/TranslateResource.java | 111 +
.../tika/server/resource/UnpackerResource.java | 261 +
.../server/{ => writer}/CSVMessageBodyWriter.java | 49 +-
.../server/{ => writer}/JSONMessageBodyWriter.java | 62 +-
.../writer/MetadataListMessageBodyWriter.java | 68 +
.../apache/tika/server/{ => writer}/TarWriter.java | 49 +-
.../tika/server/writer/TextMessageBodyWriter.java | 76 +
.../tika/server/writer/XMPMessageBodyWriter.java | 68 +
.../org/apache/tika/server/writer/ZipWriter.java | 86 +
.../src/main/resources/tikaserver-template.html | 32 +
.../main/resources/tikaserver-version.properties | 18 -
.../java/org/apache/tika/server/CXFTestBase.java | 197 +-
.../apache/tika/server/DetectorResourceTest.java | 107 +
.../apache/tika/server/LanguageResourceTest.java | 109 +
.../org/apache/tika/server/MetadataEPTest.java | 187 -
.../apache/tika/server/MetadataResourceTest.java | 259 +-
.../tika/server/RecursiveMetadataResourceTest.java | 277 +
.../org/apache/tika/server/StackTraceOffTest.java | 150 +
.../org/apache/tika/server/StackTraceTest.java | 146 +
.../org/apache/tika/server/TikaDetectorsTest.java | 142 +
.../org/apache/tika/server/TikaMimeTypesTest.java | 121 +
.../org/apache/tika/server/TikaParsersTest.java | 186 +
.../org/apache/tika/server/TikaResourceTest.java | 282 +-
.../org/apache/tika/server/TikaVersionTest.java | 85 +-
.../org/apache/tika/server/TikaWelcomeTest.java | 112 +
.../apache/tika/server/TranslateResourceTest.java | 86 +
.../apache/tika/server/UnpackerResourceTest.java | 371 +-
tika-translate/pom.xml | 160 +
.../tika/language/translate/CachedTranslator.java | 179 +
.../language/translate/ExternalTranslator.java | 101 +
.../tika/language/translate/GoogleTranslator.java | 118 +
.../tika/language/translate/Lingo24Translator.java | 114 +
.../language/translate/MicrosoftTranslator.java | 149 +
.../tika/language/translate/MosesTranslator.java | 140 +
.../org.apache.tika.language.translate.Translator | 10 +-
.../translate/translator.google.properties | 11 +-
.../translate/translator.lingo24.properties | 11 +-
.../translate/translator.microsoft.properties | 12 +-
.../language/translate/translator.moses.properties | 13 +-
.../language/translate/CachedTranslatorTest.java | 85 +
.../language/translate/GoogleTranslatorTest.java | 83 +
.../language/translate/Lingo24TranslatorTest.java | 78 +
.../translate/MicrosoftTranslatorTest.java | 76 +
.../language/translate/MosesTranslatorTest.java | 38 +-
tika-xmp/pom.xml | 38 +-
.../java/org/apache/tika/xmp/XMPMetadataTest.java | 8 +-
625 files changed, 57037 insertions(+), 26907 deletions(-)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git
More information about the pkg-java-commits
mailing list