[tika] 10/23: Add refreshed patches again.

Markus Koschany apo-guest at moszumanska.debian.org
Mon Nov 30 20:27:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

apo-guest pushed a commit to branch master
in repository tika.

commit 8f685429e2f880595c101744256f3a4c51e4e8fa
Author: Markus Koschany <apo at debian.org>
Date:   Mon Nov 30 16:08:35 2015 +0000

    Add refreshed patches again.
---
 debian/patches/01-jar-packaging.patch             |  16 +-
 debian/patches/MP4Parser.patch                    | 340 ++++++++++++++++++++++
 debian/patches/netcdf.patch                       | 290 ++++++++++++++++++
 debian/patches/optional-parser-dependencies.patch | 303 +++++++++++++++++++
 debian/patches/osgi.patch                         |  31 ++
 debian/patches/series                             |   4 +
 6 files changed, 981 insertions(+), 3 deletions(-)

diff --git a/debian/patches/01-jar-packaging.patch b/debian/patches/01-jar-packaging.patch
index 3186ec0..38ccbaa 100644
--- a/debian/patches/01-jar-packaging.patch
+++ b/debian/patches/01-jar-packaging.patch
@@ -1,6 +1,14 @@
-Description: Change the Maven packaging from bundle to jar to avoid build issues with maven-debian-helper
-Author: Emmanuel Bourg <ebourg at apache.org>
-Forwarded: not-needed
+From: Debian Java Maintainers <pkg-java-maintainers at lists.alioth.debian.org>
+Date: Mon, 30 Nov 2015 15:50:06 +0000
+Subject: jar-packaging
+
+---
+ tika-core/pom.xml    | 2 +-
+ tika-parsers/pom.xml | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tika-core/pom.xml b/tika-core/pom.xml
+index 1ed5538..7f80b87 100644
 --- a/tika-core/pom.xml
 +++ b/tika-core/pom.xml
 @@ -30,7 +30,7 @@
@@ -12,6 +20,8 @@ Forwarded: not-needed
    <name>Apache Tika core</name>
    <url>http://tika.apache.org/</url>
  
+diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
+index 9557a3d..c0f673f 100644
 --- a/tika-parsers/pom.xml
 +++ b/tika-parsers/pom.xml
 @@ -30,7 +30,7 @@
diff --git a/debian/patches/MP4Parser.patch b/debian/patches/MP4Parser.patch
new file mode 100644
index 0000000..3998d88
--- /dev/null
+++ b/debian/patches/MP4Parser.patch
@@ -0,0 +1,340 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:50:18 +0000
+Subject: MP4Parser
+
+---
+ .../java/org/apache/tika/parser/mp4/MP4Parser.java | 325 ---------------------
+ 1 file changed, 325 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+deleted file mode 100644
+index 20c8246..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
++++ /dev/null
+@@ -1,325 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.mp4;
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.text.DecimalFormat;
+-import java.text.NumberFormat;
+-import java.util.Arrays;
+-import java.util.Collections;
+-import java.util.HashMap;
+-import java.util.List;
+-import java.util.Locale;
+-import java.util.Map;
+-import java.util.Set;
+-
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.metadata.XMP;
+-import org.apache.tika.metadata.XMPDM;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import com.coremedia.iso.IsoFile;
+-import com.coremedia.iso.boxes.Box;
+-import com.coremedia.iso.boxes.Container;
+-import com.coremedia.iso.boxes.FileTypeBox;
+-import com.coremedia.iso.boxes.MetaBox;
+-import com.coremedia.iso.boxes.MovieBox;
+-import com.coremedia.iso.boxes.MovieHeaderBox;
+-import com.coremedia.iso.boxes.SampleDescriptionBox;
+-import com.coremedia.iso.boxes.SampleTableBox;
+-import com.coremedia.iso.boxes.TrackBox;
+-import com.coremedia.iso.boxes.TrackHeaderBox;
+-import com.coremedia.iso.boxes.UserDataBox;
+-import com.coremedia.iso.boxes.apple.AppleItemListBox;
+-import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry;
+-import com.googlecode.mp4parser.boxes.apple.AppleAlbumBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleArtistBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleArtist2Box;
+-import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box;
+-import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox;
+-import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox;
+-import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox;
+-
+-/**
+- * Parser for the MP4 media container format, as well as the older
+- *  QuickTime format that MP4 is based on.
+- * 
+- * This uses the MP4Parser project from http://code.google.com/p/mp4parser/
+- *  to do the underlying parsing
+- */
+-public class MP4Parser extends AbstractParser {
+-    /** Serial version UID */
+-    private static final long serialVersionUID = 84011216792285L;
+-    /** TODO Replace this with a 2dp Duration Property Converter */
+-    private static final DecimalFormat DURATION_FORMAT = 
+-            (DecimalFormat)NumberFormat.getNumberInstance(Locale.ROOT); 
+-    static {
+-        DURATION_FORMAT.applyPattern("0.0#");
+-    }
+-    
+-    // Ensure this stays in Sync with the entries in tika-mimetypes.xml
+-    private static final Map<MediaType,List<String>> typesMap = new HashMap<MediaType, List<String>>();
+-    static {
+-       // All types should be 4 bytes long, space padded as needed
+-       typesMap.put(MediaType.audio("mp4"), Arrays.asList(
+-             "M4A ", "M4B ", "F4A ", "F4B "));
+-       typesMap.put(MediaType.video("3gpp"), Arrays.asList(
+-             "3ge6", "3ge7", "3gg6", "3gp1", "3gp2", "3gp3", "3gp4", "3gp5", "3gp6", "3gs7"));
+-       typesMap.put(MediaType.video("3gpp2"), Arrays.asList(
+-             "3g2a", "3g2b", "3g2c"));
+-       typesMap.put(MediaType.video("mp4"), Arrays.asList(
+-             "mp41", "mp42"));
+-       typesMap.put(MediaType.video("x-m4v"), Arrays.asList(
+-             "M4V ", "M4VH", "M4VP"));
+-       
+-       typesMap.put(MediaType.video("quicktime"), Collections.<String>emptyList());
+-       typesMap.put(MediaType.application("mp4"), Collections.<String>emptyList());
+-    }
+-
+-    private static final Set<MediaType> SUPPORTED_TYPES =
+-       Collections.unmodifiableSet(typesMap.keySet());
+-
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-
+-    public void parse(
+-            InputStream stream, ContentHandler handler,
+-            Metadata metadata, ParseContext context)
+-            throws IOException, SAXException, TikaException {
+-        IsoFile isoFile;
+-        
+-        // The MP4Parser library accepts either a File, or a byte array
+-        // As MP4 video files are typically large, always use a file to
+-        //  avoid OOMs that may occur with in-memory buffering
+-        TemporaryResources tmp = new TemporaryResources();
+-        TikaInputStream tstream = TikaInputStream.get(stream, tmp);
+-        try {
+-            isoFile = new IsoFile(new DirectFileReadDataSource(tstream.getFile()));
+-            tmp.addResource(isoFile);
+-
+-            // Grab the file type box
+-            FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class);
+-            if (fileType != null) {
+-               // Identify the type
+-               MediaType type = MediaType.application("mp4");
+-               for (MediaType t : typesMap.keySet()) {
+-                  if (typesMap.get(t).contains(fileType.getMajorBrand())) {
+-                     type = t;
+-                     break;
+-                  }
+-               }
+-               metadata.set(Metadata.CONTENT_TYPE, type.toString());
+-
+-               if (type.getType().equals("audio")) {
+-                  metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim());
+-               }
+-            } else {
+-               // Some older QuickTime files lack the FileType
+-               metadata.set(Metadata.CONTENT_TYPE, "video/quicktime");
+-            }
+-
+-
+-            // Get the main MOOV box
+-            MovieBox moov = getOrNull(isoFile, MovieBox.class);
+-            if (moov == null) {
+-               // Bail out
+-               return;
+-            }
+-
+-
+-            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-            xhtml.startDocument();
+-
+-
+-            // Pull out some information from the header box
+-            MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
+-            if (mHeader != null) {
+-               // Get the creation and modification dates
+-               metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime());
+-               metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());
+-
+-               // Get the duration
+-               double durationSeconds = ((double)mHeader.getDuration()) / mHeader.getTimescale();
+-               metadata.set(XMPDM.DURATION, DURATION_FORMAT.format(durationSeconds));
+-
+-               // The timescale is normally the sampling rate
+-               metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)mHeader.getTimescale());
+-            }
+-
+-
+-            // Get some more information from the track header
+-            // TODO Decide how to handle multiple tracks
+-            List<TrackBox> tb = moov.getBoxes(TrackBox.class);
+-            if (tb.size() > 0) {
+-               TrackBox track = tb.get(0);
+-
+-               TrackHeaderBox header = track.getTrackHeaderBox();
+-               // Get the creation and modification dates
+-               metadata.set(TikaCoreProperties.CREATED, header.getCreationTime());
+-               metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime());
+-
+-               // Get the video with and height
+-               metadata.set(Metadata.IMAGE_WIDTH,  (int)header.getWidth());
+-               metadata.set(Metadata.IMAGE_LENGTH, (int)header.getHeight());
+-
+-               // Get the sample information
+-               SampleTableBox samples = track.getSampleTableBox();
+-               SampleDescriptionBox sampleDesc = samples.getSampleDescriptionBox();
+-               if (sampleDesc != null) {
+-                  // Look for the first Audio Sample, if present
+-                  AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class);
+-                  if (sample != null) {
+-                     XMPDM.ChannelTypePropertyConverter.convertAndSet(metadata, sample.getChannelCount());
+-                     //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize());    // TODO Num -> Type mapping
+-                     metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)sample.getSampleRate());
+-                     //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket());
+-                     //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample());
+-                  }
+-               }
+-            }
+-
+-            // Get metadata from the User Data Box
+-            UserDataBox userData = getOrNull(moov, UserDataBox.class);
+-            if (userData != null) {
+-               MetaBox meta = getOrNull(userData, MetaBox.class);
+-
+-               // Check for iTunes Metadata
+-               // See http://atomicparsley.sourceforge.net/mpeg-4files.html and
+-               //  http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these
+-               AppleItemListBox apple = getOrNull(meta, AppleItemListBox.class);
+-               if (apple != null) {
+-                  // Title
+-                  AppleNameBox title = getOrNull(apple, AppleNameBox.class);
+-                  addMetadata(TikaCoreProperties.TITLE, metadata, title);
+-
+-                  // Artist
+-                  AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class);
+-                  addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
+-                  addMetadata(XMPDM.ARTIST, metadata, artist);
+-
+-                  // Album Artist
+-                  AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
+-                  addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);
+-
+-                  // Album
+-                  AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
+-                  addMetadata(XMPDM.ALBUM, metadata, album);
+-
+-                  // Composer
+-                  AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class);
+-                  addMetadata(XMPDM.COMPOSER, metadata, composer);
+-
+-                  // Genre
+-                  AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class);
+-                  addMetadata(XMPDM.GENRE, metadata, genre);
+-
+-                  // Year
+-                  AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class);
+-                  if (year != null) {
+-                      metadata.set(XMPDM.RELEASE_DATE, year.getValue());
+-                  }
+-
+-                  // Track number
+-                  AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class);
+-                  if (trackNum != null) {
+-                     metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA());
+-                     //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
+-                  }
+-
+-                  // Disc number
+-                  AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
+-                  if (discNum != null) {
+-                     metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
+-                  }
+-
+-                  // Compilation
+-                  AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
+-                  if (compilation != null) {
+-                      metadata.set(XMPDM.COMPILATION, (int)compilation.getValue());
+-                  }
+-
+-                  // Comment
+-                  AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
+-                  addMetadata(XMPDM.LOG_COMMENT, metadata, comment);
+-
+-                  // Encoder
+-                  AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
+-                  if (encoder != null) {
+-                      metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
+-                  }
+-
+-
+-                  // As text
+-                  for (Box box : apple.getBoxes()) {
+-                     if (box instanceof Utf8AppleDataBox) {
+-                        xhtml.element("p", ((Utf8AppleDataBox)box).getValue());
+-                     }
+-                  }
+-               }
+-
+-               // TODO Check for other kinds too
+-            }
+-
+-            // All done
+-            xhtml.endDocument();
+-
+-        } finally {
+-            tmp.dispose();
+-        }
+-
+-    }
+-    
+-    private static void addMetadata(String key, Metadata m, Utf8AppleDataBox metadata) {
+-       if (metadata != null) {
+-          m.add(key, metadata.getValue());
+-       }
+-    }
+-    private static void addMetadata(Property prop, Metadata m, Utf8AppleDataBox metadata) {
+-       if (metadata != null) {
+-          m.set(prop, metadata.getValue());
+-       }
+-    }
+-    
+-    private static <T extends Box> T getOrNull(Container box, Class<T> clazz) {
+-       if (box == null) return null;
+-
+-       List<T> boxes = box.getBoxes(clazz);
+-       if (boxes.size() == 0) {
+-          return null;
+-       }
+-       return boxes.get(0);
+-    }
+-}
diff --git a/debian/patches/netcdf.patch b/debian/patches/netcdf.patch
new file mode 100644
index 0000000..2fe84d6
--- /dev/null
+++ b/debian/patches/netcdf.patch
@@ -0,0 +1,290 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:53:57 +0000
+Subject: netcdf
+
+---
+ .../java/org/apache/tika/parser/hdf/HDFParser.java | 122 -----------------
+ .../apache/tika/parser/netcdf/NetCDFParser.java    | 144 ---------------------
+ 2 files changed, 266 deletions(-)
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
+ delete mode 100644 tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
+deleted file mode 100644
+index 821493b..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
++++ /dev/null
+@@ -1,122 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.apache.tika.parser.hdf;
+-
+-//JDK imports
+-import java.io.ByteArrayOutputStream;
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Set;
+-
+-import org.apache.commons.io.IOUtils;
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.netcdf.NetCDFParser;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import ucar.nc2.Attribute;
+-import ucar.nc2.Group;
+-import ucar.nc2.NetcdfFile;
+-
+-/**
+- * 
+- * Since the {@link NetCDFParser} depends on the <a
+- * href="http://www.unidata.ucar.edu/software/netcdf-java" >NetCDF-Java</a> API,
+- * we are able to use it to parse HDF files as well. See <a href=
+- * "http://www.unidata.ucar.edu/software/netcdf-java/formats/FileTypes.html"
+- * >this link</a> for more information.
+- */
+-public class HDFParser extends AbstractParser {
+-
+-    /** Serial version UID */
+-    private static final long serialVersionUID = 1091208208003437549L;
+-
+-    private static final Set<MediaType> SUPPORTED_TYPES =
+-        Collections.singleton(MediaType.application("x-hdf"));
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see
+-     * org.apache.tika.parser.netcdf.NetCDFParser#getSupportedTypes(org.apache
+-     * .tika.parser.ParseContext)
+-     */
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see
+-     * org.apache.tika.parser.netcdf.NetCDFParser#parse(java.io.InputStream,
+-     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+-     * org.apache.tika.parser.ParseContext)
+-     */
+-    public void parse(InputStream stream, ContentHandler handler,
+-            Metadata metadata, ParseContext context) throws IOException,
+-            SAXException, TikaException {
+-        ByteArrayOutputStream os = new ByteArrayOutputStream();
+-        IOUtils.copy(stream, os);
+-
+-        String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+-        if (name == null) {
+-            name = "";
+-        }
+-        try {
+-            NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray());
+-            unravelStringMet(ncFile, null, metadata);
+-        } catch (IOException e) {
+-            throw new TikaException("HDF parse error", e);
+-        }
+-
+-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-        xhtml.startDocument();
+-        xhtml.endDocument();
+-    }
+-
+-    protected void unravelStringMet(NetcdfFile ncFile, Group group, Metadata met) {
+-        if (group == null) {
+-            group = ncFile.getRootGroup();
+-        }
+-
+-        // get file type
+-        met.set("File-Type-Description", ncFile.getFileTypeDescription());
+-        // unravel its string attrs
+-        for (Attribute attribute : group.getAttributes()) {
+-            if (attribute.isString()) {
+-                met.add(attribute.getFullName(), attribute.getStringValue());
+-            } else {
+-                // try and cast its value to a string
+-                met.add(attribute.getFullName(), String.valueOf(attribute
+-                        .getNumericValue()));
+-            }
+-        }
+-
+-        for (Group g : group.getGroups()) {
+-            unravelStringMet(ncFile, g, met);
+-        }
+-    }
+-
+-}
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
+deleted file mode 100644
+index 57254f8..0000000
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
++++ /dev/null
+@@ -1,144 +0,0 @@
+-/*
+- * Licensed to the Apache Software Foundation (ASF) under one or more
+- * contributor license agreements.  See the NOTICE file distributed with
+- * this work for additional information regarding copyright ownership.
+- * The ASF licenses this file to You under the Apache License, Version 2.0
+- * (the "License"); you may not use this file except in compliance with
+- * the License.  You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-package org.apache.tika.parser.netcdf;
+-
+-//JDK imports
+-
+-import java.io.IOException;
+-import java.io.InputStream;
+-import java.util.Collections;
+-import java.util.Set;
+-import java.util.List;
+-
+-import org.apache.tika.exception.TikaException;
+-import org.apache.tika.io.TemporaryResources;
+-import org.apache.tika.io.TikaInputStream;
+-import org.apache.tika.metadata.Metadata;
+-import org.apache.tika.metadata.Property;
+-import org.apache.tika.metadata.TikaCoreProperties;
+-import org.apache.tika.mime.MediaType;
+-import org.apache.tika.parser.AbstractParser;
+-import org.apache.tika.parser.ParseContext;
+-import org.apache.tika.parser.Parser;
+-import org.apache.tika.sax.XHTMLContentHandler;
+-import org.xml.sax.ContentHandler;
+-import org.xml.sax.SAXException;
+-
+-import ucar.nc2.Attribute;
+-import ucar.nc2.NetcdfFile;
+-import ucar.nc2.Variable;
+-import ucar.nc2.Dimension;
+-
+-/**
+- * A {@link Parser} for <a
+- * href="http://www.unidata.ucar.edu/software/netcdf/index.html">NetCDF</a>
+- * files using the UCAR, MIT-licensed <a
+- * href="http://www.unidata.ucar.edu/software/netcdf-java/">NetCDF for Java</a>
+- * API.
+- */
+-public class NetCDFParser extends AbstractParser {
+-
+-    /**
+-     * Serial version UID
+-     */
+-    private static final long serialVersionUID = -5940938274907708665L;
+-
+-    private final Set<MediaType> SUPPORTED_TYPES =
+-            Collections.singleton(MediaType.application("x-netcdf"));
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see
+-     * org.apache.tika.parser.Parser#getSupportedTypes(org.apache.tika.parser
+-     * .ParseContext)
+-     */
+-    public Set<MediaType> getSupportedTypes(ParseContext context) {
+-        return SUPPORTED_TYPES;
+-    }
+-
+-    /*
+-     * (non-Javadoc)
+-     * 
+-     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+-     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+-     * org.apache.tika.parser.ParseContext)
+-     */
+-    public void parse(InputStream stream, ContentHandler handler,
+-                      Metadata metadata, ParseContext context) throws IOException,
+-            SAXException, TikaException {
+-
+-        TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources());
+-        try {
+-            NetcdfFile ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath());
+-            metadata.set("File-Type-Description", ncFile.getFileTypeDescription());
+-            // first parse out the set of global attributes
+-            for (Attribute attr : ncFile.getGlobalAttributes()) {
+-                Property property = resolveMetadataKey(attr.getFullName());
+-                if (attr.getDataType().isString()) {
+-                    metadata.add(property, attr.getStringValue());
+-                } else if (attr.getDataType().isNumeric()) {
+-                    int value = attr.getNumericValue().intValue();
+-                    metadata.add(property, String.valueOf(value));
+-                }
+-            }
+-
+-
+-            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+-            xhtml.startDocument();
+-            xhtml.newline();
+-            xhtml.element("h1", "dimensions");
+-            xhtml.startElement("ul");
+-            xhtml.newline();
+-            for (Dimension dim : ncFile.getDimensions()) {
+-                xhtml.element("li", dim.getFullName() + " = " + dim.getLength());
+-            }
+-            xhtml.endElement("ul");
+-
+-            xhtml.element("h1", "variables");
+-            xhtml.startElement("ul");
+-            xhtml.newline();
+-            for (Variable var : ncFile.getVariables()) {
+-                xhtml.startElement("li");
+-                xhtml.characters(var.getDataType() + " " + var.getNameAndDimensions());
+-                xhtml.newline();
+-                List<Attribute> attributes = var.getAttributes();
+-                if (!attributes.isEmpty()) {
+-                    xhtml.startElement("ul");
+-                    for (Attribute element : attributes) {
+-                        xhtml.element("li", element.toString());
+-                    }
+-                    xhtml.endElement("ul");
+-                }
+-                xhtml.endElement("li");
+-            }
+-            xhtml.endElement("ul");
+-
+-            xhtml.endDocument();
+-
+-        } catch (IOException e) {
+-            throw new TikaException("NetCDF parse error", e);
+-        }
+-    }
+-
+-    private Property resolveMetadataKey(String localName) {
+-        if ("title".equals(localName)) {
+-            return TikaCoreProperties.TITLE;
+-        }
+-        return Property.internalText(localName);
+-    }
+-}
+\ No newline at end of file
diff --git a/debian/patches/optional-parser-dependencies.patch b/debian/patches/optional-parser-dependencies.patch
new file mode 100644
index 0000000..5d933bc
--- /dev/null
+++ b/debian/patches/optional-parser-dependencies.patch
@@ -0,0 +1,303 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 16:08:14 +0000
+Subject: optional parser dependencies
+
+---
+ tika-parsers/pom.xml | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
+index c0f673f..6872101 100644
+--- a/tika-parsers/pom.xml
++++ b/tika-parsers/pom.xml
+@@ -76,16 +76,19 @@
+       <groupId>org.gagravarr</groupId>
+       <artifactId>vorbis-java-tika</artifactId>
+       <version>${vorbis.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.healthmarketscience.jackcess</groupId>
+       <artifactId>jackcess</artifactId>
+       <version>2.1.2</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.healthmarketscience.jackcess</groupId>
+       <artifactId>jackcess-encrypt</artifactId>
+       <version>2.1.1</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <!-- Optional OSGi dependencies, used only when running within OSGi -->
+@@ -93,6 +96,7 @@
+       <groupId>org.apache.felix</groupId>
+       <artifactId>org.apache.felix.scr.annotations</artifactId>
+       <scope>provided</scope>
++      <optional>true</optional>
+     </dependency>
+ 
+     <!-- Upstream parser libraries -->
+@@ -100,37 +104,44 @@
+       <groupId>net.sourceforge.jmatio</groupId>
+       <artifactId>jmatio</artifactId>
+       <version>1.0</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.james</groupId>
+       <artifactId>apache-mime4j-core</artifactId>
+       <version>${mime4j.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.james</groupId>
+       <artifactId>apache-mime4j-dom</artifactId>
+       <version>${mime4j.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.commons</groupId>
+       <artifactId>commons-compress</artifactId>
+       <version>${commons.compress.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.tukaani</groupId>
+       <artifactId>xz</artifactId>
+       <version>${tukaani.version}</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>commons-codec</groupId>
+       <artifactId>commons-codec</artifactId>
+       <version>${codec.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.pdfbox</groupId>
+       <artifactId>pdfbox</artifactId>
+       <version>${pdfbox.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <!-- TIKA-370: PDFBox declares the Bouncy Castle dependencies
+          as optional, but we prefer to have them always to avoid
+@@ -139,26 +150,31 @@
+       <groupId>org.bouncycastle</groupId>
+       <artifactId>bcmail-jdk15on</artifactId>
+       <version>1.52</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.bouncycastle</groupId>
+       <artifactId>bcprov-jdk15on</artifactId>
+       <version>1.52</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.poi</groupId>
+       <artifactId>poi</artifactId>
+       <version>${poi.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.poi</groupId>
+       <artifactId>poi-scratchpad</artifactId>
+       <version>${poi.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.poi</groupId>
+       <artifactId>poi-ooxml</artifactId>
+       <version>${poi.version}</version>
++      <optional>true</optional>
+       <exclusions>
+         <exclusion>
+           <groupId>stax</groupId>
+@@ -174,61 +190,73 @@
+       <groupId>org.ccil.cowan.tagsoup</groupId>
+       <artifactId>tagsoup</artifactId>
+       <version>1.2.1</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.ow2.asm</groupId>
+       <artifactId>asm</artifactId>
+       <version>5.0.4</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.googlecode.mp4parser</groupId>
+       <artifactId>isoparser</artifactId>
+       <version>1.0.2</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.drewnoakes</groupId>
+       <artifactId>metadata-extractor</artifactId>
+       <version>2.8.0</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>de.l3s.boilerpipe</groupId>
+       <artifactId>boilerpipe</artifactId>
+       <version>1.1.0</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>rome</groupId>
+       <artifactId>rome</artifactId>
+       <version>1.0</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.gagravarr</groupId>
+       <artifactId>vorbis-java-core</artifactId>
+       <version>${vorbis.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.googlecode.juniversalchardet</groupId>
+       <artifactId>juniversalchardet</artifactId>
+       <version>1.0.3</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.codelibs</groupId>
+       <artifactId>jhighlight</artifactId>
+       <version>1.0.2</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.pff</groupId>
+       <artifactId>java-libpst</artifactId>
+       <version>0.8.1</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>com.github.junrar</groupId>
+       <artifactId>junrar</artifactId>
+       <version>0.7</version>
++      <optional>true</optional>
+     </dependency>
+ 	<dependency>
+       <groupId>org.apache.cxf</groupId>
+       <artifactId>cxf-rt-rs-client</artifactId>
+       <version>${cxf.version}</version>
++      <optional>true</optional>
+     </dependency>
+ 	
+ 
+@@ -238,30 +266,35 @@
+       <artifactId>sqlite-jdbc</artifactId>
+       <version>3.8.10.1</version> 
+       <scope>provided</scope>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>org.apache.opennlp</groupId>
+       <artifactId>opennlp-tools</artifactId>
+       <version>1.5.3</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>commons-io</groupId>
+       <artifactId>commons-io</artifactId>
+       <version>${commons.io.version}</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>org.apache.commons</groupId>
+       <artifactId>commons-exec</artifactId>
+       <version>1.3</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>com.googlecode.json-simple</groupId>
+       <artifactId>json-simple</artifactId>
+       <version>1.1.1</version>
++      <optional>true</optional>
+       <exclusions>
+         <exclusion>
+           <groupId>junit</groupId>
+@@ -274,6 +307,7 @@
+       <groupId>org.json</groupId>
+       <artifactId>json</artifactId>
+       <version>20140107</version>
++      <optional>true</optional>
+     </dependency>
+     
+ 
+@@ -299,16 +333,19 @@
+       <groupId>edu.ucar</groupId>
+       <artifactId>netcdf4</artifactId>
+       <version>${netcdf-java.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>edu.ucar</groupId>
+       <artifactId>grib</artifactId>
+       <version>${netcdf-java.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>edu.ucar</groupId>
+       <artifactId>cdm</artifactId>
+       <version>${netcdf-java.version}</version>
++      <optional>true</optional>
+       <exclusions>
+         <exclusion>
+           <groupId>org.slf4j</groupId>
+@@ -320,33 +357,39 @@
+       <groupId>edu.ucar</groupId>
+       <artifactId>httpservices</artifactId>
+       <version>${netcdf-java.version}</version>
++      <optional>true</optional>
+     </dependency>
+     <!-- Apache Commons CSV -->
+     <dependency>
+       <groupId>org.apache.commons</groupId>
+       <artifactId>commons-csv</artifactId>
+       <version>1.0</version>
++      <optional>true</optional>
+     </dependency>
+ 
+     <dependency>
+       <groupId>org.apache.sis.core</groupId>
+       <artifactId>sis-utility</artifactId>
+       <version>0.5</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.sis.storage</groupId>
+       <artifactId>sis-netcdf</artifactId>
+       <version>0.5</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.sis.core</groupId>
+       <artifactId>sis-metadata</artifactId>
+       <version>0.5</version>
++      <optional>true</optional>
+     </dependency>
+     <dependency>
+       <groupId>org.opengis</groupId>
+       <artifactId>geoapi</artifactId>
+       <version>3.0.0</version>
++      <optional>true</optional>
+     </dependency>
+     <!-- Apache cTAKES -->
+     <dependency>
+@@ -354,6 +397,7 @@
+       <artifactId>ctakes-core</artifactId>
+       <version>3.2.2</version>
+       <scope>provided</scope>
++      <optional>true</optional>
+     </dependency>
+   </dependencies>
+ 
diff --git a/debian/patches/osgi.patch b/debian/patches/osgi.patch
new file mode 100644
index 0000000..15a86d9
--- /dev/null
+++ b/debian/patches/osgi.patch
@@ -0,0 +1,31 @@
+From: Markus Koschany <apo at debian.org>
+Date: Mon, 30 Nov 2015 15:55:24 +0000
+Subject: osgi
+
+---
+ .../src/main/java/org/apache/tika/parser/internal/Activator.java  | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
+index a884d3a..f3324b8 100644
+--- a/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
++++ b/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
+@@ -35,14 +35,14 @@ public class Activator implements BundleActivator {
+     @Override
+     public void start(BundleContext context) throws Exception {
+         detectorService = context.registerService(
+-                Detector.class.getName(),
++                Detector.class,
+                 new DefaultDetector(Activator.class.getClassLoader()),
+-                new Properties());
++                new java.util.Hashtable<String,String>());
+         Parser parser = new DefaultParser(Activator.class.getClassLoader());
+         parserService = context.registerService(
+-                Parser.class.getName(),
++                Parser.class,
+                 parser,
+-                new Properties());
++                new java.util.Hashtable<String,String>());
+     }
+ 
+     @Override
diff --git a/debian/patches/series b/debian/patches/series
index da693f0..9c90618 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,5 @@
 01-jar-packaging.patch
+MP4Parser.patch
+netcdf.patch
+osgi.patch
+optional-parser-dependencies.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git



More information about the pkg-java-commits mailing list