[Pkg-privacy-commits] [mat] 31/68: Remove hachoir from MAT.

Sascha Steinbiss sascha at steinbiss.name
Sun Jan 3 12:32:40 UTC 2016


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository mat.

commit 80ece3001895ea13d50915a5215fd47e313bab4c
Author: jvoisin <julien.voisin at dustri.org>
Date:   Wed Dec 2 17:07:19 2015 +0100

    Remove hachoir from MAT.
    
    This (huge) commit completely removes hachoir from MAT.
    Audio files are now processed with mutagen, and images
    with exiftool, since the main Python imaging library (PIL)
    isn't great at dealing with metadata (or with damaged or
    non-standard files).
    
    Package maintainers should update the dependencies to reflect this.
---
 .travis.yml                          |   4 +-
 README.md                            |   3 +-
 data/FORMATS                         |   6 +-
 libmat/archive.py                    |  13 +-
 libmat/audio.py                      |  53 ------
 libmat/exiftool.py                   |   4 +-
 libmat/hachoir_editor/__init__.py    |   8 -
 libmat/hachoir_editor/field.py       |  69 -------
 libmat/hachoir_editor/fieldset.py    | 352 -----------------------------------
 libmat/hachoir_editor/typed_field.py | 268 --------------------------
 libmat/images.py                     |  52 ------
 libmat/mat.py                        |  21 +--
 libmat/misc.py                       |   4 +-
 libmat/mutagenstripper.py            |  66 ++++++-
 libmat/office.py                     |   4 +-
 libmat/parser.py                     |  78 +-------
 libmat/strippers.py                  |  18 +-
 mat                                  |   4 +-
 mat.1                                |   2 +-
 setup.py                             |   6 +-
 20 files changed, 100 insertions(+), 935 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 758176a..d30d5e0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,7 @@ addons:
             - gir1.2-poppler-0.18
             - python-pdfrw 
             - python-gi-cairo
+            - python-mutagen
 virtualenv:
     system_site_packages: true
 
@@ -25,12 +26,11 @@ install:
     - pip install --user --upgrade setuptools
     - pip install --user coveralls
     - pip install --user codecov
-    - pip install --user mutagen hachoir_core hachoir_parser
     - popd
     - python setup.py install
 
 script:
-    - coverage run --source=libmat --omit='*hachoir_editor*' setup.py test
+    - coverage run --source=libmat setup.py test
 
 after_success:
     - coveralls
diff --git a/README.md b/README.md
index da49130..9e8f691 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,6 @@ See README.security
 DEPENDENCIES
 ============
  * python2.7 (at least)
- * python-hachoir-core and python-hachoir-parser
  * python-pdfrw, gir-poppler and python-gi-cairo for full PDF support
  * python-gi for the GUI
  * shred (should be already installed)
@@ -35,7 +34,7 @@ DEPENDENCIES
 OPTIONALS DEPENDENCIES
 ======================
  * python-mutagen: for massive audio format support
- * exiftool: for _massive_ image format support
+ * exiftool: for image format support
 
 USAGE
 =====
diff --git a/data/FORMATS b/data/FORMATS
index b398be1..6880e25 100644
--- a/data/FORMATS
+++ b/data/FORMATS
@@ -5,7 +5,7 @@
         <mimetype>image/png</mimetype>
         <support>Full</support>
         <metadata>Textual metadata and date</metadata>
-        <method>Removal of harmful fields with hachoir.</method>
+        <method>Removal of harmful fields with exiftool.</method>
         <remaining>None</remaining>
     </format>
 
@@ -15,7 +15,7 @@
         <mimetype>image/jpeg</mimetype>
         <support>Partial</support>
         <metadata>Comments and exif/photoshop/adobe</metadata>
-        <method>Removal of harmful fields with hachoir.</method>
+        <method>Removal of harmful fields with exiftool.</method>
         <remaining>Canon Raw tags</remaining>
     </format>
 
@@ -75,7 +75,7 @@
         <mimetype>audio/mpeg</mimetype>
         <support>Full</support>
         <metadata>Id3</metadata>
-        <method>Removal of harmful fields with hachoir</method>
+        <method>Removal of harmful fields with exiftool</method>
         <remaining>None</remaining>
     </format>
 
diff --git a/libmat/archive.py b/libmat/archive.py
index ad9fdc9..2e14538 100644
--- a/libmat/archive.py
+++ b/libmat/archive.py
@@ -20,9 +20,8 @@ class GenericArchiveStripper(parser.GenericParser):
     """ Represent a generic archive
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(GenericArchiveStripper, self).__init__(filename,
-                                                     parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(GenericArchiveStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.compression = ''
         self.add2archive = kwargs['add2archive']
         self.tempdir = tempfile.mkdtemp()
@@ -354,8 +353,8 @@ class GzipStripper(TarStripper):
     """ Represent a tar.gz archive
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(GzipStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(GzipStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.compression = ':gz'
 
 
@@ -363,6 +362,6 @@ class Bzip2Stripper(TarStripper):
     """ Represent a tar.bz2 archive
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(Bzip2Stripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(Bzip2Stripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.compression = ':bz2'
diff --git a/libmat/audio.py b/libmat/audio.py
deleted file mode 100644
index 2747dc1..0000000
--- a/libmat/audio.py
+++ /dev/null
@@ -1,53 +0,0 @@
-""" Care about audio fileformat
-"""
-
-try:
-    from mutagen.flac import FLAC
-    from mutagen.oggvorbis import OggVorbis
-except ImportError:
-    pass
-
-import parser
-import mutagenstripper
-
-
-class MpegAudioStripper(parser.GenericParser):
-    """ Represent mpeg audio file (mp3, ...)
-    """
-    def _should_remove(self, field):
-        return field.name in ("id3v1", "id3v2")
-
-
-class OggStripper(mutagenstripper.MutagenStripper):
-    """ Represent an ogg vorbis file
-    """
-    def _create_mfile(self):
-        self.mfile = OggVorbis(self.filename)
-
-
-class FlacStripper(mutagenstripper.MutagenStripper):
-    """ Represent a Flac audio file
-    """
-    def _create_mfile(self):
-        self.mfile = FLAC(self.filename)
-
-    def remove_all(self):
-        """ Remove the "metadata" block from the file
-        """
-        super(FlacStripper, self).remove_all()
-        self.mfile.clear_pictures()
-        self.mfile.save()
-        return True
-
-    def is_clean(self):
-        """ Check if the "metadata" block is present in the file
-        """
-        return super(FlacStripper, self).is_clean() and not self.mfile.pictures
-
-    def get_meta(self):
-        """ Return the content of the metadata block if present
-        """
-        metadata = super(FlacStripper, self).get_meta()
-        if self.mfile.pictures:
-            metadata['picture:'] = 'yes'
-        return metadata
diff --git a/libmat/exiftool.py b/libmat/exiftool.py
index 07ef06b..ef81ed3 100644
--- a/libmat/exiftool.py
+++ b/libmat/exiftool.py
@@ -9,8 +9,8 @@ class ExiftoolStripper(parser.GenericParser):
     """ A generic stripper class using exiftool as backend
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(ExiftoolStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(ExiftoolStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.allowed = {'ExifTool Version Number', 'File Name', 'Directory', 'File Size', 'File Modification Date/Time',
                         'File Access Date/Time', 'File Permissions', 'File Type', 'File Type Extension', 'MIME Type',
                         'Image Width', 'Image Height', 'Image Size', 'File Inode Change Date/Time', 'Megapixels'}
diff --git a/libmat/hachoir_editor/__init__.py b/libmat/hachoir_editor/__init__.py
deleted file mode 100644
index 1835676..0000000
--- a/libmat/hachoir_editor/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from field import (
-    EditorError, FakeField)
-from typed_field import (
-    EditableField, EditableBits, EditableBytes,
-    EditableInteger, EditableString,
-    createEditableField)
-from fieldset import EditableFieldSet, NewFieldSet, createEditor
-
diff --git a/libmat/hachoir_editor/field.py b/libmat/hachoir_editor/field.py
deleted file mode 100644
index 6b1efe3..0000000
--- a/libmat/hachoir_editor/field.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from hachoir_core.error import HachoirError
-from hachoir_core.field import joinPath, MissingField
-
-class EditorError(HachoirError):
-    pass
-
-class FakeField(object):
-    """
-    This class have API looks similar to Field API, but objects don't contain
-    any value: all values are _computed_ by parent methods.
-
-    Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc").
-    """
-    is_field_set = False
-
-    def __init__(self, parent, name):
-        self._parent = parent
-        self._name = name
-
-    def _getPath(self):
-        return joinPath(self._parent.path, self._name)
-    path = property(_getPath)
-
-    def _getName(self):
-        return self._name
-    name = property(_getName)
-
-    def _getAddress(self):
-        return self._parent._getFieldAddress(self._name)
-    address = property(_getAddress)
-
-    def _getSize(self):
-        return self._parent.input[self._name].size
-    size = property(_getSize)
-
-    def _getValue(self):
-        return self._parent.input[self._name].value
-    value = property(_getValue)
-
-    def createDisplay(self):
-        # TODO: Returns new value if field is altered
-        return self._parent.input[self._name].display
-    display = property(createDisplay)
-
-    def _getParent(self):
-        return self._parent
-    parent = property(_getParent)
-
-    def hasValue(self):
-        return self._parent.input[self._name].hasValue()
-
-    def __getitem__(self, key):
-        # TODO: Implement this function!
-        raise MissingField(self, key)
-
-    def _isAltered(self):
-        return False
-    is_altered = property(_isAltered)
-
-    def writeInto(self, output):
-        size = self.size
-        addr = self._parent._getFieldInputAddress(self._name)
-        input = self._parent.input
-        stream = input.stream
-        if size % 8:
-            output.copyBitsFrom(stream, addr, size, input.endian)
-        else:
-            output.copyBytesFrom(stream, addr, size//8)
-
diff --git a/libmat/hachoir_editor/fieldset.py b/libmat/hachoir_editor/fieldset.py
deleted file mode 100644
index b7c9b07..0000000
--- a/libmat/hachoir_editor/fieldset.py
+++ /dev/null
@@ -1,352 +0,0 @@
-from hachoir_core.dict import UniqKeyError
-from hachoir_core.field import MissingField, Float32, Float64, FakeArray
-from hachoir_core.compatibility import any
-from hachoir_core.i18n import _
-from typed_field import createEditableField
-from field import EditorError
-from collections import deque # Python 2.4
-import weakref # Python 2.1
-import struct
-
-class EditableFieldSet(object):
-    MAX_SIZE = (1 << 40) # Arbitrary limit to catch errors
-    is_field_set = True
-
-    def __init__(self, parent, fieldset):
-        self._parent = parent
-        self.input = fieldset  # original FieldSet
-        self._fields = {}      # cache of editable fields
-        self._deleted = set()  # Names of deleted fields
-        self._inserted = {}    # Inserted field (name => list of field,
-                               # where name is the name after)
-
-    def array(self, key):
-        # FIXME: Use cache?
-        return FakeArray(self, key)
-
-    def _getParent(self):
-        return self._parent
-    parent = property(_getParent)
-
-    def _isAltered(self):
-        if self._inserted:
-            return True
-        if self._deleted:
-            return True
-        return any(field.is_altered for field in self._fields.itervalues())
-    is_altered = property(_isAltered)
-
-    def reset(self):
-        """
-        Reset the field set and the input field set.
-        """
-        for key, field in self._fields.iteritems():
-            if not field.is_altered:
-                del self._fields[key]
-        self.input.reset()
-
-    def __len__(self):
-        return len(self.input) \
-            - len(self._deleted) \
-            + sum( len(new) for new in self._inserted.itervalues() )
-
-    def __iter__(self):
-        for field in self.input:
-            name = field.name
-            if name in self._inserted:
-                for newfield in self._inserted[name]:
-                    yield weakref.proxy(newfield)
-            if name not in self._deleted:
-                yield self[name]
-        if None in self._inserted:
-            for newfield in self._inserted[None]:
-                yield weakref.proxy(newfield)
-
-    def insertBefore(self, name, *new_fields):
-        self._insert(name, new_fields, False)
-
-    def insertAfter(self, name, *new_fields):
-        self._insert(name, new_fields, True)
-
-    def insert(self, *new_fields):
-        self._insert(None, new_fields, True)
-
-    def _insert(self, key, new_fields, next):
-        """
-        key is the name of the field before which new_fields
-        will be inserted. If next is True, the fields will be inserted
-        _after_ this field.
-        """
-        # Set unique field name
-        for field in new_fields:
-            if field._name.endswith("[]"):
-                self.input.setUniqueFieldName(field)
-
-        # Check that there is no duplicate in inserted fields
-        new_names = list(field.name for field in new_fields)
-        names_set = set(new_names)
-        if len(names_set) != len(new_fields):
-            duplicates = (name for name in names_set if 1 < new_names.count(name))
-            raise UniqKeyError(_("Duplicates in inserted fields: %s") % ", ".join(duplicates))
-
-        # Check that field names are not in input
-        if self.input: # Write special version for NewFieldSet?
-            for name in new_names:
-                if name in self.input and name not in self._deleted:
-                    raise UniqKeyError(_("Field name '%s' already exists") % name)
-
-        # Check that field names are not in inserted fields
-        for fields in self._inserted.itervalues():
-            for field in fields:
-                if field.name in new_names:
-                    raise UniqKeyError(_("Field name '%s' already exists") % field.name)
-
-        # Input have already inserted field?
-        if key in self._inserted:
-            if next:
-                self._inserted[key].extend( reversed(new_fields) )
-            else:
-                self._inserted[key].extendleft( reversed(new_fields) )
-            return
-
-        # Whould like to insert in inserted fields?
-        if key:
-            for fields in self._inserted.itervalues():
-                names = [item.name for item in fields]
-                try:
-                    pos = names.index(key)
-                except ValueError:
-                    continue
-                if 0 <= pos:
-                    if next:
-                        pos += 1
-                    fields.rotate(-pos)
-                    fields.extendleft( reversed(new_fields) )
-                    fields.rotate(pos)
-                    return
-
-            # Get next field. Use None if we are at the end.
-            if next:
-                index = self.input[key].index + 1
-                try:
-                    key = self.input[index].name
-                except IndexError:
-                    key = None
-
-            # Check that field names are not in input
-            if key not in self.input:
-                raise MissingField(self, key)
-
-        # Insert in original input
-        self._inserted[key]= deque(new_fields)
-
-    def _getDescription(self):
-        return self.input.description
-    description = property(_getDescription)
-
-    def _getStream(self):
-        # FIXME: This property is maybe a bad idea since address may be differents
-        return self.input.stream
-    stream = property(_getStream)
-
-    def _getName(self):
-        return self.input.name
-    name = property(_getName)
-
-    def _getEndian(self):
-        return self.input.endian
-    endian = property(_getEndian)
-
-    def _getAddress(self):
-        if self._parent:
-            return self._parent._getFieldAddress(self.name)
-        else:
-            return 0
-    address = property(_getAddress)
-
-    def _getAbsoluteAddress(self):
-        address = self.address
-        current = self._parent
-        while current:
-            address += current.address
-            current = current._parent
-        return address
-    absolute_address = property(_getAbsoluteAddress)
-
-    def hasValue(self):
-        return False
-#        return self._parent.input[self.name].hasValue()
-
-    def _getSize(self):
-        if self.is_altered:
-            return sum(field.size for field in self)
-        else:
-            return self.input.size
-    size = property(_getSize)
-
-    def _getPath(self):
-        return self.input.path
-    path = property(_getPath)
-
-    def _getOriginalField(self, name):
-        assert name in self.input
-        return self.input[name]
-
-    def _getFieldInputAddress(self, name):
-        """
-        Absolute address of a field from the input field set.
-        """
-        assert name in self.input
-        return self.input[name].absolute_address
-
-    def _getFieldAddress(self, name):
-        """
-        Compute relative address of a field. The operation takes care of
-        deleted and resized fields.
-        """
-        #assert name not in self._deleted
-        addr = 0
-        for field in self:
-            if field.name == name:
-                return addr
-            addr += field.size
-        raise MissingField(self, name)
-
-    def _getItemByPath(self, path):
-        if not path[0]:
-            path = path[1:]
-        field = self
-        for name in path:
-            field = field[name]
-        return field
-
-    def __contains__(self, name):
-        try:
-            field = self[name]
-            return (field is not None)
-        except MissingField:
-            return False
-
-    def __getitem__(self, key):
-        """
-        Create a weak reference to an editable field (EditableField) for the
-        field with specified name. If the field is removed later, using the
-        editable field will raise a weakref.ReferenceError exception.
-
-        May raise a MissingField error if the field doesn't exist in original
-        field set or it has been deleted.
-        """
-        if "/" in key:
-            return self._getItemByPath(key.split("/"))
-        if isinstance(key, (int, long)):
-            raise EditorError("Integer index are not supported")
-
-        if (key in self._deleted) or (key not in self.input):
-            raise MissingField(self, key)
-        if key not in self._fields:
-            field = self.input[key]
-            if field.is_field_set:
-                self._fields[key] = createEditableFieldSet(self, field)
-            else:
-                self._fields[key] = createEditableField(self, field)
-        return weakref.proxy(self._fields[key])
-
-    def __delitem__(self, name):
-        """
-        Remove a field from the field set. May raise an MissingField exception
-        if the field has already been deleted.
-        """
-        parts = name.partition('/')
-        if parts[2]:
-            fieldset = self[parts[0]]
-            del fieldset[parts[2]]
-            return
-        if name in self._deleted:
-            raise MissingField(self, name)
-        self._deleted.add(name)
-        if name in self._fields:
-            del self._fields[name]
-
-    def writeInto(self, output):
-        """
-        Write the content if this field set into the output stream
-        (OutputStream).
-        """
-        if not self.is_altered:
-            # Not altered: just copy bits/bytes
-            input = self.input
-            if input.size % 8:
-                output.copyBitsFrom(input.stream,
-                    input.absolute_address, input.size, input.endian)
-            else:
-                output.copyBytesFrom(input.stream,
-                    input.absolute_address, input.size//8)
-        else:
-            # Altered: call writeInto() method of each field
-            realaddr = 0
-            for field in self:
-                field.writeInto(output)
-                realaddr += field.size
-
-    def _getValue(self):
-        raise EditorError('Field set "%s" has no value' % self.path)
-    def _setValue(self, value):
-        raise EditorError('Field set "%s" value is read only' % self.path)
-    value = property(_getValue, _setValue, "Value of field")
-
-class EditableFloat(EditableFieldSet):
-    _value = None
-
-    def _isAltered(self):
-        return (self._value is not None)
-    is_altered = property(_isAltered)
-
-    def writeInto(self, output):
-        if self._value is not None:
-            self._write(output)
-        else:
-            EditableFieldSet.writeInto(self, output)
-
-    def _write(self, output):
-        format = self.input.struct_format
-        raw = struct.pack(format, self._value)
-        output.writeBytes(raw)
-
-    def _setValue(self, value):
-        self.parent._is_altered = True
-        self._value = value
-    value = property(EditableFieldSet._getValue, _setValue)
-
-def createEditableFieldSet(parent, field):
-    cls = field.__class__
-    # FIXME: Support Float80
-    if cls in (Float32, Float64):
-        return EditableFloat(parent, field)
-    else:
-        return EditableFieldSet(parent, field)
-
-class NewFieldSet(EditableFieldSet):
-    def __init__(self, parent, name):
-        EditableFieldSet.__init__(self, parent, None)
-        self._name = name
-        self._endian = parent.endian
-
-    def __iter__(self):
-        if None in self._inserted:
-            return iter(self._inserted[None])
-        else:
-            raise StopIteration()
-
-    def _getName(self):
-        return self._name
-    name = property(_getName)
-
-    def _getEndian(self):
-        return self._endian
-    endian = property(_getEndian)
-
-    is_altered = property(lambda self: True)
-
-def createEditor(fieldset):
-    return EditableFieldSet(None, fieldset)
-
diff --git a/libmat/hachoir_editor/typed_field.py b/libmat/hachoir_editor/typed_field.py
deleted file mode 100644
index 606d39b..0000000
--- a/libmat/hachoir_editor/typed_field.py
+++ /dev/null
@@ -1,268 +0,0 @@
-from hachoir_core.field import (
-    RawBits, Bit, Bits, PaddingBits,
-    RawBytes, Bytes, PaddingBytes,
-    GenericString, Character,
-    isInteger, isString)
-from field import FakeField
-
-
-class EditableField(FakeField):
-    """
-    Pure virtual class used to write editable field class.
-    """
-
-    _is_altered = False
-
-    def __init__(self, parent, name, value=None):
-        FakeField.__init__(self, parent, name)
-        self._value = value
-
-    def _isAltered(self):
-        return self._is_altered
-
-    is_altered = property(_isAltered)
-
-    def hasValue(self):
-        return True
-
-    def _computeSize(self):
-        raise NotImplementedError()
-
-    def _getValue(self):
-        return self._value
-
-    def _setValue(self, value):
-        self._value = value
-
-    def _propGetValue(self):
-        if self._value is not None:
-            return self._getValue()
-        else:
-            return FakeField._getValue(self)
-
-    def _propSetValue(self, value):
-        self._setValue(value)
-        self._is_altered = True
-
-    value = property(_propGetValue, _propSetValue)
-
-    def _getSize(self):
-        if self._value is not None:
-            return self._computeSize()
-        else:
-            return FakeField._getSize(self)
-
-    size = property(_getSize)
-
-    def _write(self, output):
-        raise NotImplementedError()
-
-    def writeInto(self, output):
-        if self._is_altered:
-            self._write(output)
-        else:
-            return FakeField.writeInto(self, output)
-
-
-class EditableFixedField(EditableField):
-    """
-    Editable field with fixed size.
-    """
-
-    def __init__(self, parent, name, value=None, size=None):
-        EditableField.__init__(self, parent, name, value)
-        if size is not None:
-            self._size = size
-        else:
-            self._size = self._parent._getOriginalField(self._name).size
-
-    def _getSize(self):
-        return self._size
-
-    size = property(_getSize)
-
-
-class EditableBits(EditableFixedField):
-    def __init__(self, parent, name, *args):
-        if args:
-            if len(args) != 2:
-                raise TypeError(
-                    "Wrong argument count, EditableBits constructor prototype is: "
-                    "(parent, name, [size, value])")
-            size = args[0]
-            value = args[1]
-            assert isinstance(value, (int, long))
-        else:
-            size = None
-            value = None
-        EditableFixedField.__init__(self, parent, name, value, size)
-        if args:
-            self._setValue(args[1])
-            self._is_altered = True
-
-    def _setValue(self, value):
-        if not (0 <= value < (1 << self._size)):
-            raise ValueError("Invalid value, must be in range %s..%s"
-                             % (0, (1 << self._size) - 1))
-        self._value = value
-
-    def _write(self, output):
-        output.writeBits(self._size, self._value, self._parent.endian)
-
-
-class EditableBytes(EditableField):
-    def _setValue(self, value):
-        if not value: raise ValueError(
-            "Unable to set empty string to a EditableBytes field")
-        self._value = value
-
-    def _computeSize(self):
-        return len(self._value) * 8
-
-    def _write(self, output):
-        output.writeBytes(self._value)
-
-
-class EditableString(EditableField):
-    MAX_SIZE = {
-        "Pascal8": (1 << 8) - 1,
-        "Pascal16": (1 << 16) - 1,
-        "Pascal32": (1 << 32) - 1,
-    }
-
-    def __init__(self, parent, name, *args, **kw):
-        if len(args) == 2:
-            value = args[1]
-            assert isinstance(value, str)  # TODO: support Unicode
-        elif not args:
-            value = None
-        else:
-            raise TypeError(
-                "Wrong argument count, EditableString constructor prototype is:"
-                "(parent, name, [format, value])")
-        EditableField.__init__(self, parent, name, value)
-        if len(args) == 2:
-            self._charset = kw.get('charset', None)
-            self._format = args[0]
-            if self._format in GenericString.PASCAL_FORMATS:
-                self._prefix_size = GenericString.PASCAL_FORMATS[self._format]
-            else:
-                self._prefix_size = 0
-            self._suffix_str = GenericString.staticSuffixStr(
-                self._format, self._charset, self._parent.endian)
-            self._is_altered = True
-        else:
-            orig = self._parent._getOriginalField(name)
-            self._charset = orig.charset
-            self._format = orig.format
-            self._prefix_size = orig.content_offset
-            self._suffix_str = orig.suffix_str
-
-    def _setValue(self, value):
-        size = len(value)
-        if self._format in self.MAX_SIZE and self.MAX_SIZE[self._format] < size:
-            raise ValueError("String is too big")
-        self._value = value
-
-    def _computeSize(self):
-        return (self._prefix_size + len(self._value) + len(self._suffix_str)) * 8
-
-    def _write(self, output):
-        if self._format in GenericString.SUFFIX_FORMAT:
-            output.writeBytes(self._value)
-            output.writeBytes(self._suffix_str)
-        elif self._format == "fixed":
-            output.writeBytes(self._value)
-        else:
-            assert self._format in GenericString.PASCAL_FORMATS
-            size = GenericString.PASCAL_FORMATS[self._format]
-            output.writeInteger(len(self._value), False, size, self._parent.endian)
-            output.writeBytes(self._value)
-
-
-class EditableCharacter(EditableFixedField):
-    def __init__(self, parent, name, *args):
-        if args:
-            if len(args) != 3:
-                raise TypeError(
-                    "Wrong argument count, EditableCharacter "
-                    "constructor prototype is: (parent, name, [value])")
-            value = args[0]
-            if not isinstance(value, str) or len(value) != 1:
-                raise TypeError("EditableCharacter needs a character")
-        else:
-            value = None
-        EditableFixedField.__init__(self, parent, name, value, 8)
-        if args:
-            self._is_altered = True
-
-    def _setValue(self, value):
-        if not isinstance(value, str) or len(value) != 1:
-            raise TypeError("EditableCharacter needs a character")
-        self._value = value
-
-    def _write(self, output):
-        output.writeBytes(self._value)
-
-
-class EditableInteger(EditableFixedField):
-    VALID_VALUE_SIGNED = {
-        8: (-(1 << 8), (1 << 8) - 1),
-        16: (-(1 << 15), (1 << 15) - 1),
-        32: (-(1 << 31), (1 << 31) - 1),
-    }
-    VALID_VALUE_UNSIGNED = {
-        8: (0, (1 << 8) - 1),
-        16: (0, (1 << 16) - 1),
-        32: (0, (1 << 32) - 1)
-    }
-
-    def __init__(self, parent, name, *args):
-        if args:
-            if len(args) != 3:
-                raise TypeError(
-                    "Wrong argument count, EditableInteger constructor prototype is: "
-                    "(parent, name, [signed, size, value])")
-            size = args[1]
-            value = args[2]
-            assert isinstance(value, (int, long))
-        else:
-            size = None
-            value = None
-        EditableFixedField.__init__(self, parent, name, value, size)
-        if args:
-            self._signed = args[0]
-            self._is_altered = True
-        else:
-            self._signed = self._parent._getOriginalField(self._name).signed
-
-    def _setValue(self, value):
-        if self._signed:
-            valid = self.VALID_VALUE_SIGNED
-        else:
-            valid = self.VALID_VALUE_UNSIGNED
-        minval, maxval = valid[self._size]
-        if not (minval <= value <= maxval):
-            raise ValueError("Invalid value, must be in range %s..%s"
-                             % (minval, maxval))
-        self._value = value
-
-    def _write(self, output):
-        output.writeInteger(
-            self.value, self._signed, self._size // 8, self._parent.endian)
-
-
-def createEditableField(fieldset, field):
-    if isInteger(field):
-        cls = EditableInteger
-    elif isString(field):
-        cls = EditableString
-    elif field.__class__ in (RawBytes, Bytes, PaddingBytes):
-        cls = EditableBytes
-    elif field.__class__ in (RawBits, Bits, Bit, PaddingBits):
-        cls = EditableBits
-    elif field.__class__ == Character:
-        cls = EditableCharacter
-    else:
-        cls = FakeField
-    return cls(fieldset, field.name)
diff --git a/libmat/images.py b/libmat/images.py
deleted file mode 100644
index 813b0fd..0000000
--- a/libmat/images.py
+++ /dev/null
@@ -1,52 +0,0 @@
-""" Takes care about pictures formats
-
-References:
-    - JFIF: http://www.ecma-international.org/publications/techreports/E-TR-098.htm
-    - PNG: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html
-    - PNG: http://www.w3.org/TR/PNG-Chunks.html
-"""
-
-import parser
-
-
-class JpegStripper(parser.GenericParser):
-    """ Represents a jpeg file.
-        Custom Huffman and Quantization tables
-        are stripped: they may leak
-        some info, and the quality loss is minor.
-    """
-    def _should_remove(self, field):
-        """ Return True if the field is compromising
-        """
-        field_list = frozenset([
-            'start_image',  # start of the image
-            'app0',         # JFIF data
-            'start_frame',  # specify width, height, number of components
-            'start_scan',   # specify which slice of data the top-to-bottom scan contains
-            'data',         # actual data
-            'end_image'])   # end of the image
-        if field.name in field_list:
-            return False
-        elif field.name.startswith('quantization['):  # custom Quant. tables
-            return False
-        elif field.name.startswith('huffman['):  # custom Huffman tables
-            return False
-        return True
-
-
-class PngStripper(parser.GenericParser):
-    """ Represents a png file
-    """
-    def _should_remove(self, field):
-        """ Return True if the field is compromising
-        """
-        field_list = frozenset([
-            'id',
-            'header',    # PNG header
-            'physical',  # the intended pixel size or aspect ratio
-            'end'])      # end of the image
-        if field.name in field_list:
-            return False
-        elif field.name.startswith('data['):  # data
-            return False
-        return True
diff --git a/libmat/mat.py b/libmat/mat.py
index 42357d6..2634cc3 100644
--- a/libmat/mat.py
+++ b/libmat/mat.py
@@ -10,9 +10,6 @@ import platform
 import subprocess
 import xml.sax
 
-import hachoir_core.cmd_line
-import hachoir_parser
-
 import libmat.exceptions
 
 __version__ = '0.5.4'
@@ -20,12 +17,10 @@ __author__ = 'jvoisin'
 
 # Silence
 LOGGING_LEVEL = logging.CRITICAL
-hachoir_core.config.quiet = True
 fname = ''
 
 # Verbose
 # LOGGING_LEVEL = logging.DEBUG
-# hachoir_core.config.quiet = False
 # logname = 'report.log'
 
 logging.basicConfig(filename=fname, level=LOGGING_LEVEL)
@@ -155,22 +150,10 @@ def create_class_file(name, backup, **kwargs):
     elif not os.access(name, os.R_OK):  # check read permissions
         logging.error('%s is is not readable', name)
         return None
-    elif not os.path.getsize(name):  # check if the file is not empty (hachoir crash on empty files)
-        logging.error('%s is empty', name)
-        return None
-
-    try:
-        filename = hachoir_core.cmd_line.unicodeFilename(name)
-    except TypeError:  # get rid of "decoding Unicode is not supported"
-        filename = name
-
-    parser = hachoir_parser.createParser(filename)
-    if not parser:
-        logging.info('Unable to parse %s with hachoir', filename)
 
     mime = mimetypes.guess_type(name)[0]
     if not mime:
-        logging.info('Unable to find mimetype of %s', filename)
+        logging.info('Unable to find mimetype of %s', name)
         return None
 
     if mime.startswith('application/vnd.oasis.opendocument'):
@@ -186,4 +169,4 @@ def create_class_file(name, backup, **kwargs):
         logging.info('Don\'t have stripper for %s format', mime)
         return None
 
-    return stripper_class(filename, parser, mime, backup, is_writable, **kwargs)
+    return stripper_class(name, mime, backup, is_writable, **kwargs)
diff --git a/libmat/misc.py b/libmat/misc.py
index a55b8ed..cc480e5 100644
--- a/libmat/misc.py
+++ b/libmat/misc.py
@@ -11,8 +11,8 @@ class TorrentStripper(parser.GenericParser):
         of the bencode lib from Petru Paler
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(TorrentStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(TorrentStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.fields = frozenset(['announce', 'info', 'name', 'path', 'piece length', 'pieces',
                                  'length', 'files', 'announce-list', 'nodes', 'httpseeds', 'private', 'root hash'])
 
diff --git a/libmat/mutagenstripper.py b/libmat/mutagenstripper.py
index 0f9520a..692c56f 100644
--- a/libmat/mutagenstripper.py
+++ b/libmat/mutagenstripper.py
@@ -3,11 +3,15 @@
 
 import parser
 
+from mutagen.flac import FLAC
+from mutagen.oggvorbis import OggVorbis
+from mutagen.mp3 import MP3
+
 
 class MutagenStripper(parser.GenericParser):
     """ Parser using the (awesome) mutagen library. """
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(MutagenStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(MutagenStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.mfile = None  # This will be instanciated in self._create_mfile()
         self._create_mfile()
 
@@ -36,3 +40,61 @@ class MutagenStripper(parser.GenericParser):
             for key, value in self.mfile.tags:
                 metadata[key] = value
         return metadata
+
+
+class MpegAudioStripper(MutagenStripper):
+    """ Represent a mp3 vorbis file
+    """
+    def _create_mfile(self):
+        self.mfile = MP3(self.filename)
+
+    def get_meta(self):
+        """
+            Return the content of the metadata block is present
+        """
+        metadata = {}
+        if self.mfile.tags:
+            for key in self.mfile.tags.keys():
+                meta = self.mfile.tags[key]
+                try:  # Sometimes, the field has a human-redable description
+                    desc = meta.desc
+                except AttributeError:
+                    desc = key
+                text = meta.text[0]
+                metadata[desc] = text
+        return metadata
+
+
+class OggStripper(MutagenStripper):
+    """ Represent an ogg vorbis file
+    """
+    def _create_mfile(self):
+        self.mfile = OggVorbis(self.filename)
+
+
+class FlacStripper(MutagenStripper):
+    """ Represent a Flac audio file
+    """
+    def _create_mfile(self):
+        self.mfile = FLAC(self.filename)
+
+    def remove_all(self):
+        """ Remove the "metadata" block from the file
+        """
+        super(FlacStripper, self).remove_all()
+        self.mfile.clear_pictures()
+        self.mfile.save()
+        return True
+
+    def is_clean(self):
+        """ Check if the "metadata" block is present in the file
+        """
+        return super(FlacStripper, self).is_clean() and not self.mfile.pictures
+
+    def get_meta(self):
+        """ Return the content of the metadata block if present
+        """
+        metadata = super(FlacStripper, self).get_meta()
+        if self.mfile.pictures:
+            metadata['picture:'] = 'yes'
+        return metadata
diff --git a/libmat/office.py b/libmat/office.py
index c585cb6..47cd622 100644
--- a/libmat/office.py
+++ b/libmat/office.py
@@ -110,8 +110,8 @@ class PdfStripper(parser.GenericParser):
     """ Represent a PDF file
     """
 
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
-        super(PdfStripper, self).__init__(filename, parser, mime, backup, is_writable, **kwargs)
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
+        super(PdfStripper, self).__init__(filename, mime, backup, is_writable, **kwargs)
         self.uri = 'file://' + os.path.abspath(self.filename)
         self.password = None
         try:
diff --git a/libmat/parser.py b/libmat/parser.py
index 8e10ae9..2a82a25 100644
--- a/libmat/parser.py
+++ b/libmat/parser.py
@@ -5,8 +5,6 @@ import os
 import shutil
 import tempfile
 
-import hachoir_core
-import hachoir_editor
 
 import mat
 
@@ -24,19 +22,14 @@ FIELD = object()
 class GenericParser(object):
     """ Parent class of all parsers
     """
-    def __init__(self, filename, parser, mime, backup, is_writable, **kwargs):
+    def __init__(self, filename, mime, backup, is_writable, **kwargs):
         self.filename = ''
-        self.parser = parser
         self.mime = mime
         self.backup = backup
         self.is_writable = is_writable
-        self.editor = hachoir_editor.createEditor(parser)
-        try:
-            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
-        except TypeError:  # get rid of "decoding Unicode is not supported"
-            self.filename = filename
+        self.filename = filename
         self.basename = os.path.basename(filename)
-        self.output = hachoir_core.cmd_line.unicodeFilename(tempfile.mkstemp()[1])
+        self.output = tempfile.mkstemp()[1]
 
     def __del__(self):
         """ Remove tempfile if it was not used
@@ -48,74 +41,11 @@ class GenericParser(object):
         """
             Check if the file is clean from harmful metadatas
         """
-        for field in self.editor:
-            if self._should_remove(field):
-                return self._is_clean(self.editor)
-        return True
-
-    def _is_clean(self, fieldset):
-        """ Helper method of the `is_clean` one """
-        for field in fieldset:
-            remove = self._should_remove(field)
-            if remove is True:
-                return False
-            if remove is FIELD:
-                if not self._is_clean(field):
-                    return False
-        return True
+        raise NotImplementedError
 
     def remove_all(self):
         """ Remove all compromising fields
         """
-        state = self._remove_all(self.editor)
-        hachoir_core.field.writeIntoFile(self.editor, self.output)
-        self.do_backup()
-        return state
-
-    def _remove_all(self, fieldset):
-        """ Recursive way to handle tree metadatas
-        """
-        try:
-            for field in fieldset:
-                remove = self._should_remove(field)
-                if remove is True:
-                    self._remove(fieldset, field.name)
-                if remove is FIELD:
-                    self._remove_all(field)
-            return True
-        except:
-            return False
-
-    @staticmethod
-    def _remove(fieldset, field):
-        """ Delete the given field
-        """
-        del fieldset[field]
-
-    def get_meta(self):
-        """ Return a dict with all the meta of the file
-        """
-        metadata = {}
-        self._get_meta(self.editor, metadata)
-        return metadata
-
-    def _get_meta(self, fieldset, metadata):
-        """ Recursive way to handle tree metadatas
-        """
-        for field in fieldset:
-            remove = self._should_remove(field)
-            if remove:
-                try:
-                    metadata[field.name] = field.value
-                except:
-                    metadata[field.name] = 'harmful content'
-            if remove is FIELD:
-                self._get_meta(field, None)
-
-    def _should_remove(self, key):
-        """ Return True if the field is compromising
-            abstract method
-        """
         raise NotImplementedError
 
     def create_backup_copy(self):
diff --git a/libmat/strippers.py b/libmat/strippers.py
index 3aca04f..5920c41 100644
--- a/libmat/strippers.py
+++ b/libmat/strippers.py
@@ -2,8 +2,7 @@
 """
 
 import archive
-import audio
-import images
+import mutagenstripper
 import logging
 import mat
 import misc
@@ -15,7 +14,6 @@ STRIPPERS = {
     'application/x-bzip2': archive.Bzip2Stripper,
     'application/x-gzip': archive.GzipStripper,
     'application/zip': archive.ZipStripper,
-    'audio/mpeg': audio.MpegAudioStripper,
     'application/x-bittorrent': misc.TorrentStripper,
     'application/torrent': misc.TorrentStripper,
     'application/opendocument': office.OpenDocumentStripper,
@@ -52,11 +50,11 @@ if pdfSupport:
 # audio format support with mutagen-python
 try:
     import mutagen
-    STRIPPERS['audio/x-flac'] = audio.FlacStripper
-    STRIPPERS['audio/flac'] = audio.FlacStripper
-    STRIPPERS['audio/vorbis'] = audio.OggStripper
-    STRIPPERS['audio/ogg'] = audio.OggStripper
-    STRIPPERS['audio/mpeg'] = audio.MpegAudioStripper
+    STRIPPERS['audio/x-flac'] = mutagenstripper.FlacStripper
+    STRIPPERS['audio/flac'] = mutagenstripper.FlacStripper
+    STRIPPERS['audio/vorbis'] = mutagenstripper.OggStripper
+    STRIPPERS['audio/ogg'] = mutagenstripper.OggStripper
+    STRIPPERS['audio/mpeg'] = mutagenstripper.MpegAudioStripper
 except ImportError:
     logging.info('Unable to import python-mutagen: limited audio format support')
 
@@ -67,7 +65,5 @@ try:
     STRIPPERS['image/jpeg'] = exiftool.JpegStripper
     STRIPPERS['image/png'] = exiftool.PngStripper
     STRIPPERS['image/tiff'] = exiftool.TiffStripper
-except OSError:  # if exiftool is not installed, use hachoir instead
+except OSError:
     logging.info('Unable to find exiftool: limited images support')
-    STRIPPERS['image/jpeg'] = images.JpegStripper
-    STRIPPERS['image/png'] = images.PngStripper
diff --git a/mat b/mat
index 406d41b..03a0ce1 100755
--- a/mat
+++ b/mat
@@ -7,8 +7,6 @@ import sys
 import argparse
 import os
 
-import hachoir_core
-
 from libmat import mat
 from libmat import archive
 
@@ -35,7 +33,7 @@ def parse():
     info.add_argument('-l', '--list', action='store_true',
                       help='list all supported fileformats')
     info.add_argument('-v', '--version', action='version',
-                      version='MAT %s - Hachoir %s' % (mat.__version__, hachoir_core.__version__))
+                      version='MAT %s' % mat.__version__)
 
     return parser.parse_args()
 
diff --git a/mat.1 b/mat.1
index c230276..c32cc00 100644
--- a/mat.1
+++ b/mat.1
@@ -73,4 +73,4 @@ You should only use it for big files.
 
 
 .SH "SEE ALSO"
-\fBexiftool\fR(1), \fBhachoir\-metadata\fR(1)
+\fBexiftool\fR(1)
diff --git a/setup.py b/setup.py
index 6d4e320..295afb5 100755
--- a/setup.py
+++ b/setup.py
@@ -31,13 +31,13 @@ setup(
     name='MAT',
     version=__version__,
     description='Metadata Anonymisation Toolkit',
-    long_description='A Metadata Anonymisation Toolkit in Python, using python-hachoir',
+    long_description='A Metadata Anonymisation Toolkit in Python',
     author='jvoisin',
     author_email='julien.voisin at dustri.org',
     platforms='linux',
     license='GPLv2',
     url='https://mat.boum.org',
-    packages=['libmat', 'libmat.hachoir_editor', 'libmat.bencode'],
+    packages=['libmat', 'libmat.bencode'],
     scripts=['mat', 'mat-gui'],
     data_files=[
         ('share/applications', ['mat.desktop']),
@@ -55,5 +55,5 @@ setup(
         'build_icons': build_icons.build_icons,
         'clean': clean_i18n.clean_i18n,
     },
-    requires=['mutagen', 'gi', 'pdfrw', 'hachoir_core', 'hachoir_parser']
+    requires=['mutagen', 'gi', 'pdfrw']
 )

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-privacy/packages/mat.git



More information about the Pkg-privacy-commits mailing list