[SCM] exiv2 packaging branch, master, updated. debian/0.25-3.1-3734-gdcbc29a

Maximiliano Curia maxy at moszumanska.debian.org
Thu Jul 13 17:41:07 UTC 2017


Gitweb-URL: http://git.debian.org/?p=pkg-kde/kde-extras/exiv2.git;a=commitdiff;h=0bc517b

The following commit has been merged in the master branch:
commit 0bc517b5453e5895c0a746a904b053eb981f7761
Author: Andreas Huggel <ahuggel at gmx.net>
Date:   Wed Jan 20 12:07:40 2010 +0000

    #662: Detect and interpret a BOM.
---
 src/value.cpp | 18 ++++++++++++++++--
 src/value.hpp |  9 ++++++---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/value.cpp b/src/value.cpp
index b8a4f9f..293792c 100644
--- a/src/value.cpp
+++ b/src/value.cpp
@@ -527,7 +527,7 @@ namespace Exiv2 {
         }
         c = value_.substr(8);
         if (charsetId() == unicode) {
-            const char* from = encoding == 0 ? detectCharset() : encoding;
+            const char* from = encoding == 0 ? detectCharset(c) : encoding;
             convertStringCharset(c, from, "UTF-8");
         }
         return c;
@@ -543,8 +543,22 @@ namespace Exiv2 {
         return charsetId;
     }
 
-    const char* CommentValue::detectCharset() const
+    const char* CommentValue::detectCharset(std::string& c) const
     {
+        // Interpret a BOM if there is one
+        if (0 == strncmp(c.data(), "\xef\xbb\xbf", 3)) {
+            c = c.substr(3);
+            return "UTF-8";
+        }
+        if (0 == strncmp(c.data(), "\xff\xfe", 2)) {
+            c = c.substr(2);
+            return "UCS-2LE";
+        }
+        if (0 == strncmp(c.data(), "\xfe\xff", 2)) {
+            c = c.substr(2);
+            return "UCS-2BE";
+        }
+
         // Todo: Add logic to guess if the comment is encoded in UTF-8
 
         return byteOrder_ == littleEndian ? "UCS-2LE" : "UCS-2BE";
diff --git a/src/value.hpp b/src/value.hpp
index ab815ed..4a7321e 100644
--- a/src/value.hpp
+++ b/src/value.hpp
@@ -611,11 +611,14 @@ namespace Exiv2 {
         std::string comment(const char* encoding =0) const;
         /*!
           @brief Determine the character encoding that was used to encode the
-              comment value as an iconv(3) name.
+              UNICODE comment value as an iconv(3) name.
 
-          Todo: Implement rules
+          If the comment \em c starts with a BOM, the BOM is interpreted and
+          removed from the string.
+
+          Todo: Implement rules to guess if the comment is UTF-8 encoded.
          */
-        const char* detectCharset() const;
+        const char* detectCharset(std::string& c) const;
         //! Return the Exif charset id of the comment
         CharsetId charsetId() const;
         //@}

-- 
exiv2 packaging



More information about the pkg-kde-commits mailing list