[Python-modules-commits] [python-feather-format] 01/04: Import python-feather-format_0.2.0.orig.tar.gz

ChangZhuo Chen czchen at moszumanska.debian.org
Thu May 5 11:16:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

czchen pushed a commit to tag debian/0.2.0-1
in repository python-feather-format.

commit cd9b82c0fbb37d69c71ded220a7a7fe6680a964c
Author: ChangZhuo Chen (陳昌倬) <czchen at debian.org>
Date:   Thu May 5 18:23:21 2016 +0800

    Import python-feather-format_0.2.0.orig.tar.gz
---
 PKG-INFO                           | 28 ++++++++++++++++++----------
 README.md                          | 26 +++++++++++++++++---------
 feather/api.py                     |  4 +++-
 feather/interop.h                  | 27 ++++++++++++++++++++++++---
 feather/version.py                 |  2 +-
 feather_format.egg-info/PKG-INFO   | 28 ++++++++++++++++++----------
 setup.py                           |  4 ++--
 src/feather/metadata.cc            | 22 ++++++++++++++++++----
 src/feather/metadata.fbs           |  6 ++++++
 src/feather/metadata.h             |  2 ++
 src/feather/metadata_generated.h   | 21 ++++++++++++++++++---
 src/feather/reader.cc              |  4 ++++
 src/feather/reader.h               |  2 ++
 src/feather/tests/metadata-test.cc |  5 +++++
 14 files changed, 138 insertions(+), 43 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 84bee28..411aa7d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: feather-format
-Version: 0.1.2
+Version: 0.2.0
 Summary: Python interface to the Apache Arrow-based Feather File Format
 Home-page: http://github.com/wesm/feather
 Author: Wes McKinney
@@ -10,6 +10,23 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
         
         Feather efficiently stores pandas DataFrame objects on disk.
         
+        ## Installing
+        
+        ```shell
+        pip install feather-format
+        ```
+        
+        #### Mac notes
+        
+        Anaconda uses a default 10.5 deployment target which does not have C++11
+        properly available. This can be fixed by setting:
+        
+        ```
+        export MACOSX_DEPLOYMENT_TARGET=10.10
+        ```
+        
+        This may be necessary in some other OS X environments.
+        
         ## Build
         
         Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -52,15 +69,6 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
         * Row indexes
         * Object-type columns with non-homogeneous data
         
-        ## Mac notes
-        
-        Anaconda uses a default 10.5 deployment target which does not have C++11
-        properly available. This can be fixed by setting:
-        
-        ```
-        export MACOSX_DEPLOYMENT_TARGET=10.10
-        ```
-        
 Platform: UNKNOWN
 Classifier: Development Status :: 3 - Alpha
 Classifier: Environment :: Console
diff --git a/README.md b/README.md
index dd9a748..a916ea0 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,23 @@
 
 Feather efficiently stores pandas DataFrame objects on disk.
 
+## Installing
+
+```shell
+pip install feather-format
+```
+
+#### Mac notes
+
+Anaconda uses a default 10.5 deployment target which does not have C++11
+properly available. This can be fixed by setting:
+
+```
+export MACOSX_DEPLOYMENT_TARGET=10.10
+```
+
+This may be necessary in some other OS X environments.
+
 ## Build
 
 Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -43,12 +60,3 @@ Some features of pandas are not supported in Feather:
 * Non-string column names
 * Row indexes
 * Object-type columns with non-homogeneous data
-
-## Mac notes
-
-Anaconda uses a default 10.5 deployment target which does not have C++11
-properly available. This can be fixed by setting:
-
-```
-export MACOSX_DEPLOYMENT_TARGET=10.10
-```
diff --git a/feather/api.py b/feather/api.py
index 95ea54e..a04e449 100644
--- a/feather/api.py
+++ b/feather/api.py
@@ -51,9 +51,11 @@ def read_dataframe(path, columns=None):
 
     # TODO(wesm): pipeline conversion to Arrow memory layout
     data = {}
+    names = []
     for i in range(reader.num_columns):
         name, arr = reader.read_array(i)
         data[name] = arr
+        names.append(name)
 
     # TODO(wesm):
-    return pd.DataFrame(data)
+    return pd.DataFrame(data, columns=names)
diff --git a/feather/interop.h b/feather/interop.h
index 4c94efb..2c53626 100644
--- a/feather/interop.h
+++ b/feather/interop.h
@@ -62,13 +62,16 @@ NPY_INT_DECL(INT8, INT8, int8_t);
 NPY_INT_DECL(INT16, INT16, int16_t);
 NPY_INT_DECL(INT32, INT32, int32_t);
 NPY_INT_DECL(INT64, INT64, int64_t);
-NPY_INT_DECL(LONGLONG, INT64, int64_t);
 
 NPY_INT_DECL(UINT8, UINT8, uint8_t);
 NPY_INT_DECL(UINT16, UINT16, uint16_t);
 NPY_INT_DECL(UINT32, UINT32, uint32_t);
 NPY_INT_DECL(UINT64, UINT64, uint64_t);
+
+#if NPY_INT64 != NPY_LONGLONG
+NPY_INT_DECL(LONGLONG, INT64, int64_t);
 NPY_INT_DECL(ULONGLONG, UINT64, uint64_t);
+#endif
 
 template <>
 struct npy_traits<NPY_FLOAT32> {
@@ -411,24 +414,42 @@ Status pandas_masked_to_primitive(PyObject* ao, PyObject* mo,
     return Status::Invalid("only handle 1-dimensional arrays");
   }
 
-  switch(PyArray_DESCR(arr)->type_num) {
+  int type_num = PyArray_DESCR(arr)->type_num;
+
+#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
+  // GH #129, on i386 / Apple Python, both LONGLONG and INT64 can be observed
+  // in the wild, which is buggy. We set U/LONGLONG to U/INT64 so things work
+  // properly.
+  if (type_num == NPY_LONGLONG) {
+    type_num = NPY_INT64;
+  }
+  if (type_num == NPY_ULONGLONG) {
+    type_num = NPY_UINT64;
+  }
+#endif
+
+  switch(type_num) {
     TO_FEATHER_CASE(BOOL);
     TO_FEATHER_CASE(INT8);
     TO_FEATHER_CASE(INT16);
     TO_FEATHER_CASE(INT32);
     TO_FEATHER_CASE(INT64);
+#if (NPY_INT64 != NPY_LONGLONG)
     TO_FEATHER_CASE(LONGLONG);
+#endif
     TO_FEATHER_CASE(UINT8);
     TO_FEATHER_CASE(UINT16);
     TO_FEATHER_CASE(UINT32);
     TO_FEATHER_CASE(UINT64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
     TO_FEATHER_CASE(ULONGLONG);
+#endif
     TO_FEATHER_CASE(FLOAT32);
     TO_FEATHER_CASE(FLOAT64);
     TO_FEATHER_CASE(OBJECT);
     default:
       std::stringstream ss;
-      ss << "unsupported type " << PyArray_DESCR(arr)->type_num
+      ss << "unsupported type " << type_num
          << std::endl;
       return Status::Invalid(ss.str());
   }
diff --git a/feather/version.py b/feather/version.py
index 7c21425..df4ca77 100644
--- a/feather/version.py
+++ b/feather/version.py
@@ -1,4 +1,4 @@
 
 # THIS FILE IS GENERATED FROM SETUP.PY
-version = '0.1.2'
+version = '0.2.0'
 isrelease = 'True'
\ No newline at end of file
diff --git a/feather_format.egg-info/PKG-INFO b/feather_format.egg-info/PKG-INFO
index 84bee28..411aa7d 100644
--- a/feather_format.egg-info/PKG-INFO
+++ b/feather_format.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: feather-format
-Version: 0.1.2
+Version: 0.2.0
 Summary: Python interface to the Apache Arrow-based Feather File Format
 Home-page: http://github.com/wesm/feather
 Author: Wes McKinney
@@ -10,6 +10,23 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
         
         Feather efficiently stores pandas DataFrame objects on disk.
         
+        ## Installing
+        
+        ```shell
+        pip install feather-format
+        ```
+        
+        #### Mac notes
+        
+        Anaconda uses a default 10.5 deployment target which does not have C++11
+        properly available. This can be fixed by setting:
+        
+        ```
+        export MACOSX_DEPLOYMENT_TARGET=10.10
+        ```
+        
+        This may be necessary in some other OS X environments.
+        
         ## Build
         
         Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -52,15 +69,6 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
         * Row indexes
         * Object-type columns with non-homogeneous data
         
-        ## Mac notes
-        
-        Anaconda uses a default 10.5 deployment target which does not have C++11
-        properly available. This can be fixed by setting:
-        
-        ```
-        export MACOSX_DEPLOYMENT_TARGET=10.10
-        ```
-        
 Platform: UNKNOWN
 Classifier: Development Status :: 3 - Alpha
 Classifier: Environment :: Console
diff --git a/setup.py b/setup.py
index c42b7ed..9a5dba7 100644
--- a/setup.py
+++ b/setup.py
@@ -33,8 +33,8 @@ if Cython.__version__ < '0.19.1':
     raise Exception('Please upgrade to Cython 0.19.1 or newer')
 
 MAJOR = 0
-MINOR = 1
-MICRO = 2
+MINOR = 2
+MICRO = 0
 VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
 ISRELEASED = True
 
diff --git a/src/feather/metadata.cc b/src/feather/metadata.cc
index 6b37c17..f059945 100644
--- a/src/feather/metadata.cc
+++ b/src/feather/metadata.cc
@@ -26,8 +26,10 @@ namespace metadata {
 
 typedef flatbuffers::FlatBufferBuilder FBB;
 
+using FBString = flatbuffers::Offset<flatbuffers::String>;
+
 // Flatbuffers conveniences
-typedef std::vector<flatbuffers::Offset<fbs::Column> > ColumnVector;
+using ColumnVector = std::vector<flatbuffers::Offset<fbs::Column>>;
 
 // ----------------------------------------------------------------------
 // Primitive array
@@ -124,6 +126,8 @@ fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
 // ----------------------------------------------------------------------
 // TableBuilder
 
+static constexpr int FEATHER_VERSION = 1;
+
 class TableBuilder::Impl {
  public:
   explicit Impl(int64_t num_rows) :
@@ -138,14 +142,19 @@ class TableBuilder::Impl {
     if (finished_) {
       return Status::Invalid("can only call this once");
     }
-    flatbuffers::Offset<flatbuffers::String> desc = 0;
+
+    FBString desc = 0;
     if (!description_.empty()) {
       desc = fbb_.CreateString(description_);
     }
 
-    auto root = fbs::CreateCTable(fbb_, desc,
+    flatbuffers::Offset<flatbuffers::String> metadata = 0;
+
+    auto root = fbs::CreateCTable(fbb_,
+        desc,
         num_rows_,
-        fbb_.CreateVector(columns_));
+        fbb_.CreateVector(columns_),
+        FEATHER_VERSION, metadata);
     fbb_.Finish(root);
     finished_ = true;
 
@@ -402,6 +411,11 @@ int64_t Table::num_rows() const {
   return table->num_rows();
 }
 
+int Table::version() const {
+  const fbs::CTable* table = static_cast<const fbs::CTable*>(table_);
+  return table->version();
+}
+
 size_t Table::num_columns() const {
   const fbs::CTable* table = static_cast<const fbs::CTable*>(table_);
   return table->columns()->size();
diff --git a/src/feather/metadata.fbs b/src/feather/metadata.fbs
index b36104e..a6e00fd 100644
--- a/src/feather/metadata.fbs
+++ b/src/feather/metadata.fbs
@@ -112,6 +112,12 @@ table CTable {
 
   num_rows: long;
   columns: [Column];
+
+  /// Version number of the Feather format
+  version: int;
+
+  /// Table metadata (likely JSON), not yet used
+  metadata: string;
 }
 
 root_type CTable;
diff --git a/src/feather/metadata.h b/src/feather/metadata.h
index fc1a286..6451f75 100644
--- a/src/feather/metadata.h
+++ b/src/feather/metadata.h
@@ -153,6 +153,8 @@ class Table {
 
   std::string description() const;
 
+  int version() const;
+
   // Optional
   bool has_description() const;
 
diff --git a/src/feather/metadata_generated.h b/src/feather/metadata_generated.h
index 86bf23e..d214174 100644
--- a/src/feather/metadata_generated.h
+++ b/src/feather/metadata_generated.h
@@ -365,12 +365,18 @@ struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   enum {
     VT_DESCRIPTION = 4,
     VT_NUM_ROWS = 6,
-    VT_COLUMNS = 8
+    VT_COLUMNS = 8,
+    VT_VERSION = 10,
+    VT_METADATA = 12
   };
   /// Some text (or a name) metadata about what the file is, optional
   const flatbuffers::String *description() const { return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION); }
   int64_t num_rows() const { return GetField<int64_t>(VT_NUM_ROWS, 0); }
   const flatbuffers::Vector<flatbuffers::Offset<Column>> *columns() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Column>> *>(VT_COLUMNS); }
+  /// Version number of the Feather format
+  int32_t version() const { return GetField<int32_t>(VT_VERSION, 0); }
+  /// Table metadata (likely JSON), not yet used
+  const flatbuffers::String *metadata() const { return GetPointer<const flatbuffers::String *>(VT_METADATA); }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<flatbuffers::uoffset_t>(verifier, VT_DESCRIPTION) &&
@@ -379,6 +385,9 @@ struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
            VerifyField<flatbuffers::uoffset_t>(verifier, VT_COLUMNS) &&
            verifier.Verify(columns()) &&
            verifier.VerifyVectorOfTables(columns()) &&
+           VerifyField<int32_t>(verifier, VT_VERSION) &&
+           VerifyField<flatbuffers::uoffset_t>(verifier, VT_METADATA) &&
+           verifier.Verify(metadata()) &&
            verifier.EndTable();
   }
 };
@@ -389,10 +398,12 @@ struct CTableBuilder {
   void add_description(flatbuffers::Offset<flatbuffers::String> description) { fbb_.AddOffset(CTable::VT_DESCRIPTION, description); }
   void add_num_rows(int64_t num_rows) { fbb_.AddElement<int64_t>(CTable::VT_NUM_ROWS, num_rows, 0); }
   void add_columns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns) { fbb_.AddOffset(CTable::VT_COLUMNS, columns); }
+  void add_version(int32_t version) { fbb_.AddElement<int32_t>(CTable::VT_VERSION, version, 0); }
+  void add_metadata(flatbuffers::Offset<flatbuffers::String> metadata) { fbb_.AddOffset(CTable::VT_METADATA, metadata); }
   CTableBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); }
   CTableBuilder &operator=(const CTableBuilder &);
   flatbuffers::Offset<CTable> Finish() {
-    auto o = flatbuffers::Offset<CTable>(fbb_.EndTable(start_, 3));
+    auto o = flatbuffers::Offset<CTable>(fbb_.EndTable(start_, 5));
     return o;
   }
 };
@@ -400,9 +411,13 @@ struct CTableBuilder {
 inline flatbuffers::Offset<CTable> CreateCTable(flatbuffers::FlatBufferBuilder &_fbb,
    flatbuffers::Offset<flatbuffers::String> description = 0,
    int64_t num_rows = 0,
-   flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns = 0) {
+   flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns = 0,
+   int32_t version = 0,
+   flatbuffers::Offset<flatbuffers::String> metadata = 0) {
   CTableBuilder builder_(_fbb);
   builder_.add_num_rows(num_rows);
+  builder_.add_metadata(metadata);
+  builder_.add_version(version);
   builder_.add_columns(columns);
   builder_.add_description(description);
   return builder_.Finish();
diff --git a/src/feather/reader.cc b/src/feather/reader.cc
index 334aa27..bd792d6 100644
--- a/src/feather/reader.cc
+++ b/src/feather/reader.cc
@@ -82,6 +82,10 @@ std::string TableReader::GetDescription() const {
   return metadata_.description();
 }
 
+int TableReader::version() const {
+  return metadata_.version();
+}
+
 int64_t TableReader::num_rows() const {
   return metadata_.num_rows();
 }
diff --git a/src/feather/reader.h b/src/feather/reader.h
index cc8bc9a..ff9a9f6 100644
--- a/src/feather/reader.h
+++ b/src/feather/reader.h
@@ -150,6 +150,8 @@ class TableReader {
   std::string GetDescription() const;
   bool HasDescription() const;
 
+  int version() const;
+
   int64_t num_rows() const;
   int64_t num_columns() const;
 
diff --git a/src/feather/tests/metadata-test.cc b/src/feather/tests/metadata-test.cc
index 4bee466..986e731 100644
--- a/src/feather/tests/metadata-test.cc
+++ b/src/feather/tests/metadata-test.cc
@@ -43,6 +43,11 @@ class TestTableBuilder : public ::testing::Test {
 };
 
 
+TEST_F(TestTableBuilder, Version) {
+  Finish();
+  ASSERT_EQ(1, table_->version());
+}
+
 TEST_F(TestTableBuilder, EmptyTable) {
   Finish();
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-feather-format.git



More information about the Python-modules-commits mailing list