[Python-modules-commits] [python-feather-format] 01/04: Import python-feather-format_0.2.0.orig.tar.gz
ChangZhuo Chen
czchen at moszumanska.debian.org
Thu May 5 11:16:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
czchen pushed a commit to tag debian/0.2.0-1
in repository python-feather-format.
commit cd9b82c0fbb37d69c71ded220a7a7fe6680a964c
Author: ChangZhuo Chen (陳昌倬) <czchen at debian.org>
Date: Thu May 5 18:23:21 2016 +0800
Import python-feather-format_0.2.0.orig.tar.gz
---
PKG-INFO | 28 ++++++++++++++++++----------
README.md | 26 +++++++++++++++++---------
feather/api.py | 4 +++-
feather/interop.h | 27 ++++++++++++++++++++++++---
feather/version.py | 2 +-
feather_format.egg-info/PKG-INFO | 28 ++++++++++++++++++----------
setup.py | 4 ++--
src/feather/metadata.cc | 22 ++++++++++++++++++----
src/feather/metadata.fbs | 6 ++++++
src/feather/metadata.h | 2 ++
src/feather/metadata_generated.h | 21 ++++++++++++++++++---
src/feather/reader.cc | 4 ++++
src/feather/reader.h | 2 ++
src/feather/tests/metadata-test.cc | 5 +++++
14 files changed, 138 insertions(+), 43 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 84bee28..411aa7d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: feather-format
-Version: 0.1.2
+Version: 0.2.0
Summary: Python interface to the Apache Arrow-based Feather File Format
Home-page: http://github.com/wesm/feather
Author: Wes McKinney
@@ -10,6 +10,23 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
Feather efficiently stores pandas DataFrame objects on disk.
+ ## Installing
+
+ ```shell
+ pip install feather-format
+ ```
+
+ #### Mac notes
+
+ Anaconda uses a default 10.5 deployment target which does not have C++11
+ properly available. This can be fixed by setting:
+
+ ```
+ export MACOSX_DEPLOYMENT_TARGET=10.10
+ ```
+
+ This may be necessary in some other OS X environments.
+
## Build
Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -52,15 +69,6 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
* Row indexes
* Object-type columns with non-homogeneous data
- ## Mac notes
-
- Anaconda uses a default 10.5 deployment target which does not have C++11
- properly available. This can be fixed by setting:
-
- ```
- export MACOSX_DEPLOYMENT_TARGET=10.10
- ```
-
Platform: UNKNOWN
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
diff --git a/README.md b/README.md
index dd9a748..a916ea0 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,23 @@
Feather efficiently stores pandas DataFrame objects on disk.
+## Installing
+
+```shell
+pip install feather-format
+```
+
+#### Mac notes
+
+Anaconda uses a default 10.5 deployment target which does not have C++11
+properly available. This can be fixed by setting:
+
+```
+export MACOSX_DEPLOYMENT_TARGET=10.10
+```
+
+This may be necessary in some other OS X environments.
+
## Build
Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -43,12 +60,3 @@ Some features of pandas are not supported in Feather:
* Non-string column names
* Row indexes
* Object-type columns with non-homogeneous data
-
-## Mac notes
-
-Anaconda uses a default 10.5 deployment target which does not have C++11
-properly available. This can be fixed by setting:
-
-```
-export MACOSX_DEPLOYMENT_TARGET=10.10
-```
diff --git a/feather/api.py b/feather/api.py
index 95ea54e..a04e449 100644
--- a/feather/api.py
+++ b/feather/api.py
@@ -51,9 +51,11 @@ def read_dataframe(path, columns=None):
# TODO(wesm): pipeline conversion to Arrow memory layout
data = {}
+ names = []
for i in range(reader.num_columns):
name, arr = reader.read_array(i)
data[name] = arr
+ names.append(name)
# TODO(wesm):
- return pd.DataFrame(data)
+ return pd.DataFrame(data, columns=names)
diff --git a/feather/interop.h b/feather/interop.h
index 4c94efb..2c53626 100644
--- a/feather/interop.h
+++ b/feather/interop.h
@@ -62,13 +62,16 @@ NPY_INT_DECL(INT8, INT8, int8_t);
NPY_INT_DECL(INT16, INT16, int16_t);
NPY_INT_DECL(INT32, INT32, int32_t);
NPY_INT_DECL(INT64, INT64, int64_t);
-NPY_INT_DECL(LONGLONG, INT64, int64_t);
NPY_INT_DECL(UINT8, UINT8, uint8_t);
NPY_INT_DECL(UINT16, UINT16, uint16_t);
NPY_INT_DECL(UINT32, UINT32, uint32_t);
NPY_INT_DECL(UINT64, UINT64, uint64_t);
+
+#if NPY_INT64 != NPY_LONGLONG
+NPY_INT_DECL(LONGLONG, INT64, int64_t);
NPY_INT_DECL(ULONGLONG, UINT64, uint64_t);
+#endif
template <>
struct npy_traits<NPY_FLOAT32> {
@@ -411,24 +414,42 @@ Status pandas_masked_to_primitive(PyObject* ao, PyObject* mo,
return Status::Invalid("only handle 1-dimensional arrays");
}
- switch(PyArray_DESCR(arr)->type_num) {
+ int type_num = PyArray_DESCR(arr)->type_num;
+
+#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
+ // GH #129, on i386 / Apple Python, both LONGLONG and INT64 can be observed
+ // in the wild, which is buggy. We set U/LONGLONG to U/INT64 so things work
+ // properly.
+ if (type_num == NPY_LONGLONG) {
+ type_num = NPY_INT64;
+ }
+ if (type_num == NPY_ULONGLONG) {
+ type_num = NPY_UINT64;
+ }
+#endif
+
+ switch(type_num) {
TO_FEATHER_CASE(BOOL);
TO_FEATHER_CASE(INT8);
TO_FEATHER_CASE(INT16);
TO_FEATHER_CASE(INT32);
TO_FEATHER_CASE(INT64);
+#if (NPY_INT64 != NPY_LONGLONG)
TO_FEATHER_CASE(LONGLONG);
+#endif
TO_FEATHER_CASE(UINT8);
TO_FEATHER_CASE(UINT16);
TO_FEATHER_CASE(UINT32);
TO_FEATHER_CASE(UINT64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
TO_FEATHER_CASE(ULONGLONG);
+#endif
TO_FEATHER_CASE(FLOAT32);
TO_FEATHER_CASE(FLOAT64);
TO_FEATHER_CASE(OBJECT);
default:
std::stringstream ss;
- ss << "unsupported type " << PyArray_DESCR(arr)->type_num
+ ss << "unsupported type " << type_num
<< std::endl;
return Status::Invalid(ss.str());
}
diff --git a/feather/version.py b/feather/version.py
index 7c21425..df4ca77 100644
--- a/feather/version.py
+++ b/feather/version.py
@@ -1,4 +1,4 @@
# THIS FILE IS GENERATED FROM SETUP.PY
-version = '0.1.2'
+version = '0.2.0'
isrelease = 'True'
\ No newline at end of file
diff --git a/feather_format.egg-info/PKG-INFO b/feather_format.egg-info/PKG-INFO
index 84bee28..411aa7d 100644
--- a/feather_format.egg-info/PKG-INFO
+++ b/feather_format.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: feather-format
-Version: 0.1.2
+Version: 0.2.0
Summary: Python interface to the Apache Arrow-based Feather File Format
Home-page: http://github.com/wesm/feather
Author: Wes McKinney
@@ -10,6 +10,23 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
Feather efficiently stores pandas DataFrame objects on disk.
+ ## Installing
+
+ ```shell
+ pip install feather-format
+ ```
+
+ #### Mac notes
+
+ Anaconda uses a default 10.5 deployment target which does not have C++11
+ properly available. This can be fixed by setting:
+
+ ```
+ export MACOSX_DEPLOYMENT_TARGET=10.10
+ ```
+
+ This may be necessary in some other OS X environments.
+
## Build
Building Feather requires a C++11 compiler. We've simplified the PyPI packaging
@@ -52,15 +69,6 @@ Description: ## Python interface to the Apache Arrow-based Feather File Format
* Row indexes
* Object-type columns with non-homogeneous data
- ## Mac notes
-
- Anaconda uses a default 10.5 deployment target which does not have C++11
- properly available. This can be fixed by setting:
-
- ```
- export MACOSX_DEPLOYMENT_TARGET=10.10
- ```
-
Platform: UNKNOWN
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
diff --git a/setup.py b/setup.py
index c42b7ed..9a5dba7 100644
--- a/setup.py
+++ b/setup.py
@@ -33,8 +33,8 @@ if Cython.__version__ < '0.19.1':
raise Exception('Please upgrade to Cython 0.19.1 or newer')
MAJOR = 0
-MINOR = 1
-MICRO = 2
+MINOR = 2
+MICRO = 0
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
ISRELEASED = True
diff --git a/src/feather/metadata.cc b/src/feather/metadata.cc
index 6b37c17..f059945 100644
--- a/src/feather/metadata.cc
+++ b/src/feather/metadata.cc
@@ -26,8 +26,10 @@ namespace metadata {
typedef flatbuffers::FlatBufferBuilder FBB;
+using FBString = flatbuffers::Offset<flatbuffers::String>;
+
// Flatbuffers conveniences
-typedef std::vector<flatbuffers::Offset<fbs::Column> > ColumnVector;
+using ColumnVector = std::vector<flatbuffers::Offset<fbs::Column>>;
// ----------------------------------------------------------------------
// Primitive array
@@ -124,6 +126,8 @@ fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
// ----------------------------------------------------------------------
// TableBuilder
+static constexpr int FEATHER_VERSION = 1;
+
class TableBuilder::Impl {
public:
explicit Impl(int64_t num_rows) :
@@ -138,14 +142,19 @@ class TableBuilder::Impl {
if (finished_) {
return Status::Invalid("can only call this once");
}
- flatbuffers::Offset<flatbuffers::String> desc = 0;
+
+ FBString desc = 0;
if (!description_.empty()) {
desc = fbb_.CreateString(description_);
}
- auto root = fbs::CreateCTable(fbb_, desc,
+ flatbuffers::Offset<flatbuffers::String> metadata = 0;
+
+ auto root = fbs::CreateCTable(fbb_,
+ desc,
num_rows_,
- fbb_.CreateVector(columns_));
+ fbb_.CreateVector(columns_),
+ FEATHER_VERSION, metadata);
fbb_.Finish(root);
finished_ = true;
@@ -402,6 +411,11 @@ int64_t Table::num_rows() const {
return table->num_rows();
}
+int Table::version() const {
+ const fbs::CTable* table = static_cast<const fbs::CTable*>(table_);
+ return table->version();
+}
+
size_t Table::num_columns() const {
const fbs::CTable* table = static_cast<const fbs::CTable*>(table_);
return table->columns()->size();
diff --git a/src/feather/metadata.fbs b/src/feather/metadata.fbs
index b36104e..a6e00fd 100644
--- a/src/feather/metadata.fbs
+++ b/src/feather/metadata.fbs
@@ -112,6 +112,12 @@ table CTable {
num_rows: long;
columns: [Column];
+
+ /// Version number of the Feather format
+ version: int;
+
+ /// Table metadata (likely JSON), not yet used
+ metadata: string;
}
root_type CTable;
diff --git a/src/feather/metadata.h b/src/feather/metadata.h
index fc1a286..6451f75 100644
--- a/src/feather/metadata.h
+++ b/src/feather/metadata.h
@@ -153,6 +153,8 @@ class Table {
std::string description() const;
+ int version() const;
+
// Optional
bool has_description() const;
diff --git a/src/feather/metadata_generated.h b/src/feather/metadata_generated.h
index 86bf23e..d214174 100644
--- a/src/feather/metadata_generated.h
+++ b/src/feather/metadata_generated.h
@@ -365,12 +365,18 @@ struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
enum {
VT_DESCRIPTION = 4,
VT_NUM_ROWS = 6,
- VT_COLUMNS = 8
+ VT_COLUMNS = 8,
+ VT_VERSION = 10,
+ VT_METADATA = 12
};
/// Some text (or a name) metadata about what the file is, optional
const flatbuffers::String *description() const { return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION); }
int64_t num_rows() const { return GetField<int64_t>(VT_NUM_ROWS, 0); }
const flatbuffers::Vector<flatbuffers::Offset<Column>> *columns() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Column>> *>(VT_COLUMNS); }
+ /// Version number of the Feather format
+ int32_t version() const { return GetField<int32_t>(VT_VERSION, 0); }
+ /// Table metadata (likely JSON), not yet used
+ const flatbuffers::String *metadata() const { return GetPointer<const flatbuffers::String *>(VT_METADATA); }
bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<flatbuffers::uoffset_t>(verifier, VT_DESCRIPTION) &&
@@ -379,6 +385,9 @@ struct CTable FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
VerifyField<flatbuffers::uoffset_t>(verifier, VT_COLUMNS) &&
verifier.Verify(columns()) &&
verifier.VerifyVectorOfTables(columns()) &&
+ VerifyField<int32_t>(verifier, VT_VERSION) &&
+ VerifyField<flatbuffers::uoffset_t>(verifier, VT_METADATA) &&
+ verifier.Verify(metadata()) &&
verifier.EndTable();
}
};
@@ -389,10 +398,12 @@ struct CTableBuilder {
void add_description(flatbuffers::Offset<flatbuffers::String> description) { fbb_.AddOffset(CTable::VT_DESCRIPTION, description); }
void add_num_rows(int64_t num_rows) { fbb_.AddElement<int64_t>(CTable::VT_NUM_ROWS, num_rows, 0); }
void add_columns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns) { fbb_.AddOffset(CTable::VT_COLUMNS, columns); }
+ void add_version(int32_t version) { fbb_.AddElement<int32_t>(CTable::VT_VERSION, version, 0); }
+ void add_metadata(flatbuffers::Offset<flatbuffers::String> metadata) { fbb_.AddOffset(CTable::VT_METADATA, metadata); }
CTableBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); }
CTableBuilder &operator=(const CTableBuilder &);
flatbuffers::Offset<CTable> Finish() {
- auto o = flatbuffers::Offset<CTable>(fbb_.EndTable(start_, 3));
+ auto o = flatbuffers::Offset<CTable>(fbb_.EndTable(start_, 5));
return o;
}
};
@@ -400,9 +411,13 @@ struct CTableBuilder {
inline flatbuffers::Offset<CTable> CreateCTable(flatbuffers::FlatBufferBuilder &_fbb,
flatbuffers::Offset<flatbuffers::String> description = 0,
int64_t num_rows = 0,
- flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns = 0) {
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Column>>> columns = 0,
+ int32_t version = 0,
+ flatbuffers::Offset<flatbuffers::String> metadata = 0) {
CTableBuilder builder_(_fbb);
builder_.add_num_rows(num_rows);
+ builder_.add_metadata(metadata);
+ builder_.add_version(version);
builder_.add_columns(columns);
builder_.add_description(description);
return builder_.Finish();
diff --git a/src/feather/reader.cc b/src/feather/reader.cc
index 334aa27..bd792d6 100644
--- a/src/feather/reader.cc
+++ b/src/feather/reader.cc
@@ -82,6 +82,10 @@ std::string TableReader::GetDescription() const {
return metadata_.description();
}
+int TableReader::version() const {
+ return metadata_.version();
+}
+
int64_t TableReader::num_rows() const {
return metadata_.num_rows();
}
diff --git a/src/feather/reader.h b/src/feather/reader.h
index cc8bc9a..ff9a9f6 100644
--- a/src/feather/reader.h
+++ b/src/feather/reader.h
@@ -150,6 +150,8 @@ class TableReader {
std::string GetDescription() const;
bool HasDescription() const;
+ int version() const;
+
int64_t num_rows() const;
int64_t num_columns() const;
diff --git a/src/feather/tests/metadata-test.cc b/src/feather/tests/metadata-test.cc
index 4bee466..986e731 100644
--- a/src/feather/tests/metadata-test.cc
+++ b/src/feather/tests/metadata-test.cc
@@ -43,6 +43,11 @@ class TestTableBuilder : public ::testing::Test {
};
+TEST_F(TestTableBuilder, Version) {
+ Finish();
+ ASSERT_EQ(1, table_->version());
+}
+
TEST_F(TestTableBuilder, EmptyTable) {
Finish();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-feather-format.git
More information about the Python-modules-commits mailing list