[pyosmium] 02/08: Imported Upstream version 2.7.1

Sebastiaan Couwenberg sebastic at moszumanska.debian.org
Wed Jun 1 15:43:50 UTC 2016


This is an automated email from the git hooks/post-receive script.

sebastic pushed a commit to branch master
in repository pyosmium.

commit 54e7a8caad772f5c46d3725c2bf05e0c998f9a3d
Author: Bas Couwenberg <sebastic at xs4all.nl>
Date:   Wed Jun 1 13:44:03 2016 +0200

    Imported Upstream version 2.7.1
---
 .travis.yml                       |  21 ++++
 CHANGELOG.md                      |  16 ++-
 doc/conf.py                       |   4 +-
 examples/filter_coastlines.py     |   2 +-
 examples/osm_replication_stats.py |  72 +++++++++++++
 examples/osm_url_stats.py         |  41 ++++++++
 lib/generic_handler.hpp           |  32 ++++--
 lib/merged_input.hpp              |  74 +++++++++++++
 lib/osm.cc                        |   1 +
 lib/osmium.cc                     |  30 +++++-
 lib/replication.cc                |  45 ++++++++
 osmium/replication/__init__.py    |   1 +
 osmium/replication/server.py      | 211 ++++++++++++++++++++++++++++++++++++++
 setup.py                          |   4 +-
 14 files changed, 541 insertions(+), 13 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 37dda0a..4535dbf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,6 +22,22 @@ matrix:
         - os: linux
           compiler: gcc
           env: USE_PYTHON_VERSION=3
+        - os: osx
+          osx_image: xcode6.4
+          compiler: clang
+          env: USE_PYTHON_VERSION=2 PYTHON_SUFFIX= BOOST_PYTHON_OPTION=
+        - os: osx
+          osx_image: xcode6.4
+          compiler: clang
+          env: USE_PYTHON_VERSION=3 PYTHON_SUFFIX=3 BOOST_PYTHON_OPTION="--with-python3 --without-python"
+        - os: osx
+          osx_image: xcode7
+          compiler: clang
+          env: USE_PYTHON_VERSION=2 PYTHON_SUFFIX= BOOST_PYTHON_OPTION=
+        - os: osx
+          osx_image: xcode7
+          compiler: clang
+          env: USE_PYTHON_VERSION=3 PYTHON_SUFFIX=3 BOOST_PYTHON_OPTION="--with-python3 --without-python"
 
 # http://docs.travis-ci.com/user/apt/
 addons:
@@ -42,6 +58,11 @@ addons:
 
 install:
     - git clone --quiet --depth 1 https://github.com/osmcode/libosmium.git ../libosmium
+    - if [ "$TRAVIS_OS_NAME" = 'osx' ]; then
+          brew install python$PYTHON_SUFFIX google-sparsehash;
+          brew install boost-python $BOOST_PYTHON_OPTION;
+          pip$PYTHON_SUFFIX install -q nose;
+      fi
 
 script:
     - if [ "${CXX}" = 'g++' ]; then
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c82122..400a09f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,19 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 ### Fixed
 
 
+## [2.7.1] - 2016-06-01
+
+### Added
+
+- `apply_buffer()` for handling in-memory data
+- MergeInputReader for reading and sorting multiple input files
+- Functions using replication change files to update an OSM file or database.
+
+### Changed
+
+- Use current libosmium
+
+
 ## [2.6.0] - 2016-02-04
 
 ### Added
@@ -66,7 +79,8 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 - Exception not caught in test.
 
-[unreleased]: https://github.com/osmcode/pyosmium/compare/v2.6.0...HEAD
+[unreleased]: https://github.com/osmcode/pyosmium/compare/v2.7.1...HEAD
+[2.7.1]: https://github.com/osmcode/pyosmium/compare/v2.6.0...v2.7.1
 [2.6.0]: https://github.com/osmcode/pyosmium/compare/v2.5.4...v2.6.0
 [2.5.4]: https://github.com/osmcode/pyosmium/compare/v2.5.3...v2.5.4
 [2.5.3]: https://github.com/osmcode/pyosmium/compare/v2.4.1...v2.5.3
diff --git a/doc/conf.py b/doc/conf.py
index f100bbc..73e23b2 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -63,9 +63,9 @@ copyright = '2015-2016, Sarah Hoffmann'
 # built documents.
 #
 # The short X.Y version.
-version = '2.6'
+version = '2.7'
 # The full version, including alpha/beta/rc tags.
-release = '2.6.0'
+release = '2.7.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/examples/filter_coastlines.py b/examples/filter_coastlines.py
index ced0ce3..4fda1d7 100644
--- a/examples/filter_coastlines.py
+++ b/examples/filter_coastlines.py
@@ -46,7 +46,7 @@ if __name__ == '__main__':
 
 
     # go through the ways to find all relevant nodes
-    ways = WayFilter(writer)
+    ways = WayFilter()
     ways.apply_file(sys.argv[1])
 
     # go through the file again and write out the data
diff --git a/examples/osm_replication_stats.py b/examples/osm_replication_stats.py
new file mode 100644
index 0000000..00edc33
--- /dev/null
+++ b/examples/osm_replication_stats.py
@@ -0,0 +1,72 @@
+"""
+Simple example that counts the number of changes on a replication server
+starting from a given timestamp for a maximum of n hours.
+
+Shows how to detect the different kinds of modifications.
+"""
+import osmium as o
+import sys
+import datetime as dt
+import osmium.replication.server as rserv
+
+class Stats(object):
+
+    def __init__(self):
+        self.added = 0
+        self.modified = 0
+        self.deleted = 0
+
+    def add(self, o):
+        if o.deleted:
+            self.deleted += 1
+        elif o.version == 1:
+            self.added += 1
+        else:
+            self.modified += 1
+
+
+    def outstats(self, prefix):
+        print("%s added: %d" % (prefix, self.added))
+        print("%s modified: %d" % (prefix, self.modified))
+        print("%s deleted: %d" % (prefix, self.deleted))
+
+class FileStatsHandler(o.SimpleHandler):
+    def __init__(self):
+        o.SimpleHandler.__init__(self)
+        self.nodes = Stats()
+        self.ways = Stats()
+        self.rels = Stats()
+
+    def node(self, n):
+        self.nodes.add(n)
+
+    def way(self, w):
+        self.ways.add(w)
+
+    def relation(self, r):
+        self.rels.add(r)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 4:
+        print("Usage: python osm_replication_stats.py <server_url> <start_time> <max kB>")
+        sys.exit(-1)
+
+    server_url = sys.argv[1]
+    start = dt.datetime.strptime(sys.argv[2], "%Y-%m-%dT%H:%M:%SZ")
+    if sys.version_info >= (3,0):
+        start = start.replace(tzinfo=dt.timezone.utc)
+    maxkb = min(int(sys.argv[3]), 10 * 1024)
+
+    repserv = rserv.ReplicationServer(server_url)
+
+    seqid = repserv.timestamp_to_sequence(start)
+    print("Initial sequence id:", seqid)
+
+    h = FileStatsHandler()
+    seqid = repserv.apply_diffs(h, seqid, maxkb)
+    print("Final sequence id:", seqid)
+
+    h.nodes.outstats("Nodes")
+    h.ways.outstats("Ways")
+    h.rels.outstats("Relations")
diff --git a/examples/osm_url_stats.py b/examples/osm_url_stats.py
new file mode 100644
index 0000000..0f87779
--- /dev/null
+++ b/examples/osm_url_stats.py
@@ -0,0 +1,41 @@
+"""
+This also creates stats over an OSM file, only the file is first read into
+a string buffer.
+
+Shows how to use input from strings.
+"""
+import osmium as o
+import sys
+import urllib.request
+
+class FileStatsHandler(o.SimpleHandler):
+    def __init__(self):
+        o.SimpleHandler.__init__(self)
+        self.nodes = 0
+        self.ways = 0
+        self.rels = 0
+
+    def node(self, n):
+        self.nodes += 1
+
+    def way(self, w):
+        self.ways += 1
+
+    def relation(self, r):
+        self.rels += 1
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print("Usage: python osm_url_stats.py <osmfile>")
+        sys.exit(-1)
+
+
+    data = urllib.request.urlopen(sys.argv[1]).read()
+
+    h = FileStatsHandler()
+    h.apply_buffer(data, sys.argv[1])
+
+    print("Nodes: %d" % h.nodes)
+    print("Ways: %d" % h.ways)
+    print("Relations: %d" % h.rels)
diff --git a/lib/generic_handler.hpp b/lib/generic_handler.hpp
index e7d0597..46a2958 100644
--- a/lib/generic_handler.hpp
+++ b/lib/generic_handler.hpp
@@ -7,6 +7,7 @@
 #include <osmium/handler/node_locations_for_ways.hpp>
 #include <osmium/index/map/all.hpp>
 #include <osmium/io/any_input.hpp>
+#include <osmium/io/file.hpp>
 #include <osmium/visitor.hpp>
 
 #include <boost/python.hpp>
@@ -58,21 +59,21 @@ void apply_with_area(osmium::io::Reader &r,
 
 
 protected:
-void apply(const std::string &filename, osmium::osm_entity_bits::type types,
+void apply(const osmium::io::File &file, osmium::osm_entity_bits::type types,
            pre_handler pre = no_handler,
            const std::string &idx = "sparse_mem_array") {
 
     switch (pre) {
     case no_handler:
         {
-            osmium::io::Reader reader(filename, types);
+            osmium::io::Reader reader(file, types);
             osmium::apply(reader, *this);
             reader.close();
             break;
         }
     case location_handler:
         {
-            osmium::io::Reader reader(filename, types);
+            osmium::io::Reader reader(file, types);
             apply_with_location(reader, idx);
             reader.close();
             break;
@@ -82,11 +83,11 @@ void apply(const std::string &filename, osmium::osm_entity_bits::type types,
             osmium::area::Assembler::config_type assembler_config;
             osmium::area::MultipolygonCollector<osmium::area::Assembler> collector(assembler_config);
 
-            osmium::io::Reader reader1(filename);
+            osmium::io::Reader reader1(file);
             collector.read_relations(reader1);
             reader1.close();
 
-            osmium::io::Reader reader2(filename);
+            osmium::io::Reader reader2(file);
             apply_with_area(reader2, collector, idx);
             reader2.close();
             break;
@@ -145,6 +146,25 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
     void apply_file(const std::string &filename, bool locations = false,
                     const std::string &idx = "sparse_mem_array")
     {
+        apply_object(osmium::io::File(filename), locations, idx);
+    }
+
+    void apply_buffer(const boost::python::object &buf, const boost::python::str &format,
+                      bool locations = false,
+                      const std::string &idx = "sparse_mem_array")
+    {
+        Py_buffer pybuf;
+        PyObject_GetBuffer(buf.ptr(), &pybuf, PyBUF_C_CONTIGUOUS);
+        size_t len = pybuf.len;
+        const char *cbuf = reinterpret_cast<const char *>(pybuf.buf);
+        const char *cfmt = boost::python::extract<const char *>(format);
+
+        apply_object(osmium::io::File(cbuf, len, cfmt), locations, idx);
+    }
+
+private:
+    void apply_object(osmium::io::File file, bool locations, const std::string &idx)
+    {
         osmium::osm_entity_bits::type entities = osmium::osm_entity_bits::nothing;
         BaseHandler::pre_handler handler = locations?
                                             BaseHandler::location_handler
@@ -166,7 +186,7 @@ struct SimpleHandlerWrap: BaseHandler, wrapper<BaseHandler> {
         if (this->get_override("changeset"))
             entities |= osmium::osm_entity_bits::changeset;
 
-        apply(filename, entities, handler, idx);
+        apply(file, entities, handler, idx);
     }
 };
 
diff --git a/lib/merged_input.hpp b/lib/merged_input.hpp
new file mode 100644
index 0000000..cffd70d
--- /dev/null
+++ b/lib/merged_input.hpp
@@ -0,0 +1,74 @@
+#ifndef PYOSMIUM_MERGED_INPUT_HPP
+#define PYOSMIUM_MERGED_INPUT_HPP
+
+#include <vector>
+
+#include <osmium/osm/object_comparisons.hpp>
+#include <osmium/io/any_input.hpp>
+#include <osmium/handler.hpp>
+#include <osmium/object_pointer_collection.hpp>
+#include <osmium/visitor.hpp>
+
+#include <boost/python.hpp>
+
+namespace pyosmium {
+
+class MergeInputReader {
+public:
+    void apply(BaseHandler& handler, bool simplify = true) {
+        if (simplify) {
+            objects.sort(osmium::object_order_type_id_reverse_version());
+            osmium::item_type prev_type = osmium::item_type::undefined;
+            osmium::object_id_type prev_id = 0;
+            for (const auto &item: objects) {
+                if (item.type() != prev_type || item.id() != prev_id) {
+                    prev_type = item.type();
+                    prev_id = item.id();
+                    osmium::apply_item(item, handler);
+                }
+            }
+        } else {
+            objects.sort(osmium::object_order_type_id_version());
+            osmium::apply(objects.cbegin(), objects.cend(), handler);
+        }
+
+        objects = osmium::ObjectPointerCollection();
+        changes.clear();
+
+    }
+
+    size_t add_file(const std::string &filename) {
+        return internal_add(osmium::io::File(filename));
+    }
+
+    size_t add_buffer(const boost::python::object &buf,
+                      const boost::python::str &format) {
+        Py_buffer pybuf;
+        PyObject_GetBuffer(buf.ptr(), &pybuf, PyBUF_C_CONTIGUOUS);
+        size_t len = (size_t) pybuf.len;
+        const char *cbuf = reinterpret_cast<const char *>(pybuf.buf);
+        const char *cfmt = boost::python::extract<const char *>(format);
+
+        return internal_add(osmium::io::File(cbuf, len, cfmt));
+    }
+
+private:
+    size_t internal_add(osmium::io::File change_file) {
+        size_t sz = 0;
+        osmium::io::Reader reader(change_file, osmium::osm_entity_bits::object);
+        while (osmium::memory::Buffer buffer = reader.read()) {
+            osmium::apply(buffer, objects);
+            sz += buffer.committed();
+            changes.push_back(std::move(buffer));
+        }
+
+        return sz;
+    }
+
+    std::vector<osmium::memory::Buffer> changes;
+    osmium::ObjectPointerCollection objects;
+};
+
+}
+
+#endif /* PYOSMIUM_MERGED_INPUT_HPP */
diff --git a/lib/osm.cc b/lib/osm.cc
index a038302..857e287 100644
--- a/lib/osm.cc
+++ b/lib/osm.cc
@@ -50,6 +50,7 @@ BOOST_PYTHON_MODULE(_osm)
 
     to_python_converter<osmium::Timestamp, Timestamp_to_python>();
     std_pair_to_python_converter<int, int>();
+    std_pair_to_python_converter<unsigned long, unsigned long>();
 
     enum_<osmium::osm_entity_bits::type>("osm_entity_bits")
         .value("NOTHING", osmium::osm_entity_bits::nothing)
diff --git a/lib/osmium.cc b/lib/osmium.cc
index 70d926a..a571124 100644
--- a/lib/osmium.cc
+++ b/lib/osmium.cc
@@ -6,6 +6,7 @@
 
 #include "generic_writer.hpp"
 #include "generic_handler.hpp"
+#include "merged_input.hpp"
 
 template <typename T>
 void apply_reader_simple(osmium::io::Reader &rd, T &h) {
@@ -97,6 +98,11 @@ BOOST_PYTHON_MODULE(_osmium)
              "the file will be scanned twice and a location handler and a\n"
              "handler for assembling multipolygones and areas from ways will\n"
              "be executed.")
+        .def("apply_buffer", &SimpleHandlerWrap::apply_buffer,
+              (arg("self"), arg("buffer"), arg("format"),
+               arg("locations")=false, arg("idx")="sparse_mem_array"),
+             "Apply the handler to a string buffer. The buffer must be a\n"
+             "byte string.")
     ;
     def("apply", &apply_reader_simple<BaseHandler>,
         "Apply a chain of handlers.");
@@ -109,7 +115,7 @@ BOOST_PYTHON_MODULE(_osmium)
         "exists. The file type to output is determined from the file extension. "
         "The second (optional) parameter is the buffer size. osmium caches the "
         "output data in an internal memory buffer before writing it on disk. This "
-        "parameter allows to change the default buffer size of 4MB. Larger buffers "
+        "parameter allows changing the default buffer size of 4MB. Larger buffers "
         "are normally better but you should be aware that there are normally multiple "
         "buffers in use during the write process.",
         init<const char*, unsigned long>())
@@ -136,4 +142,26 @@ BOOST_PYTHON_MODULE(_osmium)
              "strongly recommended to close the writer as soon as possible, so "
              "that the buffer memory can be freed.")
     ;
+
+    class_<pyosmium::MergeInputReader, boost::noncopyable>("MergeInputReader",
+        "Collects data from multiple input files and sorts and optionally "
+        "deduplicates the data before apllying it to a handler.")
+        .def("apply", &pyosmium::MergeInputReader::apply,
+            (arg("self"), arg("handler"), arg("simplify")=true),
+            "Apply collected data to a handler. The data will be sorted first. "
+            "If `simplify` is true (default) then duplicates will be eliminated "
+            "and only the newest version of each object kept. After the data "
+            "has been applied the buffer of the MergeInputReader is empty and "
+            "new data can be added for the next round of application.")
+        .def("add_file", &pyosmium::MergeInputReader::add_file,
+            (arg("self"), arg("file")),
+             "Add data from a file to the internal cache. The file type will be "
+             "determined from the file extension.")
+        .def("add_buffer", &pyosmium::MergeInputReader::add_buffer,
+             (arg("self"), arg("buffer"), arg("format")),
+             "Add data from a byte buffer. The format of the input data must "
+             "be given in the `format` argument as a string. The data will be "
+             "copied into internal buffers, so that the input buffer can be "
+             "safely discarded after the function has been called.")
+    ;
 }
diff --git a/lib/replication.cc b/lib/replication.cc
new file mode 100644
index 0000000..cb0e266
--- /dev/null
+++ b/lib/replication.cc
@@ -0,0 +1,45 @@
+#include <osmium/osm.hpp>
+#include <osmium/io/any_input.hpp>
+#include <osmium/handler.hpp>
+#include <osmium/visitor.hpp>
+
+#include <boost/python.hpp>
+
+namespace pyosmium {
+
+struct LastChangeHandler : public osmium::handler::Handler {
+    osmium::Timestamp last_change;
+
+    void osm_object(const osmium::OSMObject& obj) {
+        set(obj.timestamp());
+    }
+
+private:
+    void set(const osmium::Timestamp& ts) {
+        if (ts > last_change)
+            last_change = ts;
+    }
+};
+
+osmium::Timestamp compute_latest_change(const char* filename)
+{
+    osmium::io::Reader reader(filename, osmium::osm_entity_bits::node |
+                                        osmium::osm_entity_bits::way |
+                                        osmium::osm_entity_bits::relation);
+
+    LastChangeHandler handler;
+    osmium::apply(reader, handler);
+    reader.close();
+
+    return handler.last_change;
+}
+
+
+}
+
+BOOST_PYTHON_MODULE(_replication)
+{
+    using namespace boost::python;
+    def("newest_change_from_file", &pyosmium::compute_latest_change,
+        "Find the date of the newest change in a file");
+}
diff --git a/osmium/replication/__init__.py b/osmium/replication/__init__.py
new file mode 100644
index 0000000..7052631
--- /dev/null
+++ b/osmium/replication/__init__.py
@@ -0,0 +1 @@
+from ._replication import *
diff --git a/osmium/replication/server.py b/osmium/replication/server.py
new file mode 100644
index 0000000..d8421f2
--- /dev/null
+++ b/osmium/replication/server.py
@@ -0,0 +1,211 @@
+""" Helper functions to communicate with replication servers.
+"""
+
+import sys
+try:
+    import urllib.request as urlrequest
+except ImportError:
+    import urllib2 as urlrequest
+try:
+    import urllib.error as urlerror
+except ImportError:
+    import urllib2 as urlerror
+import datetime as dt
+from collections import namedtuple
+from math import ceil
+from osmium import MergeInputReader
+
+OsmosisState = namedtuple('OsmosisState', ['sequence', 'timestamp'])
+
+class ReplicationServer(object):
+    """ Represents a server that publishes replication data. Replication
+        change files allow keeping local OSM data up-to-date without downloading
+        the full dataset again.
+    """
+
+    def __init__(self, url, diff_type='osc.gz'):
+        self.baseurl = url
+        self.diff_type = diff_type
+
+    def apply_diffs(self, handler, start_id, max_size=1024, simplify=True):
+        """ Download diffs starting with sequence id `start_id`, merge them
+            together and then apply them to handler `handler`. `max_size`
+            restricts the number of diffs that are downloaded. The download
+            stops as soon as either a diff cannot be downloaded or the
+            unpacked data in memory exceeds `max_size` kB.
+
+            The function returns the sequence id of the last diff that was
+            downloaded or None if the download failed completely.
+        """
+        left_size = max_size * 1024
+        current_id = start_id
+
+        # must not read data newer than the published sequence id
+        # or we might end up reading partial data
+        newest = self.get_state_info()
+
+        if newest is None or current_id > newest.sequence:
+            return None
+
+        rd = MergeInputReader()
+
+        while left_size > 0 and current_id <= newest.sequence:
+            try:
+                diffdata = self.get_diff_block(current_id)
+            except:
+                diffdata = ''
+            if len(diffdata) == 0:
+                if start_id == current_id:
+                    return None
+                break
+
+            left_size -= rd.add_buffer(diffdata, self.diff_type)
+            current_id += 1
+
+        rd.apply(handler, simplify)
+
+        return current_id - 1
+
+
+    def timestamp_to_sequence(self, timestamp, balanced_search=False):
+        """ Get the sequence number of the replication file that contains the
+            given timestamp. The search algorithm is optimised for replication
+            servers that publish updates in regular intervals. For servers
+            with irregular change file publication dates `balanced_search`
+            should be set to true so that a standard binary search for the
+            sequence will be used. The default is good for all known
+            OSM replication services.
+        """
+
+        # get the current timestamp from the server
+        upper = self.get_state_info()
+
+        if upper is None:
+            return None
+        if timestamp >= upper.timestamp or upper.sequence <= 0:
+            return upper.sequence
+
+        # find a state file that is before the required timestamp
+        lower = None
+        lowerid = 0
+        while lower is None:
+            lower = self.get_state_info(lowerid)
+
+            if lower is not None and lower.timestamp >= timestamp:
+                if lower.sequence == 0 or lower.sequence + 1 >= upper.sequence:
+                    return lower.sequence
+                upper = lower
+                lower = None
+                lowerid = 0
+
+            if lower is None:
+                # no lower yet, so try a higher id (binary search wise)
+                newid = int((lowerid + upper.sequence) / 2)
+                if newid <= lowerid:
+                    # nothing suitable found, so upper is probably the best we can do
+                    return upper.sequence
+                lowerid = newid
+
+        # Now do a binary search between upper and lower.
+        # We could be clever here and compute the most likely state file
+        # by interpolating over the timestamps but that creates a whole ton of
+        # special cases that need to be handled correctly.
+        while True:
+            if balanced_search:
+                base_splitid = int((lower.sequence + upper.sequence) / 2)
+            else:
+                ts_int = (upper.timestamp - lower.timestamp).total_seconds()
+                seq_int = upper.sequence - lower.sequence
+                goal = (timestamp - lower.timestamp).total_seconds()
+                base_splitid = lower.sequence + ceil(goal * seq_int / ts_int)
+                if base_splitid >= upper.sequence:
+                    base_splitid = upper.sequence - 1
+            split = self.get_state_info(base_splitid)
+
+            if split is None:
+                # file missing, search the next towards lower
+                splitid = base_splitid - 1
+                while split is None and splitid > lower.sequence:
+                    split = self.get_state_info(splitid)
+                    splitid -= 1
+            if split is None:
+                # still nothing? search towards upper
+                splitid = base_splitid + 1
+                while split is None and splitid < upper.sequence:
+                    split = self.get_state_info(splitid)
+                    splitid += 1
+            if split is None:
+                # still nothing? Then lower has to do
+                return lower.sequence
+
+            # set new boundary
+            if split.timestamp < timestamp:
+                lower = split
+            else:
+                upper = split
+
+            if lower.sequence + 1 >= upper.sequence:
+                return lower.sequence
+
+
+    def get_state_info(self, seq=None):
+        """ Downloads and returns the state information for the given
+            sequence. If the download is successful, a namedtuple with
+            `sequence` and `timestamp` is returned, otherwise the function
+            returns `None`.
+        """
+        try:
+            response = urlrequest.urlopen(self.get_state_url(seq))
+        except:
+            return None
+
+        ts = None
+        seq = None
+        line = response.readline()
+        while line:
+            line = line.decode('utf-8')
+            if '#' in line:
+                line = line[0:line.index('#')]
+            else:
+                line = line.strip()
+            if line:
+                key, val = line.split('=', 2)
+                if key == 'sequenceNumber':
+                    seq = int(val)
+                elif key == 'timestamp':
+                    ts = dt.datetime.strptime(val, "%Y-%m-%dT%H\\:%M\\:%SZ")
+                    if sys.version_info >= (3,0):
+                        ts = ts.replace(tzinfo=dt.timezone.utc)
+            line = response.readline()
+
+        return OsmosisState(sequence=seq, timestamp=ts)
+
+    def get_diff_block(self, seq):
+        """ Downloads the diff with the given sequence number and returns
+            it as a byte sequence. Throws a `urllib.error.HTTPError`
+            (or `urllib2.HTTPError` in python2)
+            if the file cannot be downloaded.
+        """
+        return urlrequest.urlopen(self.get_diff_url(seq)).read()
+
+
+    def get_state_url(self, seq):
+        """ Returns the URL of the state.txt files for a given sequence id.
+
+            If seq is `None` the URL for the latest state info is returned,
+            i.e. the state file in the root directory of the replication
+            service.
+        """
+        if seq is None:
+            return self.baseurl + '/state.txt'
+
+        return '%s/%03i/%03i/%03i.state.txt' % (self.baseurl,
+                     seq / 1000000, (seq % 1000000) / 1000, seq % 1000)
+
+
+    def get_diff_url(self, seq):
+        """ Returns the URL to the diff file for the given sequence id.
+        """
+        return '%s/%03i/%03i/%03i.%s' % (self.baseurl,
+                     seq / 1000000, (seq % 1000000) / 1000, seq % 1000,
+                     self.diff_type)
diff --git a/setup.py b/setup.py
index d97ae28..e3aa529 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,7 @@ for ext in ('io', 'index', 'geom'):
            extra_compile_args = extra_compile_args
          ))
 
-for ext in ('osm', ):
+for ext in ('osm', 'replication'):
     extensions.append(Extension('osmium.%s._%s' % (ext, ext),
            sources = ['lib/%s.cc' % ext],
            include_dirs = includes,
@@ -70,7 +70,7 @@ for ext in ('osm', ):
     packages.append('osmium.%s' % ext)
 
 setup (name = 'pyosmium',
-       version = '2.6.0',
+       version = '2.7.1',
        description = 'Provides python bindings for libosmium.',
        packages = packages,
        ext_modules = extensions)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pyosmium.git



More information about the Pkg-grass-devel mailing list