[osmium-tool] 30/44: Extend renumber subcommand: Can serialize mapping to disk.

Sebastiaan Couwenberg sebastic at moszumanska.debian.org
Tue Jul 21 20:15:56 UTC 2015


This is an automated email from the git hooks/post-receive script.

sebastic pushed a commit to tag v1.1.0
in repository osmium-tool.

commit db10ea6778cfdc174557e1fe0aa1f428c96c6e21
Author: Jochen Topf <jochen at topf.org>
Date:   Thu Jul 2 19:45:50 2015 +0200

    Extend renumber subcommand: Can serialize mapping to disk.
    
    This allows, for instance, mapping a data file and a suitable change file.
---
 man/osmium-renumber.md          |  20 ++++++-
 src/command_renumber.cpp        | 114 +++++++++++++++++++++++++++++++++-------
 src/command_renumber.hpp        |  16 ++++--
 test/renumber/CMakeLists.txt    |  13 +++++
 test/renumber/input-change.osc  |  19 +++++++
 test/renumber/output-change.osc |  19 +++++++
 6 files changed, 176 insertions(+), 25 deletions(-)

diff --git a/man/osmium-renumber.md b/man/osmium-renumber.md
index 0151814..be4f48a 100644
--- a/man/osmium-renumber.md
+++ b/man/osmium-renumber.md
@@ -43,8 +43,16 @@ IDs.
 
 --generator=NAME
 :   The name and version of the program generating the output file. It will be
-    added to the header of the output file. Default is "*osmium/*" and the version
-    of osmium.
+    added to the header of the output file. Default is "*osmium/*" and the
+    version of osmium.
+
+-i, --index-directory=DIR
+:   Directory where the index files for mapping between old and new IDs are
+    read from and written to, respectively. Use this if you want to map IDs
+    in several OSM files. Without this option, the indexes are not read from
+    or written to disk. The directory must exist. Use '.' for the current
+    directory. The files written will be named `nodes.idx`, `ways.idx`, and
+    `relations.idx`.
 
 -o, --output=FILE
 :   Name of the output file. Default is '-' (*stdout*).
@@ -78,6 +86,14 @@ Renumber a PBF file and output to a compressed XML file:
 Renumbering Switzerland currently (summer 2015) takes only about a minute and
 needs a bit more than 2 GB RAM.
 
+Renumber an OSM file storing the indexes on disk:
+
+    osmium renumber -i. -o renumbered.osm data.osm
+
+then rewrite a change file, too:
+
+    osmium renumber -i. -o renumbered.osc changes.osc
+
 
 # SEE ALSO
 
diff --git a/src/command_renumber.cpp b/src/command_renumber.cpp
index a1288b9..baa71dd 100644
--- a/src/command_renumber.cpp
+++ b/src/command_renumber.cpp
@@ -20,14 +20,23 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 
+#include <algorithm>
+#include <fcntl.h>
 #include <iostream>
 #include <iterator>
+#include <map>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
 
 #include <boost/program_options.hpp>
 
 #include <osmium/index/index.hpp>
 #include <osmium/io/any_input.hpp>
 #include <osmium/io/any_output.hpp>
+#include <osmium/io/detail/read_write.hpp>
+#include <osmium/util/file.hpp>
+#include <osmium/util/memory_mapping.hpp>
 
 #include "command_renumber.hpp"
 
@@ -42,6 +51,7 @@ bool CommandRenumber::setup(const std::vector<std::string>& arguments) {
     ("output-format,f", po::value<std::string>(), "Format of output file")
     ("input-format,F", po::value<std::string>(), "Format of input files")
     ("generator", po::value<std::string>(), "Generator setting for file header")
+    ("index-directory,i", po::value<std::string>(), "Index directory")
     ("output-header", po::value<std::vector<std::string>>(), "Add output header")
     ("overwrite,O", "Allow existing output file to be overwritten")
     ;
@@ -72,6 +82,10 @@ bool CommandRenumber::setup(const std::vector<std::string>& arguments) {
         m_output_headers = vm["output-header"].as<std::vector<std::string>>();
     }
 
+    if (vm.count("index-directory")) {
+        m_index_directory = vm["index-directory"].as<std::string>();
+    }
+
     setup_input_file(vm);
     setup_output_file(vm);
 
@@ -87,44 +101,36 @@ bool CommandRenumber::setup(const std::vector<std::string>& arguments) {
     for (const auto& h : m_output_headers) {
         m_vout << "    " << h << "\n";
     }
+    m_vout << "  index directory: " << m_index_directory << "\n";
 
     return true;
 }
 
-osmium::object_id_type CommandRenumber::lookup(int n, osmium::object_id_type id) {
-    osmium::object_id_type result;
-
+osmium::object_id_type CommandRenumber::lookup(osmium::item_type type, osmium::object_id_type id) {
     try {
-        result = m_id_index[n].get(id);
-    } catch (osmium::not_found& e) {
-        m_id_index[n].set(id, ++m_last_id[n]);
-        result = m_last_id[n];
+        return index(type).at(id);
+    } catch (std::out_of_range&) {
+        index(type)[id] = ++last_id(type);
+        return last_id(type);
     }
-
-    return result;
 }
 
-
 void CommandRenumber::renumber(osmium::memory::Buffer& buffer) {
     for (auto it = buffer.begin<osmium::OSMObject>(); it != buffer.end<osmium::OSMObject>(); ++it) {
         switch (it->type()) {
             case osmium::item_type::node:
-                m_id_index[0].set(it->id(), ++m_last_id[0]);
-                it->set_id(m_last_id[0]);
+                it->set_id(lookup(osmium::item_type::node, it->id()));
                 break;
             case osmium::item_type::way:
-                m_id_index[1].set(it->id(), ++m_last_id[1]);
-                it->set_id(m_last_id[1]);
+                it->set_id(lookup(osmium::item_type::way, it->id()));
                 for (auto& ref : static_cast<osmium::Way&>(*it).nodes()) {
-                    ref.set_ref(lookup(0, ref.ref()));
+                    ref.set_ref(lookup(osmium::item_type::node, ref.ref()));
                 }
                 break;
             case osmium::item_type::relation:
-                it->set_id(m_id_index[2].get(it->id()));
+                it->set_id(lookup(osmium::item_type::relation, it->id()));
                 for (auto& member : static_cast<osmium::Relation&>(*it).members()) {
-                    int n = uint16_t(member.type()) - 1;
-                    assert(n >= 0 && n <= 2);
-                    member.set_ref(lookup(n, member.ref()));
+                    member.set_ref(lookup(member.type(), member.ref()));
                 }
                 break;
             default:
@@ -133,8 +139,68 @@ void CommandRenumber::renumber(osmium::memory::Buffer& buffer) {
     }
 }
 
+std::string CommandRenumber::filename(const std::string& name) {
+    return m_index_directory + "/" + name + ".idx"; // e.g. "nodes" -> "<index directory>/nodes.idx"
+}
+
+remap_index_type& CommandRenumber::index(osmium::item_type type) {
+    return m_id_index[osmium::item_type_to_nwr_index(type)]; // picks one of the three ID maps; presumably node/way/relation -> slot 0/1/2 -- TODO confirm in libosmium
+}
+
+osmium::object_id_type& CommandRenumber::last_id(osmium::item_type type) {
+    return m_last_id[osmium::item_type_to_nwr_index(type)]; // returned by reference: lookup() allocates the next new ID with ++last_id(type)
+}
+
+void CommandRenumber::read_index(osmium::item_type type, const std::string& name) {
+    std::string f { filename(name) }; // index file saved by an earlier run, if any
+    int fd = ::open(f.c_str(), O_RDONLY);
+    if (fd < 0) {
+        // ignore if the file is not there
+        if (errno == ENOENT) {
+            return;
+        }
+        throw std::runtime_error(std::string("Can't open file '") + f + "': " + strerror(errno));
+    }
+    size_t file_size = osmium::util::file_size(fd); // must be a whole number of (old ID, new ID) records
+    if (file_size % sizeof(remap_index_type::value_type) != 0) {
+        close(fd);
+        throw std::runtime_error(std::string("index file '") + f + "' has wrong file size");
+    }
+    if (file_size == 0) { close(fd); return; } // empty index: nothing to load, and max_element() below needs at least one record
+    osmium::util::TypedMemoryMapping<remap_index_type::value_type> mapping(file_size / sizeof(remap_index_type::value_type), false, fd);
+    std::copy(mapping.begin(), mapping.end(), std::inserter(index(type), index(type).begin()));
+    // Resume numbering after the largest *new* ID; the default pair comparison would pick the entry with the largest old ID instead.
+    last_id(type) = std::max_element(mapping.begin(), mapping.end(), [](const remap_index_type::value_type& lhs, const remap_index_type::value_type& rhs) { return lhs.second < rhs.second; })->second;
+    close(fd);
+}
+
+void CommandRenumber::write_index(osmium::item_type type, const std::string& name) {
+    std::string f { filename(name) }; // read back by read_index() on a later run
+    int fd = ::open(f.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); // O_TRUNC: never leave stale records behind the new data
+    if (fd < 0) {
+        throw std::runtime_error(std::string("Can't open file '") + f + "': " + strerror(errno));
+    }
+
+    // The file is simply the map's (old ID, new ID) pairs laid out as one contiguous array.
+    std::vector<remap_index_type::value_type> data(index(type).begin(), index(type).end());
+    osmium::io::detail::reliable_write(fd, reinterpret_cast<const char*>(data.data()), sizeof(remap_index_type::value_type) * data.size());
+
+    close(fd);
+}
+
 bool CommandRenumber::run() {
     try {
+        if (!m_index_directory.empty()) {
+            m_vout << "Reading index files...\n";
+            read_index(osmium::item_type::node, "nodes");
+            read_index(osmium::item_type::way, "ways");
+            read_index(osmium::item_type::relation, "relations");
+
+            m_vout << "  Nodes     index contains " << index(osmium::item_type::node).size()     << " items\n";
+            m_vout << "  Ways      index contains " << index(osmium::item_type::way).size()      << " items\n";
+            m_vout << "  Relations index contains " << index(osmium::item_type::relation).size() << " items\n";
+        }
+
         m_vout << "First pass through input file (reading relations)...\n";
         osmium::io::Reader reader_pass1(m_input_file, osmium::osm_entity_bits::relation);
 
@@ -150,7 +216,7 @@ bool CommandRenumber::run() {
         osmium::io::InputIterator<osmium::io::Reader, osmium::Relation> end {};
 
         for (; it != end; ++it) {
-            m_id_index[2].set(it->id(), ++m_last_id[2]);
+            lookup(osmium::item_type::relation, it->id());
         }
 
         reader_pass1.close();
@@ -164,6 +230,14 @@ bool CommandRenumber::run() {
         reader_pass2.close();
 
         writer.close();
+
+        if (!m_index_directory.empty()) {
+            m_vout << "Writing index files...\n";
+            write_index(osmium::item_type::node, "nodes");
+            write_index(osmium::item_type::way, "ways");
+            write_index(osmium::item_type::relation, "relations");
+        }
+
     } catch (std::exception& e) {
         std::cerr << e.what() << "\n";
         return false;
diff --git a/src/command_renumber.hpp b/src/command_renumber.hpp
index 31ac347..8527ec0 100644
--- a/src/command_renumber.hpp
+++ b/src/command_renumber.hpp
@@ -23,21 +23,22 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 */
 
+#include <map>
 #include <string>
 #include <vector>
 
-#include <osmium/index/map/sparse_mem_map.hpp>
 #include <osmium/memory/buffer.hpp>
 #include <osmium/osm/entity_bits.hpp>
 #include <osmium/osm/types.hpp>
 
 #include "osmc.hpp"
 
-typedef osmium::index::map::SparseMemMap<osmium::unsigned_object_id_type, osmium::unsigned_object_id_type> remap_index_type;
+typedef std::map<osmium::unsigned_object_id_type, osmium::unsigned_object_id_type> remap_index_type;
 
 class CommandRenumber : public Command, with_single_osm_input, with_osm_output {
 
     std::vector<std::string> m_output_headers;
+    std::string m_index_directory;
 
     remap_index_type m_id_index[3];
     osmium::object_id_type m_last_id[3] = {0, 0, 0};
@@ -48,10 +49,19 @@ public:
 
     bool setup(const std::vector<std::string>& arguments) override final;
 
-    osmium::object_id_type lookup(int n, osmium::object_id_type id);
+    osmium::object_id_type lookup(osmium::item_type type, osmium::object_id_type id);
 
     void renumber(osmium::memory::Buffer& buffer);
 
+    std::string filename(const std::string& name);
+
+    remap_index_type& index(osmium::item_type type);
+    osmium::object_id_type& last_id(osmium::item_type type);
+
+    void read_index(osmium::item_type type, const std::string& name);
+
+    void write_index(osmium::item_type type, const std::string& name);
+
     bool run() override final;
 
 }; // class CommandRenumber
diff --git a/test/renumber/CMakeLists.txt b/test/renumber/CMakeLists.txt
index 2da8098..f28978e 100644
--- a/test/renumber/CMakeLists.txt
+++ b/test/renumber/CMakeLists.txt
@@ -10,6 +10,19 @@ function(check_renumber _name _input _output)
     check_output(renumber-${_name} "renumber --generator=test -f osm renumber/${_input}" "renumber/${_output}")
 endfunction()
 
+function(check_renumber2 _name _in1 _in2 _out)
+    set(_idxdir "${PROJECT_BINARY_DIR}/test/renumber/index")
+    file(REMOVE_RECURSE ${_idxdir})
+    file(MAKE_DIRECTORY ${_idxdir})
+    check_output2(renumber-${_name}
+                  "renumber --index-directory=${_idxdir} --generator=test -f osm --overwrite -o /dev/null renumber/${_in1}"
+                  "renumber --index-directory=${_idxdir} --generator=test -f osc renumber/${_in2}"
+                  "renumber/${_out}"
+    )
+endfunction()
+
 check_renumber(sorted input-sorted.osm output-sorted.osm)
 
+check_renumber2(change input-sorted.osm input-change.osc output-change.osc)
+
 #-----------------------------------------------------------------------------
diff --git a/test/renumber/input-change.osc b/test/renumber/input-change.osc
new file mode 100644
index 0000000..0a32239
--- /dev/null
+++ b/test/renumber/input-change.osc
@@ -0,0 +1,19 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<osmChange version="0.6" generator="testdata">
+  <modify>
+    <node id="11" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
+  </modify>
+  <delete>
+    <node id="13" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="4" lon="1"/>
+  </delete>
+  <create>
+    <node id="22" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
+  </create>
+  <modify>
+    <way id="21" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2">
+      <nd ref="12"/>
+      <nd ref="22"/>
+      <tag k="xyz" v="new"/>
+    </way>
+  </modify>
+</osmChange>
diff --git a/test/renumber/output-change.osc b/test/renumber/output-change.osc
new file mode 100644
index 0000000..5767d0b
--- /dev/null
+++ b/test/renumber/output-change.osc
@@ -0,0 +1,19 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<osmChange version="0.6" generator="test">
+  <modify>
+    <node id="2" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
+  </modify>
+  <delete>
+    <node id="4" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="4" lon="1"/>
+  </delete>
+  <create>
+    <node id="5" version="1" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2" lat="2" lon="2"/>
+  </create>
+  <modify>
+    <way id="2" version="2" timestamp="2015-01-01T01:00:00Z" uid="1" user="test" changeset="2">
+      <nd ref="3"/>
+      <nd ref="5"/>
+      <tag k="xyz" v="new"/>
+    </way>
+  </modify>
+</osmChange>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/osmium-tool.git



More information about the Pkg-grass-devel mailing list