[osmium-tool] 13/44: Add check-refs subcommand to check referential integrity of OSM file.
Sebastiaan Couwenberg
sebastic at moszumanska.debian.org
Tue Jul 21 20:15:54 UTC 2015
This is an automated email from the git hooks/post-receive script.
sebastic pushed a commit to tag v1.1.0
in repository osmium-tool.
commit 951f28a14a655b134608c2146cbe7ef14aca36c3
Author: Jochen Topf <jochen at topf.org>
Date: Wed Jun 24 21:34:23 2015 +0200
Add check-refs subcommand to check referential integrity of OSM file.
---
CMakeLists.txt | 1 +
man/osmium-check-refs.md | 65 +++++++++
src/command_check_refs.cpp | 325 +++++++++++++++++++++++++++++++++++++++++++++
src/command_check_refs.hpp | 52 ++++++++
zsh_completion/_osmium | 19 ++-
5 files changed, 459 insertions(+), 3 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7afcdd1..21c486f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,6 +117,7 @@ if(PANDOC)
add_man_page(1 osmium)
add_man_page(1 osmium-apply-changes)
add_man_page(1 osmium-cat)
+ add_man_page(1 osmium-check-refs)
add_man_page(1 osmium-fileinfo)
add_man_page(1 osmium-merge-changes)
add_man_page(1 osmium-time-filter)
diff --git a/man/osmium-check-refs.md b/man/osmium-check-refs.md
new file mode 100644
index 0000000..98c43f1
--- /dev/null
+++ b/man/osmium-check-refs.md
@@ -0,0 +1,65 @@
+
+# NAME
+
+osmium-check-refs - check referential integrity of OSM file
+
+
+# SYNOPSIS
+
+**osmium check-refs** \[*OPTIONS*\] *INPUT-FILE*
+
+
+# DESCRIPTION
+
+Ways in OSM files refer to OSM nodes; relations refer to nodes, ways, or other
+relations. This command checks whether all objects referenced in the input
+file are also present in the input file.
+
+Referential integrity is often broken in extracts. This can lead to problems
+with some uses of the OSM data. Use this command to make sure your data is
+good.
+
+This command will do the check in one pass through the input data. It needs
+enough main memory to store all temporary data. Largest memory need will be
+1 bit for each node ID, thats roughly 500 MB these days (Summer 2015).
+
+If the option -r is not given, this command will only check if all nodes
+references in ways are in the file, with the option, relations will also be
+checked.
+
+This command expects the input file to be ordered in the usual way: First
+nodes in order of ID, then ways in order of ID, then relations in order of ID.
+
+
+# OPTIONS
+
+-F, --input-format=FORMAT
+: The format of the input file. Can be used to set the input format if it
+ can't be autodetected from the file name. See **osmium-file-formats**(5)
+ or the libosmium manual for details.
+
+-i, --show-ids
+: Print all missing IDs to stdout. If you don't give this option, only a
+ summary is shown.
+
+-r, --check-relations
+: Also check referential integrity of relations. Without this option, only
+ nodes in ways are checked.
+
+-v, --verbose
+: Set verbose mode. The program will output information about what it is
+ doing to *stderr*.
+
+
+# DIAGNOSTICS
+
+**osmium check-refs** exits with code 2 if there was a problem with the command
+line arguments, code 0 if all referenced nodes (with -r: all objects) are in
+the file, and with exit code 1 otherwise.
+
+
+# SEE ALSO
+
+* [Osmium website](http://osmcode.org/osmium)
+
+
diff --git a/src/command_check_refs.cpp b/src/command_check_refs.cpp
new file mode 100644
index 0000000..5287ece
--- /dev/null
+++ b/src/command_check_refs.cpp
@@ -0,0 +1,325 @@
+/*
+
+Osmium -- OpenStreetMap data manipulation command line tool
+http://osmcode.org/osmium
+
+Copyright (C) 2013-2015 Jochen Topf <jochen at topf.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <set>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <osmium/io/any_input.hpp>
+#include <osmium/io/any_output.hpp>
+
+#include "command_check_refs.hpp"
+
+bool CommandCheckRefs::setup(const std::vector<std::string>& arguments) {
+ namespace po = boost::program_options;
+ po::variables_map vm;
+ try {
+ po::options_description cmdline("Allowed options");
+ cmdline.add_options()
+ ("verbose,v", "Set verbose mode")
+ ("show-ids,i", "Show IDs of missing objects")
+ ("input-format,F", po::value<std::string>(), "Format of input files")
+ ("check-relations,r", "Also check relations")
+ ;
+
+ po::options_description hidden("Hidden options");
+ hidden.add_options()
+ ("input-filename", po::value<std::string>(), "Input file")
+ ;
+
+ po::options_description desc("Allowed options");
+ desc.add(cmdline).add(hidden);
+
+ po::positional_options_description positional;
+ positional.add("input-filename", 1);
+
+ po::store(po::command_line_parser(arguments).options(desc).positional(positional).run(), vm);
+ po::notify(vm);
+
+ if (vm.count("verbose")) {
+ m_vout.verbose(true);
+ }
+
+ if (vm.count("show-ids")) {
+ m_show_ids = true;
+ }
+
+ if (vm.count("input-filename")) {
+ m_input_filename = vm["input-filename"].as<std::string>();
+ }
+
+ if (vm.count("input-format")) {
+ m_input_format = vm["input-format"].as<std::string>();
+ }
+
+ if (vm.count("check-relations")) {
+ m_check_relations = true;
+ }
+
+ } catch (boost::program_options::error& e) {
+ std::cerr << "Error parsing command line: " << e.what() << std::endl;
+ return false;
+ }
+
+ m_vout << "Started osmium check-refs\n";
+
+ m_vout << "Command line options and default settings:\n";
+ m_vout << " input filename: " << m_input_filename << "\n";
+ m_vout << " input format: " << m_input_format << "\n";
+ m_vout << " show ids: " << (m_show_ids ? "yes\n" : "no\n");
+ m_vout << " check relations: " << (m_check_relations ? "yes\n" : "no\n");
+
+ if ((m_input_filename == "-" || m_input_filename == "") && m_input_format.empty()) {
+ std::cerr << "When reading from STDIN you need to use the --input-format,F option to declare the file format.\n";
+ return false;
+ }
+
+ if (m_input_format.empty()) {
+ m_input_file = osmium::io::File(m_input_filename);
+ } else {
+ m_input_file = osmium::io::File(m_input_filename, m_input_format);
+ }
+
+ return true;
+}
+
+
+/*
+ * Small wrapper class around std::vector<bool> that make sure the vector is
+ * always large enough for the data we are putting in.
+ */
+class bitsvec {
+
+ std::vector<bool> m_bits;
+
+public:
+
+ bitsvec() :
+ m_bits() {
+ }
+
+ void set(osmium::object_id_type id) {
+ osmium::unsigned_object_id_type pid = std::abs(id);
+
+ if (m_bits.size() <= pid) {
+ m_bits.resize(pid + 1024 * 1024);
+ }
+
+ m_bits[pid] = true;
+ }
+
+ bool get(osmium::object_id_type id) const {
+ osmium::unsigned_object_id_type pid = std::abs(id);
+
+ return pid < m_bits.size() && m_bits[pid];
+ }
+
+}; // class bitsvec
+
+class RefCheckHandler : public osmium::handler::Handler {
+
+ bitsvec m_nodes;
+ bitsvec m_ways;
+
+ std::vector<uint32_t> m_relation_ids;
+ std::set<uint32_t> m_member_relation_ids;
+ std::vector<uint32_t> m_missing_relation_ids;
+
+ uint64_t m_node_count = 0;
+ uint64_t m_way_count = 0;
+ uint64_t m_relation_count = 0;
+
+ uint64_t m_missing_nodes_in_ways = 0;
+ uint64_t m_missing_nodes_in_relations = 0;
+ uint64_t m_missing_ways_in_relations = 0;
+
+ osmium::util::VerboseOutput& m_vout;
+ bool m_show_ids;
+ bool m_check_relations;
+ bool m_relations_done = false;
+
+public:
+
+ RefCheckHandler(osmium::util::VerboseOutput& vout, bool show_ids, bool check_relations) :
+ m_vout(vout),
+ m_show_ids(show_ids),
+ m_check_relations(check_relations) {
+ }
+
+ uint64_t node_count() const {
+ return m_node_count;
+ }
+
+ uint64_t way_count() const {
+ return m_way_count;
+ }
+
+ uint64_t relation_count() const {
+ return m_relation_count;
+ }
+
+ uint64_t missing_nodes_in_ways() const {
+ return m_missing_nodes_in_ways;
+ }
+
+ uint64_t missing_nodes_in_relations() const {
+ return m_missing_nodes_in_relations;
+ }
+
+ uint64_t missing_ways_in_relations() const {
+ return m_missing_ways_in_relations;
+ }
+
+ uint64_t missing_relations_in_relations() {
+ if (!m_relations_done) {
+ std::sort(m_relation_ids.begin(), m_relation_ids.end());
+
+ std::set_difference(m_member_relation_ids.cbegin(), m_member_relation_ids.cend(),
+ m_relation_ids.cbegin(), m_relation_ids.cend(),
+ std::back_inserter(m_missing_relation_ids));
+
+ m_relations_done = true;
+ }
+
+ return m_missing_relation_ids.size();
+ }
+
+ bool any_errors() {
+ return missing_nodes_in_ways() > 0 ||
+ missing_nodes_in_relations() > 0 ||
+ missing_ways_in_relations() > 0 ||
+ missing_relations_in_relations() > 0;
+ }
+
+ void node(const osmium::Node& node) {
+ if (m_node_count == 0) {
+ m_vout << "Reading nodes...\n";
+ }
+ ++m_node_count;
+
+ m_nodes.set(node.id());
+ }
+
+ void way(const osmium::Way& way) {
+ if (m_way_count == 0) {
+ m_vout << "Reading ways...\n";
+ }
+ ++m_way_count;
+
+ if (m_check_relations) {
+ m_ways.set(way.id());
+ }
+
+ for (const auto& node_ref : way.nodes()) {
+ if (!m_nodes.get(node_ref.ref())) {
+ ++m_missing_nodes_in_ways;
+ if (m_show_ids) {
+ std::cout << "n" << node_ref.ref() << " in w" << way.id() << "\n";
+ }
+ }
+ }
+ }
+
+ void relation(const osmium::Relation& relation) {
+ if (m_relation_count == 0) {
+ m_vout << "Reading relations...\n";
+ }
+ ++m_relation_count;
+
+ if (m_check_relations) {
+ m_relation_ids.push_back(uint32_t(relation.id()));
+ for (const auto& member : relation.members()) {
+ switch (member.type()) {
+ case osmium::item_type::node:
+ if (!m_nodes.get(member.ref())) {
+ ++m_missing_nodes_in_relations;
+ m_nodes.set(member.ref());
+ if (m_show_ids) {
+ std::cout << "n" << member.ref() << " in r" << relation.id() << "\n";
+ }
+ }
+ break;
+ case osmium::item_type::way:
+ if (!m_ways.get(member.ref())) {
+ ++m_missing_ways_in_relations;
+ m_ways.set(member.ref());
+ if (m_show_ids) {
+ std::cout << "w" << member.ref() << " in r" << relation.id() << "\n";
+ }
+ }
+ break;
+ case osmium::item_type::relation:
+ m_member_relation_ids.insert(uint32_t(relation.id()));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ void show_missing_relation_ids() {
+ for (auto id : m_missing_relation_ids) {
+ std::cout << "r" << id << " in r\n";
+ }
+ }
+
+}; // class RefCheckHandler
+
+bool CommandCheckRefs::run() {
+ osmium::io::Reader reader(m_input_file);
+
+ RefCheckHandler handler(m_vout, m_show_ids, m_check_relations);
+ osmium::apply(reader, handler);
+
+ std::cerr << "There are " << handler.node_count() << " nodes, " << handler.way_count() << " ways, and " << handler.relation_count() << " relations in this file.\n";
+
+ if (m_check_relations) {
+ std::cerr << "Nodes in ways missing: " << handler.missing_nodes_in_ways() << "\n";
+ std::cerr << "Nodes in relations missing: " << handler.missing_nodes_in_relations() << "\n";
+ std::cerr << "Ways in relations missing: " << handler.missing_ways_in_relations() << "\n";
+ std::cerr << "Relations in relations missing: " << handler.missing_relations_in_relations() << "\n";
+ } else {
+ std::cerr << "Nodes in ways missing: " << handler.missing_nodes_in_ways() << "\n";
+ }
+
+ if (m_show_ids) {
+ handler.show_missing_relation_ids();
+ }
+
+ m_vout << "Done.\n";
+
+ return !handler.any_errors();
+}
+
+namespace {
+
+ const bool register_check_refs_command = CommandFactory::add("check-refs", "Check referential integrity of an OSM file", []() {
+ return new CommandCheckRefs();
+ });
+
+}
+
diff --git a/src/command_check_refs.hpp b/src/command_check_refs.hpp
new file mode 100644
index 0000000..ccfddc9
--- /dev/null
+++ b/src/command_check_refs.hpp
@@ -0,0 +1,52 @@
+#ifndef COMMAND_CHECK_REFS_HPP
+#define COMMAND_CHECK_REFS_HPP
+
+/*
+
+Osmium -- OpenStreetMap data manipulation command line tool
+http://osmcode.org/osmium
+
+Copyright (C) 2013-2015 Jochen Topf <jochen at topf.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include <string>
+
+#include <osmium/io/file.hpp>
+
+#include "osmc.hpp"
+
+class CommandCheckRefs : public Command {
+
+ std::string m_input_filename = "-"; // default: stdin
+ std::string m_input_format;
+ osmium::io::File m_input_file;
+
+ bool m_show_ids = false;
+ bool m_check_relations = false;
+
+public:
+
+ CommandCheckRefs() = default;
+
+ bool setup(const std::vector<std::string>& arguments) override final;
+
+ bool run() override final;
+
+}; // class CommandCheckRefs
+
+
+#endif // COMMAND_CHECK_REFS_HPP
diff --git a/zsh_completion/_osmium b/zsh_completion/_osmium
index 1aa92c6..a7f58d4 100644
--- a/zsh_completion/_osmium
+++ b/zsh_completion/_osmium
@@ -4,7 +4,7 @@
#
# To test this file:
# 1) Comment out last line
-# 2) Call: . ./zsh_osmium
+# 2) Call: . ./_osmium
# 3) Call: compdef _osmium osmium
#
# To read more about what is happening here:
@@ -17,7 +17,7 @@ osmium_file_glob="'*.(osm|osh|osc|pbf|osm.pbf) *.(osm|osh|osc).(bz2|gz)'"
_osmium() {
local -a osmium_commands
- osmium_commands=(apply-changes cat fileinfo help merge-changes time-filter)
+ osmium_commands=(apply-changes cat check-refs fileinfo help merge-changes time-filter)
if (( CURRENT > 2 )); then
# Remember the subcommand name
local cmd=${words[2]}
@@ -72,6 +72,19 @@ _osmium-cat() {
"*::input OSM file:_files -g ${osmium_file_glob}"
}
+_osmium-check-refs() {
+ _arguments : \
+ '(-F)--input-format=[format of input OSM file]:OSM file format:_osmium_file_formats' \
+ '(--input-format)-F=[format of input OSM file]:OSM file format:_osmium_file_formats' \
+ '(--show-ids)-i[show ids of missing objects]' \
+ '(-i)--show-ids[show ids of missing objects]' \
+ '(--check-relations)-r[also check referential integrity of relations]' \
+ '(-r)--check-relations[also check referential integrity of relations]' \
+ '(--verbose)-v[set verbose mode]' \
+ '(-v)--verbose[set verbose mode]' \
+ "*::input OSM file:_files -g ${osmium_file_glob}"
+}
+
_osmium-fileinfo() {
_arguments : \
'(--extended)-e[show extended info (reads entire file)]' \
@@ -141,7 +154,7 @@ _osmium_object_type() {
_osmium-help() {
local -a osmium_help_topics
- osmium_help_topics=(apply-changes cat fileinfo help merge-changes time-filter file-formats)
+ osmium_help_topics=(apply-changes cat check-refs fileinfo help merge-changes time-filter file-formats)
_describe -t osmium-help-topics 'osmium help topics' osmium_help_topics
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/osmium-tool.git
More information about the Pkg-grass-devel
mailing list