[osmium-tool] 13/44: Add check-refs subcommand to check referential integrity of OSM file.

Sebastiaan Couwenberg sebastic at moszumanska.debian.org
Tue Jul 21 20:15:54 UTC 2015


This is an automated email from the git hooks/post-receive script.

sebastic pushed a commit to tag v1.1.0
in repository osmium-tool.

commit 951f28a14a655b134608c2146cbe7ef14aca36c3
Author: Jochen Topf <jochen at topf.org>
Date:   Wed Jun 24 21:34:23 2015 +0200

    Add check-refs subcommand to check referential integrity of OSM file.
---
 CMakeLists.txt             |   1 +
 man/osmium-check-refs.md   |  65 +++++++++
 src/command_check_refs.cpp | 325 +++++++++++++++++++++++++++++++++++++++++++++
 src/command_check_refs.hpp |  52 ++++++++
 zsh_completion/_osmium     |  19 ++-
 5 files changed, 459 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7afcdd1..21c486f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,6 +117,7 @@ if(PANDOC)
     add_man_page(1 osmium)
     add_man_page(1 osmium-apply-changes)
     add_man_page(1 osmium-cat)
+    add_man_page(1 osmium-check-refs)
     add_man_page(1 osmium-fileinfo)
     add_man_page(1 osmium-merge-changes)
     add_man_page(1 osmium-time-filter)
diff --git a/man/osmium-check-refs.md b/man/osmium-check-refs.md
new file mode 100644
index 0000000..98c43f1
--- /dev/null
+++ b/man/osmium-check-refs.md
@@ -0,0 +1,65 @@
+
+# NAME
+
+osmium-check-refs - check referential integrity of OSM file
+
+
+# SYNOPSIS
+
+**osmium check-refs** \[*OPTIONS*\] *INPUT-FILE*
+
+
+# DESCRIPTION
+
+Ways in OSM files refer to OSM nodes; relations refer to nodes, ways, or other
+relations. This command checks whether all objects referenced in the input
+file are also present in the input file.
+
+Referential integrity is often broken in extracts. This can lead to problems
+with some uses of the OSM data. Use this command to make sure your data is
+good.
+
+This command will do the check in one pass through the input data. It needs
+enough main memory to store all temporary data. Largest memory need will be
+1 bit for each node ID, that's roughly 500 MB these days (Summer 2015).
+
+If the option -r is not given, this command will only check if all nodes
+referenced in ways are in the file; with the option, relations will also be
+checked.
+
+This command expects the input file to be ordered in the usual way: First
+nodes in order of ID, then ways in order of ID, then relations in order of ID.
+
+
+# OPTIONS
+
+-F, --input-format=FORMAT
+:   The format of the input file. Can be used to set the input format if it
+    can't be autodetected from the file name. See **osmium-file-formats**(5)
+    or the libosmium manual for details.
+
+-i, --show-ids
+:   Print all missing IDs to stdout. If you don't give this option, only a
+    summary is shown.
+
+-r, --check-relations
+:   Also check referential integrity of relations. Without this option, only
+    nodes in ways are checked.
+
+-v, --verbose
+:   Set verbose mode. The program will output information about what it is
+    doing to *stderr*.
+
+
+# DIAGNOSTICS
+
+**osmium check-refs** exits with code 2 if there was a problem with the command
+line arguments, code 0 if all referenced nodes (with -r: all objects) are in
+the file, and with exit code 1 otherwise.
+
+
+# SEE ALSO
+
+* [Osmium website](http://osmcode.org/osmium)
+
+
diff --git a/src/command_check_refs.cpp b/src/command_check_refs.cpp
new file mode 100644
index 0000000..5287ece
--- /dev/null
+++ b/src/command_check_refs.cpp
@@ -0,0 +1,325 @@
+/*
+
+Osmium -- OpenStreetMap data manipulation command line tool
+http://osmcode.org/osmium
+
+Copyright (C) 2013-2015  Jochen Topf <jochen at topf.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <set>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <osmium/io/any_input.hpp>
+#include <osmium/io/any_output.hpp>
+
+#include "command_check_refs.hpp"
+
+// Parse the command line arguments of "osmium check-refs" and set up the
+// input file. Returns false (after printing a message to stderr) if the
+// arguments can not be parsed or the format of STDIN input was not declared.
+bool CommandCheckRefs::setup(const std::vector<std::string>& arguments) {
+    namespace po = boost::program_options;
+    po::variables_map vm;
+    try {
+        po::options_description cmdline("Allowed options");
+        cmdline.add_options()
+        ("verbose,v", "Set verbose mode")
+        ("show-ids,i", "Show IDs of missing objects")
+        ("input-format,F", po::value<std::string>(), "Format of input files")
+        ("check-relations,r", "Also check relations")
+        ;
+
+        // The positional argument is mapped onto this option (see below).
+        po::options_description hidden("Hidden options");
+        hidden.add_options()
+        ("input-filename", po::value<std::string>(), "Input file")
+        ;
+
+        po::options_description desc("Allowed options");
+        desc.add(cmdline).add(hidden);
+
+        // At most one positional argument: the input file name.
+        po::positional_options_description positional;
+        positional.add("input-filename", 1);
+
+        po::store(po::command_line_parser(arguments).options(desc).positional(positional).run(), vm);
+        po::notify(vm);
+
+        if (vm.count("verbose")) {
+            m_vout.verbose(true);
+        }
+        if (vm.count("show-ids")) {
+            m_show_ids = true;
+        }
+        if (vm.count("input-filename")) {
+            m_input_filename = vm["input-filename"].as<std::string>();
+        }
+        if (vm.count("input-format")) {
+            m_input_format = vm["input-format"].as<std::string>();
+        }
+        if (vm.count("check-relations")) {
+            m_check_relations = true;
+        }
+    } catch (const po::error& e) {
+        std::cerr << "Error parsing command line: " << e.what() << std::endl;
+        return false;
+    }
+
+    m_vout << "Started osmium check-refs\n";
+    m_vout << "Command line options and default settings:\n";
+    m_vout << "  input filename: " << m_input_filename << "\n";
+    m_vout << "  input format: " << m_input_format << "\n";
+    m_vout << "  show ids: " << (m_show_ids ? "yes\n" : "no\n");
+    m_vout << "  check relations: " << (m_check_relations ? "yes\n" : "no\n");
+
+    // The format can not be detected from a file name when reading STDIN.
+    if ((m_input_filename == "-" || m_input_filename.empty()) && m_input_format.empty()) {
+        std::cerr << "When reading from STDIN you need to use the --input-format,F option to declare the file format.\n";
+        return false;
+    }
+
+    if (m_input_format.empty()) {
+        m_input_file = osmium::io::File(m_input_filename);
+    } else {
+        m_input_file = osmium::io::File(m_input_filename, m_input_format);
+    }
+
+    return true;
+}
+
+
+/*
+ * Growable vector of flags built on std::vector<bool> and indexed by the
+ * absolute value of an object ID. Setting a flag enlarges the vector when
+ * needed; IDs that were never set (also beyond the end) read as false.
+ */
+class bitsvec {
+
+    std::vector<bool> m_bits;
+
+public:
+
+    bitsvec() : m_bits() {}
+
+    // Mark the ID as set, resizing to index + 1024 * 1024 entries if too small.
+    void set(osmium::object_id_type id) {
+        const osmium::unsigned_object_id_type index = std::abs(id);
+        if (index >= m_bits.size()) {
+            m_bits.resize(index + 1024 * 1024);
+        }
+        m_bits[index] = true;
+    }
+
+    // Return true if the ID was marked with set() before.
+    bool get(osmium::object_id_type id) const {
+        const osmium::unsigned_object_id_type index = std::abs(id);
+        if (index >= m_bits.size()) {
+            return false;
+        }
+        return m_bits[index];
+    }
+
+}; // class bitsvec
+
+// Handler that counts references (way nodes and, with check_relations, relation
+// members) to objects missing from the input. Expects nodes, ways, relations.
+class RefCheckHandler : public osmium::handler::Handler {
+
+    bitsvec m_nodes; // node IDs seen; relation() also marks missing members
+    bitsvec m_ways;  // way IDs seen; relation() also marks missing members
+
+    std::vector<uint32_t> m_relation_ids;         // relations in the file
+    std::set<uint32_t> m_member_relation_ids;     // relations used as members
+    std::vector<uint32_t> m_missing_relation_ids; // used but not in the file
+
+    uint64_t m_node_count = 0;
+    uint64_t m_way_count = 0;
+    uint64_t m_relation_count = 0;
+
+    uint64_t m_missing_nodes_in_ways = 0;
+    uint64_t m_missing_nodes_in_relations = 0;
+    uint64_t m_missing_ways_in_relations = 0;
+
+    osmium::util::VerboseOutput& m_vout;
+    bool m_show_ids;
+    bool m_check_relations;
+    bool m_relations_done = false; // m_missing_relation_ids already computed?
+
+public:
+
+    RefCheckHandler(osmium::util::VerboseOutput& vout, bool show_ids, bool check_relations) :
+        m_vout(vout), m_show_ids(show_ids), m_check_relations(check_relations) {
+    }
+
+    uint64_t node_count() const {
+        return m_node_count;
+    }
+
+    uint64_t way_count() const {
+        return m_way_count;
+    }
+
+    uint64_t relation_count() const {
+        return m_relation_count;
+    }
+
+    uint64_t missing_nodes_in_ways() const {
+        return m_missing_nodes_in_ways;
+    }
+
+    uint64_t missing_nodes_in_relations() const {
+        return m_missing_nodes_in_relations;
+    }
+
+    uint64_t missing_ways_in_relations() const {
+        return m_missing_ways_in_relations;
+    }
+
+    // Number of missing member relations; computed on first call, then cached.
+    uint64_t missing_relations_in_relations() {
+        if (!m_relations_done) {
+            std::sort(m_relation_ids.begin(), m_relation_ids.end());
+            std::set_difference(m_member_relation_ids.cbegin(), m_member_relation_ids.cend(),
+                                m_relation_ids.cbegin(), m_relation_ids.cend(),
+                                std::back_inserter(m_missing_relation_ids));
+            m_relations_done = true;
+        }
+        return m_missing_relation_ids.size();
+    }
+
+    bool any_errors() {
+        return missing_nodes_in_ways()          > 0 ||
+               missing_nodes_in_relations()     > 0 ||
+               missing_ways_in_relations()      > 0 ||
+               missing_relations_in_relations() > 0;
+    }
+
+    // Remember the ID of every node read.
+    void node(const osmium::Node& node) {
+        if (m_node_count == 0) {
+            m_vout << "Reading nodes...\n";
+        }
+        ++m_node_count;
+        m_nodes.set(node.id());
+    }
+
+    // Count (and with show_ids print) the way nodes that were not read before.
+    void way(const osmium::Way& way) {
+        if (m_way_count == 0) {
+            m_vout << "Reading ways...\n";
+        }
+        ++m_way_count;
+        // Way IDs are only looked up when checking relation members.
+        if (m_check_relations) {
+            m_ways.set(way.id());
+        }
+        for (const auto& node_ref : way.nodes()) {
+            if (!m_nodes.get(node_ref.ref())) {
+                ++m_missing_nodes_in_ways;
+                if (m_show_ids) {
+                    std::cout << "n" << node_ref.ref() << " in w" << way.id() << "\n";
+                }
+            }
+        }
+    }
+
+    // With check_relations: check node and way members, collect relation members.
+    void relation(const osmium::Relation& relation) {
+        if (m_relation_count == 0) {
+            m_vout << "Reading relations...\n";
+        }
+        ++m_relation_count;
+        if (m_check_relations) {
+            m_relation_ids.push_back(uint32_t(relation.id()));
+            for (const auto& member : relation.members()) {
+                switch (member.type()) {
+                    case osmium::item_type::node:
+                        if (!m_nodes.get(member.ref())) {
+                            ++m_missing_nodes_in_relations;
+                            m_nodes.set(member.ref());
+                            if (m_show_ids) {
+                                std::cout << "n" << member.ref() << " in r" << relation.id() << "\n";
+                            }
+                        }
+                        break;
+                    case osmium::item_type::way:
+                        if (!m_ways.get(member.ref())) {
+                            ++m_missing_ways_in_relations;
+                            m_ways.set(member.ref());
+                            if (m_show_ids) {
+                                std::cout << "w" << member.ref() << " in r" << relation.id() << "\n";
+                            }
+                        }
+                        break;
+                    case osmium::item_type::relation:
+                        // Record the referenced relation; checked after reading.
+                        m_member_relation_ids.insert(uint32_t(member.ref()));
+                        break;
+                    default:
+                        break;
+                }
+            }
+        }
+    }
+
+    // Print the IDs found by missing_relations_in_relations(), if any.
+    void show_missing_relation_ids() {
+        for (auto id : m_missing_relation_ids) {
+            std::cout << "r" << id << " in r\n";
+        }
+    }
+
+}; // class RefCheckHandler
+
+// Read the whole input through a RefCheckHandler, print a summary to stderr
+// and return true only if no missing reference was found.
+bool CommandCheckRefs::run() {
+    osmium::io::Reader reader(m_input_file);
+    RefCheckHandler checker(m_vout, m_show_ids, m_check_relations);
+    osmium::apply(reader, checker);
+
+    std::cerr << "There are " << checker.node_count() << " nodes, " << checker.way_count() << " ways, and " << checker.relation_count() << " relations in this file.\n";
+    if (!m_check_relations) {
+        std::cerr << "Nodes in ways missing: " << checker.missing_nodes_in_ways() << "\n";
+    } else {
+        std::cerr << "Nodes     in ways      missing: " << checker.missing_nodes_in_ways() << "\n";
+        std::cerr << "Nodes     in relations missing: " << checker.missing_nodes_in_relations() << "\n";
+        std::cerr << "Ways      in relations missing: " << checker.missing_ways_in_relations() << "\n";
+        std::cerr << "Relations in relations missing: " << checker.missing_relations_in_relations() << "\n";
+    }
+
+    // The list printed here is filled by missing_relations_in_relations() above.
+    if (m_show_ids) {
+        checker.show_missing_relation_ids();
+    }
+
+    m_vout << "Done.\n";
+    return !checker.any_errors();
+}
+
+namespace {
+    // Initializing this constant registers the "check-refs" command.
+    const bool register_check_refs_command = CommandFactory::add("check-refs", "Check referential integrity of an OSM file", []() {
+        return new CommandCheckRefs();
+    });
+
+}
+
diff --git a/src/command_check_refs.hpp b/src/command_check_refs.hpp
new file mode 100644
index 0000000..ccfddc9
--- /dev/null
+++ b/src/command_check_refs.hpp
@@ -0,0 +1,52 @@
+#ifndef COMMAND_CHECK_REFS_HPP
+#define COMMAND_CHECK_REFS_HPP
+
+/*
+
+Osmium -- OpenStreetMap data manipulation command line tool
+http://osmcode.org/osmium
+
+Copyright (C) 2013-2015  Jochen Topf <jochen at topf.org>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+#include <string>
+
+#include <osmium/io/file.hpp>
+
+#include "osmc.hpp"
+
+class CommandCheckRefs : public Command {
+    // Implements the "check-refs" subcommand (referential integrity check).
+    std::string m_input_filename = "-"; // default: stdin
+    std::string m_input_format; // empty unless given with -F/--input-format
+    osmium::io::File m_input_file; // assigned in setup(), opened in run()
+
+    bool m_show_ids = false; // set by -i/--show-ids
+    bool m_check_relations = false; // set by -r/--check-relations
+
+public:
+
+    CommandCheckRefs() = default;
+    // Parses the command line arguments; returns false on error.
+    bool setup(const std::vector<std::string>& arguments) override final;
+    // Checks the input file; returns false if any reference is missing.
+    bool run() override final;
+
+}; // class CommandCheckRefs
+
+
+#endif // COMMAND_CHECK_REFS_HPP
diff --git a/zsh_completion/_osmium b/zsh_completion/_osmium
index 1aa92c6..a7f58d4 100644
--- a/zsh_completion/_osmium
+++ b/zsh_completion/_osmium
@@ -4,7 +4,7 @@
 #
 # To test this file:
 # 1) Comment out last line
-# 2) Call: . ./zsh_osmium
+# 2) Call: . ./_osmium
 # 3) Call: compdef _osmium osmium
 #
 # To read more about what is happening here:
@@ -17,7 +17,7 @@ osmium_file_glob="'*.(osm|osh|osc|pbf|osm.pbf) *.(osm|osh|osc).(bz2|gz)'"
 
 _osmium() {
     local -a osmium_commands
-    osmium_commands=(apply-changes cat fileinfo help merge-changes time-filter)
+    osmium_commands=(apply-changes cat check-refs fileinfo help merge-changes time-filter)
     if (( CURRENT > 2 )); then
         # Remember the subcommand name
         local cmd=${words[2]}
@@ -72,6 +72,19 @@ _osmium-cat() {
         "*::input OSM file:_files -g ${osmium_file_glob}"
 }
 
+_osmium-check-refs() { # completes the options and input file of "osmium check-refs"
+    _arguments : \
+        '(-F)--input-format=[format of input OSM file]:OSM file format:_osmium_file_formats' \
+        '(--input-format)-F=[format of input OSM file]:OSM file format:_osmium_file_formats' \
+        '(--show-ids)-i[show ids of missing objects]' \
+        '(-i)--show-ids[show ids of missing objects]' \
+        '(--check-relations)-r[also check referential integrity of relations]' \
+        '(-r)--check-relations[also check referential integrity of relations]' \
+        '(--verbose)-v[set verbose mode]' \
+        '(-v)--verbose[set verbose mode]' \
+        "*::input OSM file:_files -g ${osmium_file_glob}"
+}
+
 _osmium-fileinfo() {
     _arguments : \
         '(--extended)-e[show extended info (reads entire file)]' \
@@ -141,7 +154,7 @@ _osmium_object_type() {
 
 _osmium-help() {
     local -a osmium_help_topics
-    osmium_help_topics=(apply-changes cat fileinfo help merge-changes time-filter file-formats)
+    osmium_help_topics=(apply-changes cat check-refs fileinfo help merge-changes time-filter file-formats)
     _describe -t osmium-help-topics 'osmium help topics' osmium_help_topics
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/osmium-tool.git



More information about the Pkg-grass-devel mailing list