[Git][debian-gis-team/osm2pgsql][buster-backports] 8 commits: Don't define ACCEPT_USE_OF_DEPRECATED_PROJ_API_H, fixed in libosmium.

Sat Nov 2 13:49:10 GMT 2019


Bas Couwenberg pushed to branch buster-backports at Debian GIS Project / osm2pgsql


Commits:
4eaa8b7b by Bas Couwenberg at 2019-08-30T13:10:54Z
Don't define ACCEPT_USE_OF_DEPRECATED_PROJ_API_H, fixed in libosmium.

- - - - -
52012ad3 by Bas Couwenberg at 2019-09-30T17:35:44Z
Bump Standards-Version to 4.4.1, no changes.

- - - - -
ac48ed01 by Bas Couwenberg at 2019-10-22T04:00:34Z
New upstream version 1.2.0+ds
- - - - -
c61df14c by Bas Couwenberg at 2019-10-22T04:00:36Z
Update upstream source from tag 'upstream/1.2.0+ds'

Update to upstream version '1.2.0+ds'
with Debian dir 0aa48122b13462d4370c45ec1fd47b25647281dd
- - - - -
5de64ebd by Bas Couwenberg at 2019-10-22T04:00:49Z
New upstream release.

- - - - -
ddb1f587 by Bas Couwenberg at 2019-10-22T04:01:31Z
Set distribution to unstable.

- - - - -
889433cc by Bas Couwenberg at 2019-11-02T13:34:58Z
Merge tag 'debian/1.2.0+ds-1' into buster-backports

releasing package osm2pgsql version 1.2.0+ds-1

- - - - -
5f7180dd by Bas Couwenberg at 2019-11-02T13:35:05Z
Rebuild for buster-backports.

- - - - -


14 changed files:

- CMakeLists.txt
- db-copy.cpp
- db-copy.hpp
- debian/changelog
- debian/control
- debian/rules
- docs/osm2pgsql.1
- docs/usage.md
- options.cpp
- table.cpp
- tests/CMakeLists.txt
- + tests/test-output-pgsql-int4.cpp
- + tests/test_output_pgsql_int4.osm
- + tests/test_output_pgsql_int4.style


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -1,6 +1,6 @@
 set(PACKAGE osm2pgsql)
 set(PACKAGE_NAME osm2pgsql)
-set(PACKAGE_VERSION 1.0.0)
+set(PACKAGE_VERSION 1.2.0)
 
 cmake_minimum_required(VERSION 2.8.7)
 


=====================================
db-copy.cpp
=====================================
@@ -28,7 +28,11 @@ db_copy_thread_t::~db_copy_thread_t() { finish(); }
 void db_copy_thread_t::add_buffer(std::unique_ptr<db_cmd_t> &&buffer)
 {
     assert(m_worker.joinable()); // thread must not have been finished
+
     std::unique_lock<std::mutex> lock(m_queue_mutex);
+    m_queue_full_cond.wait(lock,
+            [&]{ return m_worker_queue.size() < db_cmd_copy_t::Max_buffers; });
+
     m_worker_queue.push_back(std::move(buffer));
     m_queue_cond.notify_one();
 }
@@ -60,13 +64,11 @@ void db_copy_thread_t::worker_thread()
         std::unique_ptr<db_cmd_t> item;
         {
             std::unique_lock<std::mutex> lock(m_queue_mutex);
-            if (m_worker_queue.empty()) {
-                m_queue_cond.wait(lock);
-                continue;
-            }
+            m_queue_cond.wait(lock, [&]{ return !m_worker_queue.empty(); });
 
             item = std::move(m_worker_queue.front());
             m_worker_queue.pop_front();
+            m_queue_full_cond.notify_one();
         }
 
         switch (item->type) {


=====================================
db-copy.hpp
=====================================
@@ -67,7 +67,22 @@ protected:
 
 struct db_cmd_copy_t : public db_cmd_t
 {
-    enum { Max_buf_size = 10 * 1024 * 1024 };
+    enum {
+        /** Size of a single buffer with COPY data for Postgresql.
+         *  This is a trade-off between memory usage and sending large chunks
+         *  to speed up processing. Currently a one-size fits all value.
+         *  Needs more testing and individual values per queue.
+         */
+        Max_buf_size = 10 * 1024 * 1024,
+        /** Maximum length of the queue with COPY data.
+         *  In the usual case, PostgreSQL should be faster processing the
+         *  data than it can be produced and there should only be one element
+         *  in the queue. If PostgreSQL is slower, then the queue will always
+         *  be full and it is better to keep the queue smaller to reduce memory
+         *  usage. Current value is just assumed to be a reasonable trade off.
+         */
+        Max_buffers = 10
+    };
     /// Name of the target table for the copy operation
     std::shared_ptr<db_target_descr_t> target;
     /// Vector with object to delete before copying
@@ -141,6 +156,7 @@ private:
     std::thread m_worker;
     std::mutex m_queue_mutex;
     std::condition_variable m_queue_cond;
+    std::condition_variable m_queue_full_cond;
     std::deque<std::unique_ptr<db_cmd_t>> m_worker_queue;
 
     // Target for copy operation currently ongoing.


=====================================
debian/changelog
=====================================
@@ -1,3 +1,17 @@
+osm2pgsql (1.2.0+ds-1~bpo10+1) buster-backports; urgency=medium
+
+  * Rebuild for buster-backports.
+
+ -- Bas Couwenberg <sebastic at debian.org>  Sat, 02 Nov 2019 14:35:02 +0100
+
+osm2pgsql (1.2.0+ds-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Don't define ACCEPT_USE_OF_DEPRECATED_PROJ_API_H, fixed in libosmium.
+  * Bump Standards-Version to 4.4.1, no changes.
+
+ -- Bas Couwenberg <sebastic at debian.org>  Tue, 22 Oct 2019 06:01:22 +0200
+
 osm2pgsql (1.0.0+ds-1~bpo10+1) buster-backports; urgency=medium
 
   * Rebuild for buster-backports.


=====================================
debian/control
=====================================
@@ -21,7 +21,7 @@ Build-Depends: debhelper (>= 9),
                lua5.2,
                python3,
                python3-psycopg2
-Standards-Version: 4.4.0
+Standards-Version: 4.4.1
 Vcs-Browser: https://salsa.debian.org/debian-gis-team/osm2pgsql
 Vcs-Git: https://salsa.debian.org/debian-gis-team/osm2pgsql.git -b buster-backports
 Homepage: https://wiki.openstreetmap.org/wiki/Osm2pgsql


=====================================
debian/rules
=====================================
@@ -10,8 +10,8 @@ export DEB_BUILD_MAINT_OPTIONS=hardening=+all
 # Verbose test output
 export VERBOSE=1
 
-CFLAGS += $(CPPFLAGS) -DACCEPT_USE_OF_DEPRECATED_PROJ_API_H -DNDEBUG
-CXXFLAGS += $(CPPFLAGS) -DACCEPT_USE_OF_DEPRECATED_PROJ_API_H -DNDEBUG
+CFLAGS += $(CPPFLAGS) -DNDEBUG
+CXXFLAGS += $(CPPFLAGS) -DNDEBUG
 
 %:
 	dh $@ \


=====================================
docs/osm2pgsql.1
=====================================
@@ -112,9 +112,11 @@ imported into database columns and which tags get dropped. Defaults to /usr/shar
 \fB\-C\fR|\-\-cache num
 Only for slim mode: Use up to num many MB of RAM for caching nodes. Giving osm2pgsql sufficient cache
 to store all imported nodes typically greatly increases the speed of the import. Each cached node
-requires 8 bytes of cache, plus about 10% \- 30% overhead. For a current OSM full planet import with
-its ~ 3 billion nodes, a good value would be 27000 if you have enough RAM. If you don't have enough
-RAM, it is likely beneficial to give osm2pgsql close to the full available amount of RAM. Defaults to 800.
+requires 8 bytes of cache, plus about 10% \- 30% overhead. As a rule of thumb,
+give a bit more than the size of the import file in PBF format. If the RAM is not
+big enough, use about 75% of memory. Make sure to leave enough RAM for PostgreSQL.
+It needs at least the amount of \fBshared_buffers\fR given in its configuration.
+Defaults to 800.
 .TP
 \fB\  \fR\-\-cache\-strategy strategy
 There are a number of different modes in which osm2pgsql can organize its


=====================================
docs/usage.md
=====================================
@@ -25,11 +25,15 @@ use them.
 Performance is heavily influenced by other options, but there are some options
 that only impact performance.
 
-* ``--cache`` specifies how much memory to allocate for caching information. In
-  ``--slim`` mode, this is just node positions while in non-slim it has to
-  store information about ways and relations too. The maximum RAM it is useful
-  to set this to in slim mode is 8 bytes * number of nodes / efficiency, where
-  efficiency ranges from 50% on small imports to 80% for a planet.
+* ``--cache`` specifies how much memory in MB to allocate for caching information.
+  In ``--slim`` mode, this is just node positions while in non-slim it has to
+  store information about ways and relations too. The rule of thumb in slim mode
+  is as follows: use the size of the PBF file you are trying to import or about
+  75% of RAM, whichever is smaller. Make sure there is enough RAM left for
+  PostgreSQL. It needs at least the amount of `shared_buffers` given in its
+  configuration. You may also set ``--cache`` to 0 to disable node caching
+  completely. This only makes sense when a flat node file is given and there
+  is not enough RAM to fit most of the cache.
 
 * ``--number-processes`` sets the number of processes to use. This should
   typically be set to the number of CPU threads, but gains in speed are minimal


=====================================
options.cpp
=====================================
@@ -8,6 +8,7 @@
 #define basename /*SKIP IT*/
 #endif
 #include <boost/format.hpp>
+#include <algorithm>
 #include <cstdio>
 #include <cstring>
 #include <osmium/version.hpp>
@@ -119,7 +120,7 @@ namespace
     \n\
     Database options:\n\
        -d|--database    The name of the PostgreSQL database to connect to.\n\
-       -U|--username    PostgreSQL user name (specify passsword in PGPASS\n\
+       -U|--username    PostgreSQL user name (specify passsword in PGPASSWORD\n\
                         environment variable or use -W).\n\
        -W|--password    Force password prompt.\n\
        -H|--host        Database server host name or socket location.\n\
@@ -227,7 +228,8 @@ namespace
             printf("    %s -c -d gis --slim -C <cache size> -k \\\n", name);
             printf("      --flat-nodes <flat nodes> planet-latest.osm.pbf\n");
             printf("where\n");
-            printf("    <cache size> is 50000 on machines with 64GB or more RAM \n");
+            printf("    <cache size> should be equivalent to the size of the \n");
+            printf("      pbf file to be imported if there is enough RAM \n");
             printf("      or about 75%% of memory in MB on machines with less\n");
             printf("    <flat nodes> is a location where a 50+GB file can be saved.\n");
             printf("\n");
@@ -299,7 +301,7 @@ options_t::options_t()
   output_backend("pgsql"), input_reader("auto"), bbox(boost::none),
   extra_attributes(false), verbose(false)
 {
-    num_procs = std::thread::hardware_concurrency();
+    num_procs = (int) std::min(4U, std::thread::hardware_concurrency());
     if (num_procs < 1) {
         fprintf(stderr, "WARNING: unable to detect number of hardware threads supported!\n");
         num_procs = 1;


=====================================
table.cpp
=====================================
@@ -457,12 +457,21 @@ void table_t::escape_type(const string &value, ColumnType flags)
     switch (flags) {
     case COLUMN_TYPE_INT: {
         // For integers we take the first number, or the average if it's a-b
-        long from, to;
-        int items = sscanf(value.c_str(), "%ld-%ld", &from, &to);
-        if (items == 1) {
+        long long from, to;
+        // limit number of digits parsed to avoid undefined behaviour in sscanf
+        int items = sscanf(value.c_str(), "%18lld-%18lld", &from, &to);
+        if (items == 1 && from <= std::numeric_limits<int32_t>::max() &&
+            from >= std::numeric_limits<int32_t>::min()) {
             m_copy.add_column(from);
         } else if (items == 2) {
-            m_copy.add_column((from + to) / 2);
+            // calculate mean while avoiding overflows
+            int64_t mean = (from / 2) + (to / 2) + ((from % 2 + to % 2) / 2);
+            if (mean <= std::numeric_limits<int32_t>::max() &&
+                mean >= std::numeric_limits<int32_t>::min()) {
+                m_copy.add_column(mean);
+            } else {
+                m_copy.add_null_column();
+            }
         } else {
             m_copy.add_null_column();
         }


=====================================
tests/CMakeLists.txt
=====================================
@@ -21,6 +21,7 @@ set(TESTS
   test-output-multi-tags.cpp
   test-output-pgsql-area.cpp
   test-output-pgsql-schema.cpp
+  test-output-pgsql-int4.cpp
   test-output-pgsql-tablespace.cpp
   test-output-pgsql-validgeom.cpp
   test-output-pgsql-z_order.cpp


=====================================
tests/test-output-pgsql-int4.cpp
=====================================
@@ -0,0 +1,117 @@
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <cassert>
+#include <sstream>
+#include <stdexcept>
+#include <memory>
+
+#include "osmtypes.hpp"
+#include "osmdata.hpp"
+#include "output-pgsql.hpp"
+#include "options.hpp"
+#include "middle-pgsql.hpp"
+#include "middle-ram.hpp"
+#include "taginfo_impl.hpp"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <boost/lexical_cast.hpp>
+
+#include "tests/middle-tests.hpp"
+#include "tests/common-pg.hpp"
+#include "tests/common.hpp"
+
+namespace {
+
+struct skip_test : public std::exception {
+    const char *what() const noexcept { return "Test skipped."; }
+};
+
+void run_test(const char* test_name, void (*testfunc)()) {
+    try {
+        fprintf(stderr, "%s\n", test_name);
+        testfunc();
+
+    } catch (const skip_test &) {
+        exit(77); // <-- code to skip this test.
+
+    } catch (const std::exception& e) {
+        fprintf(stderr, "%s\n", e.what());
+        fprintf(stderr, "FAIL\n");
+        exit(EXIT_FAILURE);
+    }
+
+    fprintf(stderr, "PASS\n");
+}
+#define RUN_TEST(x) run_test(#x, &(x))
+
+// Imports tests/test_output_pgsql_int4.osm and checks the int4 "population"
+// column: single values and a-b ranges that fit into int4 (limits included)
+// must be stored, while out-of-range values must come back empty.
+void test_int4() {
+    std::unique_ptr<pg::tempdb> db;
+
+    try {
+        db.reset(new pg::tempdb);
+    } catch (const std::exception &e) {
+        std::cerr << "Unable to setup database: " << e.what() << "\n";
+        throw skip_test();
+    }
+
+    std::string proc_name("test-output-pgsql-int4"), input_file("-");
+    char *argv[] = { &proc_name[0], &input_file[0], nullptr };
+
+    options_t options = options_t(2, argv);
+    options.database_options = db->database_options;
+    options.num_procs = 1;
+    options.slim = 1;
+    options.prefix = "osm2pgsql_test";
+    options.style = "tests/test_output_pgsql_int4.style";
+
+    testing::run_osm2pgsql(options, "tests/test_output_pgsql_int4.osm",
+                           "xml");
+
+    db->assert_has_table("osm2pgsql_test_point");
+    db->assert_has_table("osm2pgsql_test_line");
+    db->assert_has_table("osm2pgsql_test_polygon");
+    db->assert_has_table("osm2pgsql_test_roads");
+
+    // First three nodes have population values that are out of range for int4 columns
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 1");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 2");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 3");
+    // Check values that are valid for int4 columns, including limits
+    db->check_count(2147483647, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 4");
+    db->check_count(10000, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 5");
+    db->check_count(-10000, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 6");
+    db->check_count(-2147483648, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 7");
+    // More out of range negative values
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 8");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 9");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 10");
+
+    // Ranges are also parsed into int4 columns
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 11");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 12");
+    // Check values that are valid for int4 columns, including limits
+    db->check_count(2147483647, "SELECT population FROM osm2pgsql_test_point WHERE osm_id =13");
+    db->check_count(15000, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 14");
+    db->check_count(-15000, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 15");
+    db->check_count(-2147483648, "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 16");
+    // More out of range negative values
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 17");
+    db->check_string("", "SELECT population FROM osm2pgsql_test_point WHERE osm_id = 18");
+}
+
+} // anonymous namespace
+
+int main(int argc, char *argv[]) {
+    (void)argc;
+    (void)argv;
+    RUN_TEST(test_int4);
+
+    return 0;
+}


=====================================
tests/test_output_pgsql_int4.osm
=====================================
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<osm version="0.6">
+ <node id="1" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000000000000000000" />
+   <tag k="name" v="longer than long" />
+ </node>
+ <node id="2" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000000000" />
+   <tag k="name" v="long (ten billion)" />
+ </node>
+ <node id="3" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="2147483648" />
+   <tag k="name" v="postgresql one more than int4 type" />
+ </node>
+ <node id="4" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="2147483647" />
+   <tag k="name" v="postgresql max int4 type" />
+ </node>
+ <node id="5" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000" />
+   <tag k="name" v="ten thousand" />
+ </node>
+ <node id="6" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000" />
+   <tag k="name" v="minus ten thousand" />
+ </node>
+ <node id="7" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-2147483648" />
+   <tag k="name" v="postgresql min int4 type" />
+ </node>
+ <node id="8" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-2147483649" />
+   <tag k="name" v="postgresql one less than min int4 type" />
+ </node>
+ <node id="9" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000000000" />
+   <tag k="name" v="minus long (minus ten billion)" />
+ </node>
+ <node id="10" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000000000000000000" />
+   <tag k="name" v="minus longer than long" />
+ </node>
+ <node id="11" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000000000000000000-20000000000000000000" />
+   <tag k="name" v="range, longer than long" />
+ </node>
+ <node id="12" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000000000-20000000000" />
+   <tag k="name" v="range, 15 billion" />
+ </node>
+ <node id="13" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="2147483646-2147483648" />
+   <tag k="name" v="range, mean is max int4" />
+ </node>
+ <node id="14" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="10000-20000" />
+   <tag k="name" v="range, 15 thousand" />
+ </node>
+ <node id="15" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000--20000" />
+   <tag k="name" v="range, negative 15 thousand" />
+ </node>
+ <node id="16" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-2147483647--2147483649" />
+   <tag k="name" v="range, mean is min int4" />
+ </node>
+ <node id="17" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000000000--20000000000" />
+   <tag k="name" v="range, negative 15 billion" />
+ </node>
+ <node id="18" visible="true" version="1" changeset="1" timestamp="2018-10-31T10:20:19Z" user="a" uid="1" lat="51.4779481" lon="-0.0014863">
+   <tag k="population" v="-10000000000000000000--20000000000000000000" />
+   <tag k="name" v="range, negative longer than long" />
+ </node>
+</osm>


=====================================
tests/test_output_pgsql_int4.style
=====================================
@@ -0,0 +1,3 @@
+# OsmType  Tag          DataType     Flags
+node,way   population      int4      linear
+node,way   name            text      linear



View it on GitLab: https://salsa.debian.org/debian-gis-team/osm2pgsql/compare/9e2a60f98d5e1d56ca783a9b09acd7c07f797bfb...5f7180dd6042610ca936ab30f13dab33bb0736cf

-- 
View it on GitLab: https://salsa.debian.org/debian-gis-team/osm2pgsql/compare/9e2a60f98d5e1d56ca783a9b09acd7c07f797bfb...5f7180dd6042610ca936ab30f13dab33bb0736cf
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20191102/ed3e0d46/attachment-0001.html>