Merge pull request #2237 from joto/remove-non-bucket-index

Remove support for non-bucket index on middle way table
osm2pgsql-dev · Sep 1, 2024 · 7406ff3
2 parents c588f5a + 89c24b2
commit 7406ff3
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 90 deletions.
diff --git a/man/osm2pgsql.md b/man/osm2pgsql.md
@@ -173,10 +173,6 @@ mandatory for short options too.
     database user. By default the schema set with `--schema` is used, or
     `public` if that is not set.
 
-\--middle-way-node-index-id-shift=SHIFT
-:   Set ID shift for way node bucket index in middle. Experts only. See
-    documentation for details.
-
 \--middle-with-nodes
 :   Used together with the **new** middle database format when a flat nodes
     file is used to force storing nodes with tags in the database, too.

diff --git a/src/command-line-parser.cpp b/src/command-line-parser.cpp
@@ -136,12 +136,8 @@ void parse_expire_tiles_param(char const *arg, uint32_t *expire_tiles_zoom_min,
 void check_options_non_slim(CLI::App const &app)
 {
     std::vector<std::string> const slim_options = {
-        "--cache",
-        "--middle-schema",
-        "--middle-with-nodes",
-        "--middle-way-node-index-id-shift",
-        "--tablespace-slim-data",
-        "--tablespace-slim-index"};
+        "--cache", "--middle-schema", "--middle-with-nodes",
+        "--tablespace-slim-data", "--tablespace-slim-index"};
 
     for (auto const &opt : slim_options) {
         if (app.count(opt) > 0) {
@@ -575,13 +571,6 @@ options_t parse_command_line(int argc, char *argv[])
         ->description("Disable concurrent index creation.")
         ->group("Advanced options");
 
-    // --middle-way-node-index-id-shift
-    app.add_option("--middle-way-node-index-id-shift",
-                   options.way_node_index_id_shift)
-        ->description("Set ID shift for bucket index.")
-        ->type_name("N")
-        ->group("Advanced options");
-
     // --number-processes
     app.add_option("--number-processes", options.num_procs)
         // The threads will open up database connections which will

diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp
@@ -46,17 +46,6 @@
 
 namespace {
 
-bool check_bucket_index(pg_conn_t const *db_connection,
-                        std::string const &prefix)
-{
-    auto const res =
-        db_connection->exec("SELECT relname FROM pg_class"
-                            " WHERE relkind='i'"
-                            " AND relname = '{}_ways_nodes_bucket_idx'",
-                            prefix);
-    return res.num_tuples() > 0;
-}
-
 void send_id_list(pg_conn_t const &db_connection,
                          std::string const &table, idlist_t const &ids)
 {
@@ -97,7 +86,7 @@ std::string build_sql(options_t const &options, std::string const &templ)
         fmt::arg("using_tablespace", using_tablespace),
         fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)),
         fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)),
-        fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift),
+        fmt::arg("way_node_index_id_shift", 5),
         fmt::arg("attribute_columns_definition",
                  options.extra_attributes ? " created timestamp with time zone,"
                                             " version int4,"
@@ -663,17 +652,13 @@ void middle_pgsql_t::get_node_parents(idlist_t const &changed_nodes,
 
     queries.emplace_back("ANALYZE osm2pgsql_changed_nodes");
 
-    bool const has_bucket_index =
-        check_bucket_index(&m_db_connection, m_options->prefix);
-
-    if (has_bucket_index) {
-        // The query to get the parent ways of changed nodes is "hidden"
-        // inside a PL/pgSQL function so that the query planner only sees
-        // a single node id that is being queried for. If we ask for all
-        // nodes at the same time the query planner sometimes thinks it is
-        // better to do a full table scan which totally destroys performance.
-        // This is due to the PostgreSQL statistics on ARRAYs being way off.
-        queries.emplace_back(R"(
+    // The query to get the parent ways of changed nodes is "hidden"
+    // inside a PL/pgSQL function so that the query planner only sees
+    // a single node id that is being queried for. If we ask for all
+    // nodes at the same time the query planner sometimes thinks it is
+    // better to do a full table scan which totally destroys performance.
+    // This is due to the PostgreSQL statistics on ARRAYs being way off.
+    queries.emplace_back(R"(
 CREATE OR REPLACE FUNCTION osm2pgsql_find_changed_ways() RETURNS void AS $$
 DECLARE
   changed_buckets RECORD;
@@ -692,16 +677,8 @@ BEGIN
 END;
 $$ LANGUAGE plpgsql
 )");
-        queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
-        queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
-    } else {
-        queries.emplace_back(R"(
-INSERT INTO osm2pgsql_changed_ways
-  SELECT w.id
-    FROM {schema}"{prefix}_ways" w, osm2pgsql_changed_nodes n
-    WHERE w.nodes && ARRAY[n.id]
-        )");
-    }
+    queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
+    queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
 
     queries.emplace_back(R"(
 INSERT INTO osm2pgsql_changed_relations
@@ -1176,7 +1153,7 @@ table_sql sql_for_nodes(middle_pgsql_options const &options)
     return sql;
 }
 
-table_sql sql_for_ways(middle_pgsql_options const &options)
+table_sql sql_for_ways()
 {
     table_sql sql{};
 
@@ -1200,23 +1177,17 @@ table_sql sql_for_ways(middle_pgsql_options const &options)
                            "  {users_table_access}"
                            " WHERE o.id = ANY($1::int8[])"};
 
-    if (options.way_node_index_id_shift == 0) {
-        sql.create_fw_dep_indexes = {
-            "CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
-            "  WITH (fastupdate = off) {index_tablespace}"};
-    } else {
-        sql.create_fw_dep_indexes = {
-            "CREATE OR REPLACE FUNCTION"
-            "    {schema}\"{prefix}_index_bucket\"(int8[])"
-            "  RETURNS int8[] AS $$"
-            "  SELECT ARRAY(SELECT DISTINCT"
-            "    unnest($1) >> {way_node_index_id_shift})"
-            "$$ LANGUAGE SQL IMMUTABLE",
-            "CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
-            "  ON {schema}\"{prefix}_ways\""
-            "  USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
-            "  WITH (fastupdate = off) {index_tablespace}"};
-    }
+    sql.create_fw_dep_indexes = {
+        "CREATE OR REPLACE FUNCTION"
+        "    {schema}\"{prefix}_index_bucket\"(int8[])"
+        "  RETURNS int8[] AS $$"
+        "  SELECT ARRAY(SELECT DISTINCT"
+        "    unnest($1) >> {way_node_index_id_shift})"
+        "$$ LANGUAGE SQL IMMUTABLE",
+        "CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
+        "  ON {schema}\"{prefix}_ways\""
+        "  USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
+        "  WITH (fastupdate = off) {index_tablespace}"};
 
     return sql;
 }
@@ -1272,7 +1243,6 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
   m_db_copy(m_copy_thread), m_append(options->append)
 {
     m_store_options.with_attributes = options->extra_attributes;
-    m_store_options.way_node_index_id_shift = options->way_node_index_id_shift;
 
     if (options->middle_with_nodes) {
         m_store_options.nodes = true;
@@ -1289,15 +1259,8 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
 
     log_debug("Mid: pgsql, cache={}", options->cache);
 
-    bool const has_bucket_index =
-        check_bucket_index(&m_db_connection, options->prefix);
-
-    if (!has_bucket_index && options->append) {
-        log_debug("You don't have a bucket index. See manual for details.");
-    }
-
     m_tables.nodes() = table_desc{*options, sql_for_nodes(m_store_options)};
-    m_tables.ways() = table_desc{*options, sql_for_ways(m_store_options)};
+    m_tables.ways() = table_desc{*options, sql_for_ways()};
     m_tables.relations() = table_desc{*options, sql_for_relations()};
 
     m_users_table = table_desc{*options, sql_for_users(m_store_options)};
@@ -1310,8 +1273,6 @@ void middle_pgsql_t::set_requirements(
     log_debug("  nodes: {}", m_store_options.nodes);
     log_debug("  untagged_nodes: {}", m_store_options.untagged_nodes);
     log_debug("  use_flat_node_file: {}", m_store_options.use_flat_node_file);
-    log_debug("  way_node_index_id_shift: {}",
-              m_store_options.way_node_index_id_shift);
     log_debug("  with_attributes: {}", m_store_options.with_attributes);
 }
 

diff --git a/src/middle-pgsql.hpp b/src/middle-pgsql.hpp
@@ -39,9 +39,6 @@ struct middle_pgsql_options
     // Store untagged nodes also (set in addition to nodes=true).
     bool untagged_nodes = false;
 
-    // Bit shift used in way node index
-    uint8_t way_node_index_id_shift = 5;
-
     // Use a flat node file
     bool use_flat_node_file = false;
 

diff --git a/src/options.hpp b/src/options.hpp
@@ -107,14 +107,6 @@ struct options_t
 
     unsigned int num_procs = 1;
 
-    /**
-     * How many bits should the node id be shifted for the way node index?
-     * The result is a lossy index which is significantly smaller.
-     * See https://osm2pgsql.org/doc/manual.html#bucket-index-for-slim-mode
-     * Use 0 to use a classic loss-less GIN index.
-     */
-    uint8_t way_node_index_id_shift = 5;
-
     /**
      * Middle database format:
      * 0 = non-slim mode, no database middle (ram middle)