Skip to content

Commit

Permalink
Merge pull request #2237 from joto/remove-non-bucket-index
Browse files Browse the repository at this point in the history
Remove support for non-bucket index on middle way table
  • Loading branch information
lonvia authored Sep 1, 2024
2 parents c588f5a + 89c24b2 commit 7406ff3
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 90 deletions.
4 changes: 0 additions & 4 deletions man/osm2pgsql.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,6 @@ mandatory for short options too.
database user. By default the schema set with `--schema` is used, or
`public` if that is not set.

\--middle-way-node-index-id-shift=SHIFT
: Set ID shift for way node bucket index in middle. Experts only. See
documentation for details.

\--middle-with-nodes
: Used together with the **new** middle database format when a flat nodes
file is used to force storing nodes with tags in the database, too.
Expand Down
15 changes: 2 additions & 13 deletions src/command-line-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,8 @@ void parse_expire_tiles_param(char const *arg, uint32_t *expire_tiles_zoom_min,
void check_options_non_slim(CLI::App const &app)
{
std::vector<std::string> const slim_options = {
"--cache",
"--middle-schema",
"--middle-with-nodes",
"--middle-way-node-index-id-shift",
"--tablespace-slim-data",
"--tablespace-slim-index"};
"--cache", "--middle-schema", "--middle-with-nodes",
"--tablespace-slim-data", "--tablespace-slim-index"};

for (auto const &opt : slim_options) {
if (app.count(opt) > 0) {
Expand Down Expand Up @@ -575,13 +571,6 @@ options_t parse_command_line(int argc, char *argv[])
->description("Disable concurrent index creation.")
->group("Advanced options");

// --middle-way-node-index-id-shift
app.add_option("--middle-way-node-index-id-shift",
options.way_node_index_id_shift)
->description("Set ID shift for bucket index.")
->type_name("N")
->group("Advanced options");

// --number-processes
app.add_option("--number-processes", options.num_procs)
// The threads will open up database connections which will
Expand Down
85 changes: 23 additions & 62 deletions src/middle-pgsql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@

namespace {

/**
 * Look up the way node bucket index for the given table prefix.
 *
 * Queries the pg_class catalog for an index (relkind 'i') named
 * "<prefix>_ways_nodes_bucket_idx". The lookup is done by relation name
 * only.
 *
 * @param db_connection Database connection used to run the query.
 * @param prefix Table name prefix the index name is derived from.
 * @returns true if the query returned at least one row, false otherwise.
 */
bool check_bucket_index(pg_conn_t const *db_connection,
                        std::string const &prefix)
{
    auto const matching_indexes = db_connection->exec(
        "SELECT relname FROM pg_class"
        " WHERE relkind='i'"
        " AND relname = '{}_ways_nodes_bucket_idx'",
        prefix);

    return matching_indexes.num_tuples() > 0;
}

void send_id_list(pg_conn_t const &db_connection,
std::string const &table, idlist_t const &ids)
{
Expand Down Expand Up @@ -97,7 +86,7 @@ std::string build_sql(options_t const &options, std::string const &templ)
fmt::arg("using_tablespace", using_tablespace),
fmt::arg("data_tablespace", tablespace_clause(options.tblsslim_data)),
fmt::arg("index_tablespace", tablespace_clause(options.tblsslim_index)),
fmt::arg("way_node_index_id_shift", options.way_node_index_id_shift),
fmt::arg("way_node_index_id_shift", 5),
fmt::arg("attribute_columns_definition",
options.extra_attributes ? " created timestamp with time zone,"
" version int4,"
Expand Down Expand Up @@ -663,17 +652,13 @@ void middle_pgsql_t::get_node_parents(idlist_t const &changed_nodes,

queries.emplace_back("ANALYZE osm2pgsql_changed_nodes");

bool const has_bucket_index =
check_bucket_index(&m_db_connection, m_options->prefix);

if (has_bucket_index) {
// The query to get the parent ways of changed nodes is "hidden"
// inside a PL/pgSQL function so that the query planner only sees
// a single node id that is being queried for. If we ask for all
// nodes at the same time the query planner sometimes thinks it is
// better to do a full table scan which totally destroys performance.
// This is due to the PostgreSQL statistics on ARRAYs being way off.
queries.emplace_back(R"(
// The query to get the parent ways of changed nodes is "hidden"
// inside a PL/pgSQL function so that the query planner only sees
// a single node id that is being queried for. If we ask for all
// nodes at the same time the query planner sometimes thinks it is
// better to do a full table scan which totally destroys performance.
// This is due to the PostgreSQL statistics on ARRAYs being way off.
queries.emplace_back(R"(
CREATE OR REPLACE FUNCTION osm2pgsql_find_changed_ways() RETURNS void AS $$
DECLARE
changed_buckets RECORD;
Expand All @@ -692,16 +677,8 @@ BEGIN
END;
$$ LANGUAGE plpgsql
)");
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");
} else {
queries.emplace_back(R"(
INSERT INTO osm2pgsql_changed_ways
SELECT w.id
FROM {schema}"{prefix}_ways" w, osm2pgsql_changed_nodes n
WHERE w.nodes && ARRAY[n.id]
)");
}
queries.emplace_back("SELECT osm2pgsql_find_changed_ways()");
queries.emplace_back("DROP FUNCTION osm2pgsql_find_changed_ways()");

queries.emplace_back(R"(
INSERT INTO osm2pgsql_changed_relations
Expand Down Expand Up @@ -1176,7 +1153,7 @@ table_sql sql_for_nodes(middle_pgsql_options const &options)
return sql;
}

table_sql sql_for_ways(middle_pgsql_options const &options)
table_sql sql_for_ways()
{
table_sql sql{};

Expand All @@ -1200,23 +1177,17 @@ table_sql sql_for_ways(middle_pgsql_options const &options)
" {users_table_access}"
" WHERE o.id = ANY($1::int8[])"};

if (options.way_node_index_id_shift == 0) {
sql.create_fw_dep_indexes = {
"CREATE INDEX ON {schema}\"{prefix}_ways\" USING GIN (nodes)"
" WITH (fastupdate = off) {index_tablespace}"};
} else {
sql.create_fw_dep_indexes = {
"CREATE OR REPLACE FUNCTION"
" {schema}\"{prefix}_index_bucket\"(int8[])"
" RETURNS int8[] AS $$"
" SELECT ARRAY(SELECT DISTINCT"
" unnest($1) >> {way_node_index_id_shift})"
"$$ LANGUAGE SQL IMMUTABLE",
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
" ON {schema}\"{prefix}_ways\""
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
" WITH (fastupdate = off) {index_tablespace}"};
}
sql.create_fw_dep_indexes = {
"CREATE OR REPLACE FUNCTION"
" {schema}\"{prefix}_index_bucket\"(int8[])"
" RETURNS int8[] AS $$"
" SELECT ARRAY(SELECT DISTINCT"
" unnest($1) >> {way_node_index_id_shift})"
"$$ LANGUAGE SQL IMMUTABLE",
"CREATE INDEX \"{prefix}_ways_nodes_bucket_idx\""
" ON {schema}\"{prefix}_ways\""
" USING GIN ({schema}\"{prefix}_index_bucket\"(nodes))"
" WITH (fastupdate = off) {index_tablespace}"};

return sql;
}
Expand Down Expand Up @@ -1272,7 +1243,6 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,
m_db_copy(m_copy_thread), m_append(options->append)
{
m_store_options.with_attributes = options->extra_attributes;
m_store_options.way_node_index_id_shift = options->way_node_index_id_shift;

if (options->middle_with_nodes) {
m_store_options.nodes = true;
Expand All @@ -1289,15 +1259,8 @@ middle_pgsql_t::middle_pgsql_t(std::shared_ptr<thread_pool_t> thread_pool,

log_debug("Mid: pgsql, cache={}", options->cache);

bool const has_bucket_index =
check_bucket_index(&m_db_connection, options->prefix);

if (!has_bucket_index && options->append) {
log_debug("You don't have a bucket index. See manual for details.");
}

m_tables.nodes() = table_desc{*options, sql_for_nodes(m_store_options)};
m_tables.ways() = table_desc{*options, sql_for_ways(m_store_options)};
m_tables.ways() = table_desc{*options, sql_for_ways()};
m_tables.relations() = table_desc{*options, sql_for_relations()};

m_users_table = table_desc{*options, sql_for_users(m_store_options)};
Expand All @@ -1310,8 +1273,6 @@ void middle_pgsql_t::set_requirements(
log_debug(" nodes: {}", m_store_options.nodes);
log_debug(" untagged_nodes: {}", m_store_options.untagged_nodes);
log_debug(" use_flat_node_file: {}", m_store_options.use_flat_node_file);
log_debug(" way_node_index_id_shift: {}",
m_store_options.way_node_index_id_shift);
log_debug(" with_attributes: {}", m_store_options.with_attributes);
}

Expand Down
3 changes: 0 additions & 3 deletions src/middle-pgsql.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ struct middle_pgsql_options
// Store untagged nodes also (set in addition to nodes=true).
bool untagged_nodes = false;

// Bit shift used in way node index
uint8_t way_node_index_id_shift = 5;

// Use a flat node file
bool use_flat_node_file = false;

Expand Down
8 changes: 0 additions & 8 deletions src/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,6 @@ struct options_t

unsigned int num_procs = 1;

/**
* How many bits should the node id be shifted for the way node index?
* The result is a lossy index which is significantly smaller.
* See https://osm2pgsql.org/doc/manual.html#bucket-index-for-slim-mode
* Use 0 to use a classic loss-less GIN index.
*/
uint8_t way_node_index_id_shift = 5;

/**
* Middle database format:
* 0 = non-slim mode, no database middle (ram middle)
Expand Down

0 comments on commit 7406ff3

Please sign in to comment.