Skip to content

Commit

Permalink
wip sql: eliminate noop projections in short_circuit optimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
erikgrinaker committed Jul 14, 2024
1 parent 2184b59 commit ee70bfe
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 107 deletions.
4 changes: 3 additions & 1 deletion src/sql/execution/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,12 @@ pub fn execute(node: Node, txn: &impl Transaction) -> Result<Rows> {
}

Node::IndexLookup { table, column, values, alias: _ } => {
let column = table.columns.into_iter().nth(column).expect("invalid column").name;
let table = table.name;
source::lookup_index(txn, table, column, values)
}

Node::KeyLookup { table, keys, alias: _ } => source::lookup_key(txn, table, keys),
Node::KeyLookup { table, keys, alias: _ } => source::lookup_key(txn, table.name, keys),

Node::Limit { source, limit } => {
let source = execute(*source, txn)?;
Expand Down
20 changes: 15 additions & 5 deletions src/sql/planner/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,13 @@ pub(super) fn index_lookup(node: Node) -> Result<Node> {
};

// Extract the lookup values and expression from the cnf vector.
let (field, values) = cnf.remove(i).into_field_values().expect("field lookup failed");
let (column, values) = cnf.remove(i).into_field_values().expect("field lookup failed");

// Build the primary key or secondary index lookup node.
if field == table.primary_key {
node = Node::KeyLookup { table: table.name, keys: values, alias };
if column == table.primary_key {
node = Node::KeyLookup { table, keys: values, alias };
} else {
let column = table.columns.into_iter().nth(field).unwrap().name;
node = Node::IndexLookup { table: table.name, column, values, alias };
node = Node::IndexLookup { table, column, values, alias };
}

// If there's any remaining CNF expressions add a filter node for them.
Expand Down Expand Up @@ -356,6 +355,17 @@ pub(super) fn short_circuit(node: Node) -> Result<Node> {
Node::Nothing
}

// Remove projections that simply pass through the original columns.
Node::Projection { source, expressions, .. }
if source.size() == expressions.len()
&& expressions
.iter()
.enumerate()
.all(|(i, e)| matches!(e, Expression::Field(f, _) if i == *f)) =>
{
*source
}

node => node,
};

Expand Down
29 changes: 26 additions & 3 deletions src/sql/planner/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ pub enum Node {
/// Looks up the given values in a secondary index and emits matching rows.
/// NULL and NaN values are considered equal, to allow IS NULL and IS NAN
/// index lookups, as are -0.0 and 0.0.
IndexLookup { table: String, column: String, values: Vec<Value>, alias: Option<String> },
IndexLookup { table: Table, column: usize, values: Vec<Value>, alias: Option<String> },
/// Looks up the given primary keys and emits their rows.
KeyLookup { table: String, keys: Vec<Value>, alias: Option<String> },
KeyLookup { table: Table, keys: Vec<Value>, alias: Option<String> },
/// Only emits the first limit rows from the source, discards the rest.
Limit { source: Box<Node>, limit: usize },
/// Joins the left and right sources on the given predicate by buffering the
Expand Down Expand Up @@ -273,6 +273,27 @@ impl Node {
| Self::Scan { filter: None, .. }) => node,
})
}

/// Returns the column width of the node.
/// TODO: this can replace various size parameters, e.g. left_size and
/// right_size in NestedLoopJoin. Possibly also Scope.len().
pub fn size(&self) -> usize {
match &self {
Node::Aggregate { aggregates, group_by, .. } => aggregates.len() + group_by.len(),
Node::Filter { source, .. } => source.size(),
Node::HashJoin { left, right, .. } => left.size() + right.size(),
Node::IndexLookup { table, .. } => table.columns.len(),
Node::KeyLookup { table, .. } => table.columns.len(),
Node::Limit { source, .. } => source.size(),
Node::NestedLoopJoin { left, right, .. } => left.size() + right.size(),
Node::Nothing => 0,
Node::Offset { source, .. } => source.size(),
Node::Order { source, .. } => source.size(),
Node::Projection { expressions, .. } => expressions.len(),
Node::Scan { table, .. } => table.columns.len(),
Node::Values { rows } => rows.first().map(|row| row.len()).unwrap_or(0),
}
}
}

/// Formats the plan as an EXPLAIN tree.
Expand Down Expand Up @@ -370,6 +391,8 @@ impl Node {
right.format(f, prefix, false, true)?;
}
Self::IndexLookup { table, column, alias, values } => {
let column = &table.columns[*column].name;
let table = &table.name;
write!(f, "IndexLookup: {table}.{column}")?;
if let Some(alias) = alias {
write!(f, " as {alias}.{column}")?;
Expand All @@ -381,7 +404,7 @@ impl Node {
}
}
Self::KeyLookup { table, alias, keys } => {
write!(f, "KeyLookup: {table}")?;
write!(f, "KeyLookup: {}", table.name)?;
if let Some(alias) = alias {
write!(f, " as {alias}")?;
}
Expand Down
83 changes: 32 additions & 51 deletions src/sql/testscripts/queries/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,13 @@
ok

# COUNT(*) returns the row count.
# TODO: revisit the plan here. This can be eliminated by short-circuiting optimizer.
[plan]> SELECT COUNT(*) FROM test
---
Projection: #0
└─ Aggregate: count(TRUE)
└─ Scan: test
Aggregate: count(TRUE)
└─ Scan: test
6

# COUNT works on constant values.
# TODO: revisit the plan here. This can be eliminated by short-circuiting optimizer.
[plan,header]> SELECT COUNT(NULL), COUNT(TRUE), COUNT(1), COUNT(3.14), COUNT(NAN), COUNT('')
---
Projection: #0, #1, #2, #3, #4, #5
Expand All @@ -38,43 +35,37 @@ Projection: #0, #1, #2, #3, #4, #5
# COUNT works on no rows.
[plan]> SELECT COUNT(id), COUNT("bool"), COUNT("float"), COUNT("string") FROM test WHERE false
---
Projection: #0, #1, #2, #3
└─ Aggregate: count(id), count(bool), count(float), count(string)
└─ Nothing
Aggregate: count(id), count(bool), count(float), count(string)
└─ Nothing
0, 0, 0, 0

# COUNT returns number of non-NULL values.
# TODO: revisit the plan here. This can be eliminated by short-circuiting optimizer.
[plan,header]> SELECT COUNT(id), COUNT("bool"), COUNT("float"), COUNT("string") FROM test
---
Projection: #0, #1, #2, #3
└─ Aggregate: count(id), count(bool), count(float), count(string)
└─ Scan: test
Aggregate: count(id), count(bool), count(float), count(string)
└─ Scan: test
, , ,
6, 3, 5, 4

# MAX works on constant values.
[plan]> SELECT MAX(NULL), MAX(TRUE), MAX(1), MAX(3.14), MAX(NAN), MAX('foo') FROM test
---
Projection: #0, #1, #2, #3, #4, #5
└─ Aggregate: max(NULL), max(TRUE), max(1), max(3.14), max(NaN), max(foo)
└─ Scan: test
Aggregate: max(NULL), max(TRUE), max(1), max(3.14), max(NaN), max(foo)
└─ Scan: test
NULL, TRUE, 1, 3.14, NaN, foo

# MAX works on no rows.
[plan]> SELECT MAX(id), MAX("bool"), MAX("float"), MAX("string") FROM test WHERE false
---
Projection: #0, #1, #2, #3
└─ Aggregate: max(id), max(bool), max(float), max(string)
└─ Nothing
Aggregate: max(id), max(bool), max(float), max(string)
└─ Nothing
NULL, NULL, NULL, NULL

# MAX returns the max value, or NULL if any value is NULL.
[plan]> SELECT MAX(id) FROM test
---
Projection: #0
└─ Aggregate: max(id)
└─ Scan: test
Aggregate: max(id)
└─ Scan: test
5

> SELECT MAX("bool") FROM test
Expand All @@ -98,25 +89,22 @@ inf
# MIN works on constant values.
[plan]> SELECT MIN(NULL), MIN(TRUE), MIN(1), MIN(3.14), MIN(NAN), MIN('foo') FROM test
---
Projection: #0, #1, #2, #3, #4, #5
└─ Aggregate: min(NULL), min(TRUE), min(1), min(3.14), min(NaN), min(foo)
└─ Scan: test
Aggregate: min(NULL), min(TRUE), min(1), min(3.14), min(NaN), min(foo)
└─ Scan: test
NULL, TRUE, 1, 3.14, NaN, foo

# MIN works on no rows.
[plan]> SELECT MIN(id), MIN("bool"), MIN("float"), MIN("string") FROM test WHERE false
---
Projection: #0, #1, #2, #3
└─ Aggregate: min(id), min(bool), min(float), min(string)
└─ Nothing
Aggregate: min(id), min(bool), min(float), min(string)
└─ Nothing
NULL, NULL, NULL, NULL

# MIN returns the min value, or NULL if any value is NULL.
[plan]> SELECT MIN(id) FROM test
---
Projection: #0
└─ Aggregate: min(id)
└─ Scan: test
Aggregate: min(id)
└─ Scan: test
0

> SELECT MIN("bool") FROM test
Expand All @@ -138,9 +126,8 @@ FALSE
# SUM works on constant values, but only numbers.
[plan]> SELECT SUM(NULL), SUM(1), SUM(3.14), SUM(NAN) FROM test
---
Projection: #0, #1, #2, #3
└─ Aggregate: sum(NULL), sum(1), sum(3.14), sum(NaN)
└─ Scan: test
Aggregate: sum(NULL), sum(1), sum(3.14), sum(NaN)
└─ Scan: test
NULL, 6, 18.84, NaN

!> SELECT SUM(TRUE)
Expand All @@ -152,18 +139,16 @@ Error: invalid input: can't add 0 and foo
# SUM works on no rows.
[plan]> SELECT SUM(id), SUM("bool"), SUM("float"), SUM("string") FROM test WHERE false
---
Projection: #0, #1, #2, #3
└─ Aggregate: sum(id), sum(bool), sum(float), sum(string)
└─ Nothing
Aggregate: sum(id), sum(bool), sum(float), sum(string)
└─ Nothing
NULL, NULL, NULL, NULL

# SUM returns the sum, or NULL if any value is NULL. Errors
# on booleans or strings.
[plan]> SELECT SUM(id) FROM test
---
Projection: #0
└─ Aggregate: sum(id)
└─ Scan: test
Aggregate: sum(id)
└─ Scan: test
15

!> SELECT SUM("bool") FROM test
Expand All @@ -187,9 +172,8 @@ Error: invalid input: can't add 0 and
# AVG works on constant values, but only numbers.
[plan]> SELECT AVG(NULL), AVG(1), AVG(3.14), AVG(NAN) FROM test
---
Projection: #0, #1, #2, #3
└─ Aggregate: avg(NULL), avg(1), avg(3.14), avg(NaN)
└─ Scan: test
Aggregate: avg(NULL), avg(1), avg(3.14), avg(NaN)
└─ Scan: test
NULL, 1, 3.14, NaN

!> SELECT AVG(TRUE)
Expand All @@ -201,18 +185,16 @@ Error: invalid input: can't add 0 and foo
# AVG works on no rows.
[plan]> SELECT AVG(id), AVG("bool"), AVG("float"), AVG("string") FROM test WHERE false
---
Projection: #0, #1, #2, #3
└─ Aggregate: avg(id), avg(bool), avg(float), avg(string)
└─ Nothing
Aggregate: avg(id), avg(bool), avg(float), avg(string)
└─ Nothing
NULL, NULL, NULL, NULL

# AVG returns the average, or NULL if any value is NULL. Errors
# on booleans or strings.
[plan]> SELECT AVG(id) FROM test
---
Projection: #0
└─ Aggregate: avg(id)
└─ Scan: test
Aggregate: avg(id)
└─ Scan: test
2

!> SELECT AVG("bool") FROM test
Expand All @@ -236,9 +218,8 @@ Error: invalid input: can't add 0 and
# Constant aggregates can be used with rows.
[plan]> SELECT COUNT(1), MIN(1), MAX(1), SUM(1), AVG(1) FROM test
---
Projection: #0, #1, #2, #3, #4
└─ Aggregate: count(1), min(1), max(1), sum(1), avg(1)
└─ Scan: test
Aggregate: count(1), min(1), max(1), sum(1), avg(1)
└─ Scan: test
6, 1, 1, 6, 1

# Constant aggregates can't be used with value rows.
Expand Down
Loading

0 comments on commit ee70bfe

Please sign in to comment.