Skip to content

Commit

Permalink
Table extraction for basic queries (#105)
Browse files Browse the repository at this point in the history
  • Loading branch information
crisptrutski authored Oct 22, 2024
1 parent 12e2cd8 commit 41f6132
Show file tree
Hide file tree
Showing 15 changed files with 219 additions and 23 deletions.
137 changes: 137 additions & 0 deletions java/com/metabase/macaw/BasicTableExtractor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package com.metabase.macaw;

import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.Statement;
import net.sf.jsqlparser.statement.select.*;

import java.util.*;
import java.util.function.Consumer;

/**
* Return a simplified query representation we can work with further, if possible.
*/
@SuppressWarnings({
"PatternVariableCanBeUsed", "IfCanBeSwitch"} // don't force a newer JVM version
)
public final class BasicTableExtractor {

public enum ErrorType {
UNSUPPORTED_EXPRESSION,
INVALID_QUERY,
UNABLE_TO_PARSE
}

public static class AnalysisError extends RuntimeException {
public final ErrorType errorType;
public final Throwable cause;

public AnalysisError(ErrorType errorType) {
this.errorType = errorType;
this.cause = null;
}

public AnalysisError(ErrorType errorType, Throwable cause) {
this.errorType = errorType;
this.cause = cause;
}
}

public static Set<Table> getTables(Statement statement) {
try {
if (statement instanceof Select) {
return getTables((Select) statement);
}
// This is not a query, it's probably a statement.
throw new AnalysisError(ErrorType.INVALID_QUERY);
} catch (IllegalArgumentException e) {
// This query uses features that we do not yet support translating.
throw new AnalysisError(ErrorType.UNABLE_TO_PARSE, e);
}
}

private static Set<Table> getTables(Select select) {
if (select instanceof PlainSelect) {
return getTables(select.getPlainSelect());
} else {
// We don't support more complex kinds of select statements yet.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
}

private static Set<Table> getTables(PlainSelect select) {
// these are fine, but irrelevant
//
// - select.getDistinct()
// - select.getHaving()
// - select.getKsqlWindow()
// - select.getMySqlHintStraightJoin()
// - select.getMySqlSqlCacheFlag()
// - select.getLimitBy()
// - select.getOffset()
// - select.getOptimizeFor()
// - select.getOracleHint()
// - select.getTop()
// - select.getWait()

// Not currently parseable
if (select.getLateralViews() != null ||
select.getOracleHierarchical() != null ||
select.getWindowDefinitions() != null) {
throw new AnalysisError(ErrorType.UNABLE_TO_PARSE);
}

// Not currently supported
if (select.getWithItemsList() != null) {
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}

// any of these - nope out
if (select.getFetch() != null ||
select.getFirst() != null ||
select.getForClause() != null ||
select.getForMode() != null ||
select.getForUpdateTable() != null ||
select.getForXmlPath() != null ||
select.getIntoTables() != null ||
select.getIsolation() != null ||
select.getSkip() != null) {
throw new AnalysisError(ErrorType.INVALID_QUERY);
}

Set<Table> tables = new HashSet<>();

for (SelectItem item : select.getSelectItems()) {
if (item.getExpression() instanceof ParenthesedSelect) {
// Do not allow sub-selects.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
}

Consumer<FromItem> pushOrThrow = (FromItem item) -> {
if (item instanceof Table) {
Table table = (Table) item;
if (table.getName().contains("*")) {
// Do not allow table wildcards.
throw new AnalysisError(ErrorType.INVALID_QUERY);
}
tables.add(table);
} else if (item instanceof TableFunction) {
// Do not allow dynamic tables
throw new AnalysisError(ErrorType.INVALID_QUERY);
} else {
// Only allow simple table references.
throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION);
}
};

if (select.getFromItem() != null) {
pushOrThrow.accept(select.getFromItem());
List<Join> joins = select.getJoins();
if (joins != null) {
joins.stream().map(Join::getFromItem).forEach(pushOrThrow);
}
}
return tables;
}

}
32 changes: 28 additions & 4 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
[macaw.collect :as collect]
[macaw.rewrite :as rewrite])
(:import
(com.metabase.macaw AstWalker$Scope)
(com.metabase.macaw AstWalker$Scope BasicTableExtractor BasicTableExtractor$AnalysisError)
(java.util.function Consumer)
(net.sf.jsqlparser JSQLParserException)
(net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil)
(net.sf.jsqlparser.parser.feature Feature)))
(net.sf.jsqlparser.parser.feature Feature)
(net.sf.jsqlparser.schema Table)))

(set! *warn-on-reflection* true)

Expand Down Expand Up @@ -83,15 +85,37 @@
(defn- raw-components [xs]
(into (empty xs) (keep :component) xs))

(defn- table->identifier
"Given a table object, return a map with the schema and table names."
[^Table t]
(if (.getSchemaName t)
{:schema (.getSchemaName t)
:table (.getName t)}
{:table (.getName t)}))

(defn- ->macaw-error [^BasicTableExtractor$AnalysisError analysis-error]
(keyword "macaw.error" (-> (.-errorType analysis-error)
str/lower-case
(str/replace #"_" "-"))))

(defmacro ^:private kw-or-tables [expr]
`(try (map table->identifier ~expr)
(catch BasicTableExtractor$AnalysisError e#
(->macaw-error e#))
(catch JSQLParserException _e#
:macaw.error/unable-to-parse)))

(defn query->tables
"Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it."
[sql & {:keys [mode] :as opts}]
(case mode
:ast-walker-1 (-> (parsed-query sql)
:ast-walker-1 (-> (parsed-query sql opts)
(query->components opts)
:tables
raw-components)
:basic-select :macaw.error/not-implemented))
:basic-select (->> (parsed-query sql opts)
(BasicTableExtractor/getTables)
kw-or-tables)))

(defn replace-names
"Given an SQL query, apply the given table, column, and schema renames.
Expand Down
20 changes: 14 additions & 6 deletions test/macaw/acceptance_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@
:basic-select})

(def global-overrides
{:basic-select :macaw.error/not-implemented})
{})

(def ns-overrides
{:basic-select {"compound" :macaw.error/unsupported-expression
"mutation" :macaw.error/invalid-query
"dynamic" :macaw.error/invalid-query}})

(def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql")

Expand Down Expand Up @@ -71,8 +76,9 @@
(when (keyword? x)
x))

(defn- get-override [expected-cs mode ck]
(defn- get-override [expected-cs mode fixture ck]
(or (get global-overrides mode)
(get-in ns-overrides [mode (namespace fixture)])
(get-in expected-cs [:overrides mode :error])
(get-in expected-cs [:overrides :error])
(get-in expected-cs [:overrides mode ck])
Expand Down Expand Up @@ -104,16 +110,17 @@
(doseq [[ck cv] (dissoc expected-cs :overrides :error)]
(testing (str prefix " analysis is correct: " (name ck))
(let [actual-cv (get-component cs ck)
override (get-override expected-cs m ck)]
override (get-override expected-cs m fixture ck)]
(validate-analysis cv override actual-cv))))))
;; Testing path for newer modes.
(let [correct (:error expected-cs (:tables expected-cs))
override (get-override expected-cs m :tables)
override (get-override expected-cs m fixture :tables)
;; For now, we only support (and test) :tables
tables (testing (str prefix " table analysis does not throw for mode " m)
(is (ct/tables sql opts)))]
(testing (str prefix " table analysis is correct for mode " m)
(validate-analysis correct override tables)))))
(when-not (and (nil? correct) (nil? override))
(testing (str prefix " table analysis is correct for mode " m)
(validate-analysis correct override tables))))))

(when renames
(let [broken? (:broken? renames)
Expand Down Expand Up @@ -178,6 +185,7 @@
(str/trim
(ct/query-fixture fixture))))))

(test-fixture :dynamic/generate-series)

(test-fixture :compound/cte)
(test-fixture :compound/cte-nonambiguous)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
:overrides
{:basic-select
;; do not allow wildcard selects
{:error :macaw.error/illegal-expression}
{:error :macaw.error/invalid-query}

;; Just plain old wacky
:source-columns [{:table "`project_id.dataset_id.table_*`", :column "_TABLE_SUFFIX"}]
Expand Down
4 changes: 3 additions & 1 deletion test/resources/acceptance/no_source_columns.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{:tables #{{:table "foo"}
{:table "bar"}}
:source-columns #{}}
:source-columns #{}
:overrides
{:basic-select {:error :macaw.error/unsupported-expression}}}
33 changes: 28 additions & 5 deletions test/resources/acceptance/queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ FROM employees
WHERE salary > 60000
GROUP BY department;

-- FIXTURE: cycle/cte
-- FIXTURE: compound/cycle-cte
-- we eventually want to check that the fields are cycled twice in the attribution of the final outputs.
WITH b AS (
SELECT
Expand All @@ -182,7 +182,7 @@ SELECT
z
FROM c;

-- FIXTURE: duplicate-scopes
-- FIXTURE: compound/duplicate-scopes
-- This is a minimal example to illustrate why we need to put an id on each scope.
-- As we add more complex compound query tests this will become redundant, and we can then delete it
-- we eventually want to check that the fields are cycled twice in the attribution of the final outputs.
Expand All @@ -193,7 +193,7 @@ SELECT
c.x
FROM b, c;

-- FIXTURE: generate-series
-- FIXTURE: dynamic/generate-series
SELECT t.day::date AS date
FROM generate_series(timestamp '2021-01-01', now(), interval '1 day') AS t(day)

Expand Down Expand Up @@ -225,7 +225,7 @@ with final as (

select * from final

-- FIXTURE: shadow/subselect
-- FIXTURE: compound/shadow-subselect
SELECT
e.id,
e.name,
Expand All @@ -240,7 +240,7 @@ FROM (
GROUP BY first_name, last_name, department_id
) e JOIN departments d ON d.id = e.department_id;

-- FIXTURE: simple/select-into
-- FIXTURE: mutation/select-into
SELECT id, name
INTO new_user_summary
FROM user;
Expand Down Expand Up @@ -294,3 +294,26 @@ EXEC sp_executesql @SQL

-- FIXTURE: string-concat
SELECT x || y AS z FROM t

-- FIXTURE: mutation/alter-table
ALTER TABLE users
ADD COLUMN email VARCHAR(255);

-- FIXTURE: mutation/drop-table
DROP TABLE IF EXISTS users;

-- FIXTURE: mutation/truncate-table
TRUNCATE TABLE users;

-- FIXTURE: mutation/insert
INSERT INTO users (name, email)
VALUES ('Alice', '[email protected]');

-- FIXTURE: mutation/update
UPDATE users
SET email = '[email protected]'
WHERE name = 'Alice';

-- FIXTURE: mutation/delete
DELETE FROM users
WHERE name = 'Alice';
4 changes: 3 additions & 1 deletion test/resources/acceptance/reserved__final.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{:source-columns [{:table "invoice" :column "amount_paid_cents"}
{:table "invoice" :column "id"}
{:table "invoice" :column "is_deleted"}]
:tables [{:table "invoice"}]}
:tables [{:table "invoice"}]
:overrides
{:basic-select :macaw.error/unsupported-expression}}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{:has-wildcard? true
{:has-wildcard? #{true}
:source-columns []
:tables [{:table "t"}]}
4 changes: 2 additions & 2 deletions test/resources/acceptance/sqlserver__execute.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{:error :macaw.error/illegal-expression
:overrides :macaw.error/unable-to-parse}
{:error :macaw.error/invalid-query
:overrides {:ast-walker-1 :macaw.error/unable-to-parse}}
4 changes: 2 additions & 2 deletions test/resources/acceptance/sqlserver__executesql.analysis.edn
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{:error :macaw.error/illegal-expression
:overrides :macaw.error/unable-to-parse}
{:error :macaw.error/invalid-query
:overrides {:ast-walker-1 :macaw.error/unable-to-parse}}

0 comments on commit 41f6132

Please sign in to comment.