diff --git a/java/com/metabase/macaw/BasicTableExtractor.java b/java/com/metabase/macaw/BasicTableExtractor.java new file mode 100644 index 0000000..8da5606 --- /dev/null +++ b/java/com/metabase/macaw/BasicTableExtractor.java @@ -0,0 +1,137 @@ +package com.metabase.macaw; + +import net.sf.jsqlparser.schema.Table; +import net.sf.jsqlparser.statement.Statement; +import net.sf.jsqlparser.statement.select.*; + +import java.util.*; +import java.util.function.Consumer; + +/** + * Return a simplified query representation we can work with further, if possible. + */ +@SuppressWarnings({ + "PatternVariableCanBeUsed", "IfCanBeSwitch"} // don't force a newer JVM version +) +public final class BasicTableExtractor { + + public enum ErrorType { + UNSUPPORTED_EXPRESSION, + INVALID_QUERY, + UNABLE_TO_PARSE + } + + public static class AnalysisError extends RuntimeException { + public final ErrorType errorType; + public final Throwable cause; + + public AnalysisError(ErrorType errorType) { + this.errorType = errorType; + this.cause = null; + } + + public AnalysisError(ErrorType errorType, Throwable cause) { + this.errorType = errorType; + this.cause = cause; + } + } + + public static Set getTables(Statement statement) { + try { + if (statement instanceof Select) { + return getTables((Select) statement); + } + // This is not a query, it's probably a statement. + throw new AnalysisError(ErrorType.INVALID_QUERY); + } catch (IllegalArgumentException e) { + // This query uses features that we do not yet support translating. + throw new AnalysisError(ErrorType.UNABLE_TO_PARSE, e); + } + } + + private static Set
getTables(Select select) { + if (select instanceof PlainSelect) { + return getTables(select.getPlainSelect()); + } else { + // We don't support more complex kinds of select statements yet. + throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION); + } + } + + private static Set
getTables(PlainSelect select) { + // these are fine, but irrelevant + // + // - select.getDistinct() + // - select.getHaving() + // - select.getKsqlWindow() + // - select.getMySqlHintStraightJoin() + // - select.getMySqlSqlCacheFlag() + // - select.getLimitBy() + // - select.getOffset() + // - select.getOptimizeFor() + // - select.getOracleHint() + // - select.getTop() + // - select.getWait() + + // Not currently parseable + if (select.getLateralViews() != null || + select.getOracleHierarchical() != null || + select.getWindowDefinitions() != null) { + throw new AnalysisError(ErrorType.UNABLE_TO_PARSE); + } + + // Not currently supported + if (select.getWithItemsList() != null) { + throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION); + } + + // any of these - nope out + if (select.getFetch() != null || + select.getFirst() != null || + select.getForClause() != null || + select.getForMode() != null || + select.getForUpdateTable() != null || + select.getForXmlPath() != null || + select.getIntoTables() != null || + select.getIsolation() != null || + select.getSkip() != null) { + throw new AnalysisError(ErrorType.INVALID_QUERY); + } + + Set
tables = new HashSet<>(); + + for (SelectItem item : select.getSelectItems()) { + if (item.getExpression() instanceof ParenthesedSelect) { + // Do not allow sub-selects. + throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION); + } + } + + Consumer pushOrThrow = (FromItem item) -> { + if (item instanceof Table) { + Table table = (Table) item; + if (table.getName().contains("*")) { + // Do not allow table wildcards. + throw new AnalysisError(ErrorType.INVALID_QUERY); + } + tables.add(table); + } else if (item instanceof TableFunction) { + // Do not allow dynamic tables + throw new AnalysisError(ErrorType.INVALID_QUERY); + } else { + // Only allow simple table references. + throw new AnalysisError(ErrorType.UNSUPPORTED_EXPRESSION); + } + }; + + if (select.getFromItem() != null) { + pushOrThrow.accept(select.getFromItem()); + List joins = select.getJoins(); + if (joins != null) { + joins.stream().map(Join::getFromItem).forEach(pushOrThrow); + } + } + return tables; + } + +} diff --git a/src/macaw/core.clj b/src/macaw/core.clj index cb2e4d0..2e5d05f 100644 --- a/src/macaw/core.clj +++ b/src/macaw/core.clj @@ -5,10 +5,12 @@ [macaw.collect :as collect] [macaw.rewrite :as rewrite]) (:import - (com.metabase.macaw AstWalker$Scope) + (com.metabase.macaw AstWalker$Scope BasicTableExtractor BasicTableExtractor$AnalysisError) (java.util.function Consumer) + (net.sf.jsqlparser JSQLParserException) (net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil) - (net.sf.jsqlparser.parser.feature Feature))) + (net.sf.jsqlparser.parser.feature Feature) + (net.sf.jsqlparser.schema Table))) (set! *warn-on-reflection* true) @@ -83,15 +85,37 @@ (defn- raw-components [xs] (into (empty xs) (keep :component) xs)) +(defn- table->identifier + "Given a table object, return a map with the schema and table names." + [^Table t] + (if (.getSchemaName t) + {:schema (.getSchemaName t) + :table (.getName t)} + {:table (.getName t)})) + +(defn- ->macaw-error [^BasicTableExtractor$AnalysisError analysis-error] + (keyword "macaw.error" (-> (.-errorType analysis-error) + str/lower-case + (str/replace #"_" "-")))) + +(defmacro ^:private kw-or-tables [expr] + `(try (map table->identifier ~expr) + (catch BasicTableExtractor$AnalysisError e# + (->macaw-error e#)) + (catch JSQLParserException _e# + :macaw.error/unable-to-parse))) + (defn query->tables "Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it." [sql & {:keys [mode] :as opts}] (case mode - :ast-walker-1 (-> (parsed-query sql) + :ast-walker-1 (-> (parsed-query sql opts) (query->components opts) :tables raw-components) - :basic-select :macaw.error/not-implemented)) + :basic-select (->> (parsed-query sql opts) + (BasicTableExtractor/getTables) + kw-or-tables))) (defn replace-names "Given an SQL query, apply the given table, column, and schema renames. diff --git a/test/macaw/acceptance_test.clj b/test/macaw/acceptance_test.clj index 9b67714..df12cc9 100644 --- a/test/macaw/acceptance_test.clj +++ b/test/macaw/acceptance_test.clj @@ -40,7 +40,12 @@ :basic-select}) (def global-overrides - {:basic-select :macaw.error/not-implemented}) + {}) + +(def ns-overrides + {:basic-select {"compound" :macaw.error/unsupported-expression + "mutation" :macaw.error/invalid-query + "dynamic" :macaw.error/invalid-query}}) (def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql") @@ -71,8 +76,9 @@ (when (keyword? x) x)) -(defn- get-override [expected-cs mode ck] +(defn- get-override [expected-cs mode fixture ck] (or (get global-overrides mode) + (get-in ns-overrides [mode (namespace fixture)]) (get-in expected-cs [:overrides mode :error]) (get-in expected-cs [:overrides :error]) (get-in expected-cs [:overrides mode ck]) @@ -104,16 +110,17 @@ (doseq [[ck cv] (dissoc expected-cs :overrides :error)] (testing (str prefix " analysis is correct: " (name ck)) (let [actual-cv (get-component cs ck) - override (get-override expected-cs m ck)] + override (get-override expected-cs m fixture ck)] (validate-analysis cv override actual-cv)))))) ;; Testing path for newer modes. (let [correct (:error expected-cs (:tables expected-cs)) - override (get-override expected-cs m :tables) + override (get-override expected-cs m fixture :tables) ;; For now, we only support (and test) :tables tables (testing (str prefix " table analysis does not throw for mode " m) (is (ct/tables sql opts)))] - (testing (str prefix " table analysis is correct for mode " m) - (validate-analysis correct override tables))))) + (when-not (and (nil? correct) (nil? override)) + (testing (str prefix " table analysis is correct for mode " m) + (validate-analysis correct override tables)))))) (when renames (let [broken? (:broken? renames) @@ -178,6 +185,7 @@ (str/trim (ct/query-fixture fixture)))))) + (test-fixture :dynamic/generate-series) (test-fixture :compound/cte) (test-fixture :compound/cte-nonambiguous) diff --git a/test/resources/acceptance/bigquery__table_wildcard.analysis.edn b/test/resources/acceptance/bigquery__table_wildcard.analysis.edn index d7c0d12..9fb9ca9 100644 --- a/test/resources/acceptance/bigquery__table_wildcard.analysis.edn +++ b/test/resources/acceptance/bigquery__table_wildcard.analysis.edn @@ -4,7 +4,7 @@ :overrides {:basic-select ;; do not allow wildcard selects - {:error :macaw.error/illegal-expression} + {:error :macaw.error/invalid-query} ;; Just plain old wacky :source-columns [{:table "`project_id.dataset_id.table_*`", :column "_TABLE_SUFFIX"}] diff --git a/test/resources/acceptance/cycle__cte.analysis.edn b/test/resources/acceptance/compound__cycle_cte.analysis.edn similarity index 100% rename from test/resources/acceptance/cycle__cte.analysis.edn rename to test/resources/acceptance/compound__cycle_cte.analysis.edn diff --git a/test/resources/acceptance/duplicate_scopes.analysis.edn b/test/resources/acceptance/compound__duplicate_scopes.analysis.edn similarity index 100% rename from test/resources/acceptance/duplicate_scopes.analysis.edn rename to test/resources/acceptance/compound__duplicate_scopes.analysis.edn diff --git a/test/resources/acceptance/duplicate_scopes.renames.edn b/test/resources/acceptance/compound__duplicate_scopes.renames.edn similarity index 100% rename from test/resources/acceptance/duplicate_scopes.renames.edn rename to test/resources/acceptance/compound__duplicate_scopes.renames.edn diff --git a/test/resources/acceptance/duplicate_scopes.rewritten.sql b/test/resources/acceptance/compound__duplicate_scopes.rewritten.sql similarity index 100% rename from test/resources/acceptance/duplicate_scopes.rewritten.sql rename to test/resources/acceptance/compound__duplicate_scopes.rewritten.sql diff --git a/test/resources/acceptance/shadow__subselect.analysis.edn b/test/resources/acceptance/compound__shadow_subselect.analysis.edn similarity index 100% rename from test/resources/acceptance/shadow__subselect.analysis.edn rename to test/resources/acceptance/compound__shadow_subselect.analysis.edn diff --git a/test/resources/acceptance/no_source_columns.analysis.edn b/test/resources/acceptance/no_source_columns.analysis.edn index ead7e88..56261e7 100644 --- a/test/resources/acceptance/no_source_columns.analysis.edn +++ b/test/resources/acceptance/no_source_columns.analysis.edn @@ -1,3 +1,5 @@ {:tables #{{:table "foo"} {:table "bar"}} - :source-columns #{}} + :source-columns #{} + :overrides + {:basic-select {:error :macaw.error/unsupported-expression}}} diff --git a/test/resources/acceptance/queries.sql b/test/resources/acceptance/queries.sql index 01d1ebd..858f0a7 100644 --- a/test/resources/acceptance/queries.sql +++ b/test/resources/acceptance/queries.sql @@ -160,7 +160,7 @@ FROM employees WHERE salary > 60000 GROUP BY department; --- FIXTURE: cycle/cte +-- FIXTURE: compound/cycle-cte -- we eventually want to check that the fields are cycled twice in the attribution of the final outputs. WITH b AS ( SELECT @@ -182,7 +182,7 @@ SELECT z FROM c; --- FIXTURE: duplicate-scopes +-- FIXTURE: compound/duplicate-scopes -- This is a minimal example to illustrate why we need to put an id on each scope. -- As we add more complex compound query tests this will become redundant, and we can then delete it -- we eventually want to check that the fields are cycled twice in the attribution of the final outputs. @@ -193,7 +193,7 @@ SELECT c.x FROM b, c; --- FIXTURE: generate-series +-- FIXTURE: dynamic/generate-series SELECT t.day::date AS date FROM generate_series(timestamp '2021-01-01', now(), interval '1 day') AS t(day) @@ -225,7 +225,7 @@ with final as ( select * from final --- FIXTURE: shadow/subselect +-- FIXTURE: compound/shadow-subselect SELECT e.id, e.name, @@ -240,7 +240,7 @@ FROM ( GROUP BY first_name, last_name, department_id ) e JOIN departments d ON d.id = e.department_id; --- FIXTURE: simple/select-into +-- FIXTURE: mutation/select-into SELECT id, name INTO new_user_summary FROM user; @@ -294,3 +294,26 @@ EXEC sp_executesql @SQL -- FIXTURE: string-concat SELECT x || y AS z FROM t + +-- FIXTURE: mutation/alter-table +ALTER TABLE users +ADD COLUMN email VARCHAR(255); + +-- FIXTURE: mutation/drop-table +DROP TABLE IF EXISTS users; + +-- FIXTURE: mutation/truncate-table +TRUNCATE TABLE users; + +-- FIXTURE: mutation/insert +INSERT INTO users (name, email) +VALUES ('Alice', 'alice@example.com'); + +-- FIXTURE: mutation/update +UPDATE users +SET email = 'newemail@example.com' +WHERE name = 'Alice'; + +-- FIXTURE: mutation/delete +DELETE FROM users +WHERE name = 'Alice'; diff --git a/test/resources/acceptance/reserved__final.analysis.edn b/test/resources/acceptance/reserved__final.analysis.edn index 3794d30..5263d57 100644 --- a/test/resources/acceptance/reserved__final.analysis.edn +++ b/test/resources/acceptance/reserved__final.analysis.edn @@ -1,4 +1,6 @@ {:source-columns [{:table "invoice" :column "amount_paid_cents"} {:table "invoice" :column "id"} {:table "invoice" :column "is_deleted"}] - :tables [{:table "invoice"}]} + :tables [{:table "invoice"}] + :overrides + {:basic-select :macaw.error/unsupported-expression}} diff --git a/test/resources/acceptance/simple__select_star.edn b/test/resources/acceptance/simple__select_star.analysis.edn similarity index 67% rename from test/resources/acceptance/simple__select_star.edn rename to test/resources/acceptance/simple__select_star.analysis.edn index 2c03803..413df04 100644 --- a/test/resources/acceptance/simple__select_star.edn +++ b/test/resources/acceptance/simple__select_star.analysis.edn @@ -1,3 +1,3 @@ -{:has-wildcard? true +{:has-wildcard? #{true} :source-columns [] :tables [{:table "t"}]} diff --git a/test/resources/acceptance/sqlserver__execute.analysis.edn b/test/resources/acceptance/sqlserver__execute.analysis.edn index d4f0a14..473e7d7 100644 --- a/test/resources/acceptance/sqlserver__execute.analysis.edn +++ b/test/resources/acceptance/sqlserver__execute.analysis.edn @@ -1,2 +1,2 @@ -{:error :macaw.error/illegal-expression - :overrides :macaw.error/unable-to-parse} +{:error :macaw.error/invalid-query + :overrides {:ast-walker-1 :macaw.error/unable-to-parse}} diff --git a/test/resources/acceptance/sqlserver__executesql.analysis.edn b/test/resources/acceptance/sqlserver__executesql.analysis.edn index d4f0a14..473e7d7 100644 --- a/test/resources/acceptance/sqlserver__executesql.analysis.edn +++ b/test/resources/acceptance/sqlserver__executesql.analysis.edn @@ -1,2 +1,2 @@ -{:error :macaw.error/illegal-expression - :overrides :macaw.error/unable-to-parse} +{:error :macaw.error/invalid-query + :overrides {:ast-walker-1 :macaw.error/unable-to-parse}}