Skip to content

Commit

Permalink
perf: remove person join from actors modal (#26623)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
aspicer and github-actions[bot] authored Dec 9, 2024
1 parent 2df8682 commit 4a56994
Show file tree
Hide file tree
Showing 24 changed files with 763 additions and 3,659 deletions.
4 changes: 2 additions & 2 deletions frontend/src/scenes/trends/persons-modal/personsModalLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ export const personsModalLogic = kea<personsModalLogicType>([
() => [(_, p) => p.additionalSelect],
(additionalSelect: PersonModalLogicProps['additionalSelect']): string[] => {
const extra = Object.values(additionalSelect || {})
return ['actor', 'created_at', ...extra]
return ['actor', ...extra]
},
],
actorsQuery: [
Expand All @@ -344,7 +344,7 @@ export const personsModalLogic = kea<personsModalLogicType>([
kind: NodeKind.ActorsQuery,
source: query,
select: selectFields,
orderBy: orderBy || ['created_at DESC'],
orderBy: orderBy || [],
search: searchTerm,
}
},
Expand Down
3 changes: 3 additions & 0 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,9 @@ class SelectSetQuery(Expr):
initial_select_query: Union[SelectQuery, "SelectSetQuery"]
subsequent_select_queries: list[SelectSetNode]

def select_queries(self):
return [self.initial_select_query] + [node.select_query for node in self.subsequent_select_queries]

@classmethod
def create_from_queries(
cls, queries: Sequence[Union[SelectQuery, "SelectSetQuery"]], set_operator: SetOperator
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/actor_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_actors(self, actor_ids, order_by: str = "") -> dict[str, dict]:
return person_uuid_to_person

def input_columns(self) -> list[str]:
return ["person", "id", "created_at", "person.$delete"]
return ["person", "id", "person.$delete"]

def filter_conditions(self) -> list[ast.Expr]:
where_exprs: list[ast.Expr] = []
Expand Down
61 changes: 47 additions & 14 deletions posthog/hogql_queries/actors_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

from posthog.hogql import ast
from posthog.hogql.constants import HogQLGlobalSettings, HogQLQuerySettings
from posthog.hogql.context import HogQLContext
from posthog.hogql.parser import parse_expr, parse_order_expr
from posthog.hogql.printer import print_ast
from posthog.hogql.property import has_aggregation
from posthog.hogql.resolver_utils import extract_select_queries
from posthog.hogql_queries.actor_strategies import ActorStrategy, PersonStrategy, GroupStrategy
Expand Down Expand Up @@ -143,7 +145,7 @@ def input_columns(self) -> list[str]:
return self.strategy.input_columns()

# TODO: Figure out a more sure way of getting the actor id than using the alias or chain name
def source_id_column(self, source_query: ast.SelectQuery | ast.SelectSetQuery) -> list[str]:
def source_id_column(self, source_query: ast.SelectQuery | ast.SelectSetQuery) -> list[int | str]:
# Figure out the id column of the source query, first column that has id in the name
if isinstance(source_query, ast.SelectQuery):
select = source_query.select
Expand Down Expand Up @@ -248,15 +250,49 @@ def to_query(self) -> ast.SelectQuery:
order_by = []

with self.timings.measure("select"):
select_query = ast.SelectQuery(
select=columns,
where=where,
having=having,
group_by=group_by if has_any_aggregation else None,
order_by=order_by,
settings=HogQLQuerySettings(join_algorithm="auto", optimize_aggregation_in_order=True),
)
if not self.query.source:
join_expr = ast.JoinExpr(table=ast.Field(chain=[self.strategy.origin]))
select_query.select_from = ast.JoinExpr(table=ast.Field(chain=[self.strategy.origin]))
else:
assert self.source_query_runner is not None # For type checking
source_query = self.source_query_runner.to_actors_query()

source_id_chain = self.source_id_column(source_query)
source_alias = "source"

# If we aren't joining with the origin, give the source the origin_id
for source in (
[source_query] if isinstance(source_query, ast.SelectQuery) else source_query.select_queries()
):
source.select.append(
ast.Alias(alias=self.strategy.origin_id, expr=ast.Field(chain=source_id_chain))
)
select_query.select_from = ast.JoinExpr(
table=source_query,
alias=source_alias,
)

try:
print_ast(
select_query,
context=HogQLContext(
team=self.team,
enable_select_queries=True,
timings=self.timings,
modifiers=self.modifiers,
),
dialect="clickhouse",
)
return select_query
except Exception:
pass

origin = self.strategy.origin

join_on: ast.Expr = ast.CompareOperation(
Expand All @@ -277,7 +313,7 @@ def to_query(self) -> ast.SelectQuery:
exprs=[
join_on,
ast.CompareOperation(
left=ast.Field(chain=[origin, "id"]),
left=ast.Field(chain=[origin, self.strategy.origin_id]),
right=ast.SelectQuery(
select=[ast.Field(chain=[source_alias, *self.source_id_column(source_query)])],
select_from=ast.JoinExpr(table=source_query, alias=source_alias),
Expand All @@ -287,7 +323,12 @@ def to_query(self) -> ast.SelectQuery:
]
)

join_expr = ast.JoinExpr(
# remove id, which now comes from the origin
for source in (
[source_query] if isinstance(source_query, ast.SelectQuery) else source_query.select_queries()
):
source.select.pop()
select_query.select_from = ast.JoinExpr(
table=source_query,
alias=source_alias,
next_join=ast.JoinExpr(
Expand All @@ -300,15 +341,7 @@ def to_query(self) -> ast.SelectQuery:
),
)

return ast.SelectQuery(
select=columns,
select_from=join_expr,
where=where,
having=having,
group_by=group_by if has_any_aggregation else None,
order_by=order_by,
settings=HogQLQuerySettings(join_algorithm="auto", optimize_aggregation_in_order=True),
)
return select_query

def to_actors_query(self) -> ast.SelectQuery:
return self.to_query()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_conversion_window_seconds.1
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -184,14 +184,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down Expand Up @@ -508,12 +501,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.1
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -605,14 +598,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -628,12 +614,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.2
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -725,14 +711,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -748,12 +727,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.3
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -845,14 +824,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down Expand Up @@ -1727,10 +1699,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -1848,13 +1821,7 @@
WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('finance')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -1870,10 +1837,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -1991,13 +1959,7 @@
WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('finance')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -2013,10 +1975,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -2134,13 +2097,7 @@
WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('technology')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -2156,10 +2113,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -2277,13 +2235,7 @@
WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('technology')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down
Loading

0 comments on commit 4a56994

Please sign in to comment.