-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Support Spark explode outer #11954
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,6 +98,7 @@ class Unnest : public Operator { | |
|
||
// Generate output for 'rowRange' represented rows. | ||
// @param rowRange Range of rows to process. | ||
template <bool isOuter> | ||
RowVectorPtr generateOutput(const RowRange& rowRange); | ||
|
||
// Invoked by generateOutput function above to generate the repeated output | ||
|
@@ -106,6 +107,9 @@ class Unnest : public Operator { | |
const RowRange& rowRange, | ||
std::vector<VectorPtr>& outputs); | ||
|
||
template <bool isOuter> | ||
void countMaxNumElementsPerRow(int32_t size); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps document this function and the 'isOuter' template parameter. |
||
|
||
struct UnnestChannelEncoding { | ||
BufferPtr indices; | ||
BufferPtr nulls; | ||
|
@@ -121,6 +125,7 @@ class Unnest : public Operator { | |
const RowRange& rowRange); | ||
|
||
// Invoked by generateOutput for the ordinality column. | ||
template <bool isOuter> | ||
VectorPtr generateOrdinalityVector(const RowRange& rowRange); | ||
|
||
const bool withOrdinality_; | ||
|
@@ -142,5 +147,8 @@ class Unnest : public Operator { | |
|
||
// Next 'input_' row to process in getOutput(). | ||
vector_size_t nextInputRow_{0}; | ||
|
||
std::vector<bool> rawOrdinalityIsNull_; | ||
const bool isOuter_; | ||
}; | ||
} // namespace facebook::velox::exec |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -474,6 +474,47 @@ TEST_P(UnnestTest, batchSize) { | |
ASSERT_EQ(expectedNumVectors, stats.at(unnestId).outputVectors); | ||
} | ||
|
||
TEST_P(UnnestTest, basicArrayWithOuter) { | ||
auto vector = makeRowVector({ | ||
makeFlatVector<int64_t>({1, 2, 3}), | ||
makeNullableArrayVector<int64_t>({ | ||
{1, 2, std::nullopt}, | ||
{}, | ||
{3} | ||
}), | ||
}); | ||
|
||
createDuckDbTable({vector}); | ||
|
||
// is_outer = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: isOuter |
||
auto op1 = PlanBuilder().values({vector}).unnest({"c0"}, {"c1"}, std::nullopt, false /* is_outer */).planNode(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto, perhaps use the style like below.
|
||
auto params1 = makeCursorParameters(op1); | ||
auto expected1 = makeRowVector({ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can we have 'op', 'params', 'expected' and reuse them in the other test cases? |
||
makeFlatVector<int64_t>({1, 1, 1, 3}), | ||
makeNullableFlatVector<int64_t>({1, 2, std::nullopt, 3}), | ||
}); | ||
assertQuery(params1, expected1); | ||
|
||
// is_outer = true | ||
auto op2 = PlanBuilder().values({vector}).unnest({"c0"}, {"c1"}, std::nullopt, true /* is_outer */).planNode(); | ||
auto params2 = makeCursorParameters(op2); | ||
auto expected2 = makeRowVector({ | ||
makeFlatVector<int64_t>({1, 1, 1, 2, 3}), | ||
makeNullableFlatVector<int64_t>({1, 2, std::nullopt, std::nullopt, 3}), | ||
}); | ||
assertQuery(params2, expected2); | ||
|
||
// ordinal = true && is_outer = true | ||
auto op3 = PlanBuilder().values({vector}).unnest({"c0"}, {"c1"}, "ordinal", true /* is_outer */).planNode(); | ||
auto params3 = makeCursorParameters(op3); | ||
auto expected3 = makeRowVector({ | ||
makeFlatVector<int64_t>({1, 1, 1, 2, 3}), | ||
makeNullableFlatVector<int64_t>({1, 2, std::nullopt, std::nullopt, 3}), | ||
makeNullableFlatVector<int64_t>({1, 2, 3, std::nullopt, 1}), | ||
}); | ||
assertQuery(params3, expected3); | ||
} | ||
|
||
VELOX_INSTANTIATE_TEST_SUITE_P( | ||
UnnestTest, | ||
UnnestTest, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: drop const when passing by value.