From 0761ec1c4935adbda87d85cd880e06ac78cd6eb4 Mon Sep 17 00:00:00 2001 From: Yizhuo Liang <70337586+yizhuoliang@users.noreply.github.com> Date: Wed, 11 Dec 2024 19:18:23 -0800 Subject: [PATCH] fix: Make ScanAndSort.cpp example up-to-date (#11803) Summary: The ScanAndSort.cpp simple example has been broken, its APIs used are outdated. Mainly, `Task::next()` only work with `kSerial` mode now. Also register writer factory and fixed the filesystem access issues. Pull Request resolved: https://github.com/facebookincubator/velox/pull/11803 Reviewed By: Yuhta Differential Revision: D67060616 Pulled By: bikramSingh91 fbshipit-source-id: a0a542895021a36f086a1325eab634ffd1ef6ee1 --- velox/examples/ScanAndSort.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/velox/examples/ScanAndSort.cpp b/velox/examples/ScanAndSort.cpp index 9dc9ffc044d0..e7bb544d534b 100644 --- a/velox/examples/ScanAndSort.cpp +++ b/velox/examples/ScanAndSort.cpp @@ -19,7 +19,9 @@ #include "velox/common/memory/Memory.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" +#include "velox/dwio/common/FileSink.h" #include "velox/dwio/dwrf/RegisterDwrfReader.h" +#include "velox/dwio/dwrf/RegisterDwrfWriter.h" #include "velox/exec/Task.h" #include "velox/exec/tests/utils/HiveConnectorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" @@ -31,7 +33,6 @@ #include using namespace facebook::velox; -using exec::test::HiveConnectorTestBase; // This file contains a step-by-step minimal example of a workflow that: // @@ -102,12 +103,15 @@ int main(int argc, char** argv) { // filesystem. We also need to register the dwrf reader factory as well as a // write protocol, in this case commit is not required: filesystems::registerLocalFileSystem(); + dwio::common::registerFileSinks(); dwrf::registerDwrfReaderFactory(); + dwrf::registerDwrfWriterFactory(); // Create a temporary dir to store the local file created. Note that this // directory is automatically removed when the `tempDir` object runs out of // scope. auto tempDir = exec::test::TempDirectoryPath::create(); + auto absTempDirPath = tempDir->getPath(); // Once we finalize setting up the Hive connector, let's define our query // plan. We use the helper `PlanBuilder` class to generate the query plan @@ -122,7 +126,7 @@ int main(int argc, char** argv) { auto writerPlanFragment = exec::test::PlanBuilder() .values({rowVector}) - .tableWrite("targetDirectory", dwio::common::FileFormat::DWRF) + .tableWrite(absTempDirPath, dwio::common::FileFormat::DWRF) .planFragment(); std::shared_ptr executor( @@ -137,7 +141,7 @@ int main(int argc, char** argv) { writerPlanFragment, /*destination=*/0, core::QueryCtx::create(executor.get()), - exec::Task::ExecutionMode::kParallel); + exec::Task::ExecutionMode::kSerial); // next() starts execution using the client thread. The loop pumps output // vectors out of the task (there are none in this query fragment). @@ -167,7 +171,7 @@ int main(int argc, char** argv) { readPlanFragment, /*destination=*/0, core::QueryCtx::create(executor.get()), - exec::Task::ExecutionMode::kParallel); + exec::Task::ExecutionMode::kSerial); // Now that we have the query fragment and Task structure set up, we will // add data to it via `splits`. @@ -176,7 +180,7 @@ int main(int argc, char** argv) { // HiveConnectorSplit for each file, using the same HiveConnector id defined // above, the local file path (the "file:" prefix specifies which FileSystem // to use; local, in this case), and the file format (DWRF/ORC). - for (auto& filePath : fs::directory_iterator(tempDir->getPath())) { + for (auto& filePath : fs::directory_iterator(absTempDirPath)) { auto connectorSplit = std::make_shared( kHiveConnectorId, "file:" + filePath.path().string(),