Skip to content

Commit

Permalink
feat: Add TextWriter
Browse files Browse the repository at this point in the history
  • Loading branch information
kewang1024 committed Dec 31, 2024
1 parent 20eb8ec commit c17a2ce
Show file tree
Hide file tree
Showing 17 changed files with 944 additions and 1 deletion.
1 change: 1 addition & 0 deletions velox/connectors/hive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ velox_link_libraries(
velox_dwio_orc_reader
velox_dwio_parquet_reader
velox_dwio_parquet_writer
velox_dwio_text_writer_register
velox_file
velox_hive_partition_function
velox_type_tz
Expand Down
5 changes: 4 additions & 1 deletion velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1049,8 +1049,11 @@ void updateWriterOptionsFromHiveConfig(
case dwio::common::FileFormat::NIMBLE:
// No-op for now.
break;
case dwio::common::FileFormat::TEXT:
// No-op for now.
break;
default:
VELOX_UNSUPPORTED("{}", fileFormat);
VELOX_UNSUPPORTED("Unsupported file format: {}", fileFormat);
}
}

Expand Down
1 change: 1 addition & 0 deletions velox/dwio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ add_subdirectory(catalog)
add_subdirectory(dwrf)
add_subdirectory(orc)
add_subdirectory(parquet)
add_subdirectory(text)
23 changes: 23 additions & 0 deletions velox/dwio/text/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The tests subdirectory is only added when VELOX_BUILD_TESTING is enabled.
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()

# The writer subdirectory is always added.
# NOTE(review): presumably it defines the velox_dwio_text_writer target linked
# below - confirm against writer/CMakeLists.txt.
add_subdirectory(writer)

# Library built from RegisterTextWriter.cpp, which holds the register/unregister
# entry points for the text writer factory.
velox_add_library(velox_dwio_text_writer_register RegisterTextWriter.cpp)

velox_link_libraries(velox_dwio_text_writer_register velox_dwio_text_writer)
29 changes: 29 additions & 0 deletions velox/dwio/text/RegisterTextWriter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/dwio/text/writer/TextWriter.h"

namespace facebook::velox::text {

void registerTextWriterFactory() {
dwio::common::registerWriterFactory(std::make_shared<TextWriterFactory>());
}

/// Asks dwio::common::unregisterWriterFactory() to drop the factory registered
/// for the TEXT file format.
void unregisterTextWriterFactory() {
  const auto textFormat = dwio::common::FileFormat::TEXT;
  dwio::common::unregisterWriterFactory(textFormat);
}

} // namespace facebook::velox::text
25 changes: 25 additions & 0 deletions velox/dwio/text/RegisterTextWriter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace facebook::velox::text {

/// Registers a TextWriterFactory through dwio::common::registerWriterFactory().
void registerTextWriterFactory();

/// Unregisters the writer factory for dwio::common::FileFormat::TEXT through
/// dwio::common::unregisterWriterFactory().
void unregisterTextWriterFactory();

} // namespace facebook::velox::text
26 changes: 26 additions & 0 deletions velox/dwio/text/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Link libraries collected in one variable; the writer test target references
# it as ${TEST_LINK_LIBS}.
set(TEST_LINK_LIBS
velox_dwio_common_test_utils
velox_vector_test_lib
velox_exec_test_lib
velox_temp_path
GTest::gtest
GTest::gtest_main
GTest::gmock
gflags::gflags
glog::glog)

# The text writer test target is defined in the writer subdirectory.
add_subdirectory(writer)
75 changes: 75 additions & 0 deletions velox/dwio/text/tests/writer/BufferedWriterSinkTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include "velox/common/base/Fs.h"
#include "velox/common/file/FileSystems.h"
#include "velox/dwio/text/tests/writer/FileReaderUtil.h"
#include "velox/dwio/text/writer/TextWriter.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/vector/tests/utils/VectorTestBase.h"

namespace facebook::velox::text {

/// Fixture for the BufferedWriterSink tests. Each test gets a root memory
/// pool, a leaf pool created from it, and a fresh temporary directory.
class BufferedWriterSinkTest : public testing::Test,
                               public velox::test::VectorTestBase {
 protected:
  /// Sets the testing MemoryManager instance from an empty-initialized
  /// argument.
  static void SetUpTestCase() {
    memory::MemoryManager::testingSetInstance({});
  }

 public:
  /// Registers the local file system and the local file sink factory, then
  /// creates the pools and the temporary directory used by the test body.
  void SetUp() override {
    velox::filesystems::registerLocalFileSystem();
    dwio::common::LocalFileSink::registerFactory();

    // The leaf pool is a child of the root pool, so the root is created first.
    rootPool_ = memory::memoryManager()->addRootPool("BufferedWriterSinkTest");
    leafPool_ = rootPool_->addLeafChild("BufferedWriterSinkTest");

    tempPath_ = exec::test::TempDirectoryPath::create();
  }

 protected:
  // Root pool created in SetUp(); parent of leafPool_.
  std::shared_ptr<memory::MemoryPool> rootPool_;
  // Leaf pool passed to the file sink options in the tests.
  std::shared_ptr<memory::MemoryPool> leafPool_;
  // Temporary directory that holds the files written by the tests.
  std::shared_ptr<exec::test::TempDirectoryPath> tempPath_;
};

// Writes twice through a BufferedWriterSink, closes it, and checks how many
// characters ended up in the file.
TEST_F(BufferedWriterSinkTest, write) {
  const auto outputPath = fs::path(
      fmt::format("{}/test_buffered_writer.txt", tempPath_->getPath()));

  auto fileSink = std::make_unique<dwio::common::LocalFileSink>(
      outputPath, dwio::common::FileSink::Options{.pool = leafPool_.get()});
  auto sinkPool = rootPool_->addLeafChild("bufferedWriterSinkTest");
  auto bufferedSink = std::make_unique<BufferedWriterSink>(
      std::move(fileSink), std::move(sinkPool), 15);

  // Both calls pass 10 as the second argument (presumably the number of bytes
  // to take from the string - confirm against BufferedWriterSink::write).
  bufferedSink->write("hello world", 10);
  bufferedSink->write("this is writer", 10);
  bufferedSink->close();

  // After close() the file is expected to hold 10 + 10 characters.
  const std::string contents = readFile(outputPath);
  EXPECT_EQ(contents.size(), 20);
}

// Writes twice through a BufferedWriterSink, aborts instead of closing, and
// checks how many characters ended up in the file.
TEST_F(BufferedWriterSinkTest, abort) {
  const auto outputPath =
      fs::path(fmt::format("{}/test_buffered_abort.txt", tempPath_->getPath()));

  auto fileSink = std::make_unique<dwio::common::LocalFileSink>(
      outputPath, dwio::common::FileSink::Options{.pool = leafPool_.get()});
  auto sinkPool = rootPool_->addLeafChild("bufferedWriterSinkTest");
  auto bufferedSink = std::make_unique<BufferedWriterSink>(
      std::move(fileSink), std::move(sinkPool), 15);

  bufferedSink->write("hello world", 10);
  bufferedSink->write("this is writer", 10);
  bufferedSink->abort();

  // Only 10 characters are expected in the file after abort().
  // NOTE(review): presumably the first write is flushed when the second one
  // would exceed the third constructor argument (15), and abort() drops what
  // is still buffered - confirm against BufferedWriterSink.
  const std::string contents = readFile(outputPath);
  EXPECT_EQ(contents.size(), 10);
}
} // namespace facebook::velox::text
32 changes: 32 additions & 0 deletions velox/dwio/text/tests/writer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Test executable built from the two test sources plus the FileReaderUtil
# helper source.
add_executable(velox_text_writer_test
TextWriterTest.cpp BufferedWriterSinkTest.cpp FileReaderUtil.cpp)

# Registers the executable as a test that runs with this source directory as
# its working directory.
add_test(
NAME velox_text_writer_test
COMMAND velox_text_writer_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

# ${TEST_LINK_LIBS} is expanded here but not set in this file.
# NOTE(review): presumably it is inherited from the parent tests/CMakeLists.txt,
# in which case GTest::gtest and velox_dwio_common_test_utils are listed twice -
# confirm.
target_link_libraries(
velox_text_writer_test
velox_dwio_text_writer
velox_dwio_common_test_utils
velox_link_libs
Boost::regex
Folly::folly
${TEST_LINK_LIBS}
GTest::gtest
fmt::fmt)
58 changes: 58 additions & 0 deletions velox/dwio/text/tests/writer/FileReaderUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/dwio/text/tests/writer/FileReaderUtil.h"

namespace facebook::velox::text {

/// Reads the file at 'name' line by line and returns the lines concatenated
/// back to back. Line terminators consumed by std::getline are not part of the
/// result. Returns an empty string when nothing can be read from the file.
std::string readFile(const std::string& name) {
  std::ifstream input(name);
  std::string contents;
  for (std::string current; std::getline(input, current);) {
    contents += current;
  }
  return contents;
}

/// Reads the file at 'name' line by line and splits every line on
/// TextFileTraits::kSOH. Returns one vector of fields per line, in file order.
std::vector<std::vector<std::string>> parseTextFile(const std::string& name) {
  std::vector<std::vector<std::string>> rows;
  std::ifstream input(name);
  for (std::string record; std::getline(input, record);) {
    rows.push_back(splitTextLine(record, TextFileTraits::kSOH));
  }
  return rows;
}

/// Splits 'str' at every occurrence of 'delimiter' and returns the pieces in
/// order. Empty pieces are kept, so the result always has one more element
/// than the number of delimiters in 'str' (an empty input yields one empty
/// string).
std::vector<std::string> splitTextLine(const std::string& str, char delimiter) {
  std::vector<std::string> fields;
  std::size_t fieldBegin = 0;

  for (std::size_t pos = str.find(delimiter); pos != std::string::npos;
       pos = str.find(delimiter, fieldBegin)) {
    fields.push_back(str.substr(fieldBegin, pos - fieldBegin));
    // Continue searching right after the delimiter that was just consumed.
    fieldBegin = pos + 1;
  }

  // Whatever follows the last delimiter (possibly nothing) is the final field.
  fields.push_back(str.substr(fieldBegin));
  return fields;
}
} // namespace facebook::velox::text
25 changes: 25 additions & 0 deletions velox/dwio/text/tests/writer/FileReaderUtil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/Fs.h"
#include "velox/common/file/FileSystems.h"
#include "velox/dwio/text/writer/TextWriter.h"

namespace facebook::velox::text {
// NOTE(review): this header has no '#pragma once' or include guard, unlike
// RegisterTextWriter.h - consider adding one.

/// Returns the lines of the file at 'name' concatenated without their line
/// terminators.
std::string readFile(const std::string& name);
/// Returns one vector of fields per line of the file at 'name'; lines are split
/// on TextFileTraits::kSOH.
std::vector<std::vector<std::string>> parseTextFile(const std::string& name);
/// Splits 'str' on 'delimiter', keeping empty fields; the result always has at
/// least one element.
std::vector<std::string> splitTextLine(const std::string& str, char delimiter);
} // namespace facebook::velox::text
Loading

0 comments on commit c17a2ce

Please sign in to comment.