From 797ba434a9e541f4c9737550de748ea8d96558a5 Mon Sep 17 00:00:00 2001 From: felipe Date: Mon, 9 Nov 2020 08:54:34 -0600 Subject: [PATCH] adding file system list apis from python --- engine/bsql_engine/io/cio.pxd | 2 +- engine/bsql_engine/io/io.pyx | 10 ++++++++++ engine/include/io/io.h | 2 ++ engine/src/cython/io.cpp | 9 +++++++++ io/src/FileSystem/private/LocalFileSystem_p.cpp | 4 ++-- 5 files changed, 24 insertions(+), 3 deletions(-) diff --git a/engine/bsql_engine/io/cio.pxd b/engine/bsql_engine/io/cio.pxd index baeb56e02..6f9a7e29c 100644 --- a/engine/bsql_engine/io/cio.pxd +++ b/engine/bsql_engine/io/cio.pxd @@ -116,7 +116,7 @@ cdef extern from "../include/io/io.h" nogil: bool useDefaultAdcJsonFile string adcJsonFile - + vector[string] list_files(string path) except + pair[bool, string] registerFileSystemHDFS(HDFS hdfs, string root, string authority) except +raiseRegisterFileSystemHDFSError pair[bool, string] registerFileSystemGCS( GCS gcs, string root, string authority) except +raiseRegisterFileSystemGCSError pair[bool, string] registerFileSystemS3( S3 s3, string root, string authority) except +raiseRegisterFileSystemS3Error diff --git a/engine/bsql_engine/io/io.pyx b/engine/bsql_engine/io/io.pyx index 4fb34fa73..3189511e5 100644 --- a/engine/bsql_engine/io/io.pyx +++ b/engine/bsql_engine/io/io.pyx @@ -284,6 +284,16 @@ cdef class PyBlazingCache: df._rename_columns(decoded_names) return df, metadata_py + + +cpdef list_files_caller(path): + cdef vector[string] files = cio.list_files(path.encode()) + decoded_names = [] + for i in range(files.size()): + decoded_names.append(files[i].decode('utf-8')) + return decoded_names + + cpdef initializeCaller(int ralId, string worker_id, int gpuId, string network_iface_name, int ralCommunicationPort, vector[NodeMetaDataUCP] workers_ucp_info, bool singleNode, map[string,string] config_options, string allocation_mode, size_t initial_pool_size, size_t maximum_pool_size, bool enable_logging): init_output = initializePython( 
ralId, worker_id, gpuId, network_iface_name, ralCommunicationPort, workers_ucp_info, singleNode, config_options, diff --git a/engine/include/io/io.h b/engine/include/io/io.h index dde087a06..54c2df3eb 100644 --- a/engine/include/io/io.h +++ b/engine/include/io/io.h @@ -115,4 +115,6 @@ std::pair, error_code_t> registerFileSystemGCS_C(GC std::pair, error_code_t> registerFileSystemS3_C(S3 s3, std::string root, std::string authority); std::pair, error_code_t> registerFileSystemLocal_C(std::string root, std::string authority); +std::vector list_files(std::string path); + } // extern "C" \ No newline at end of file diff --git a/engine/src/cython/io.cpp b/engine/src/cython/io.cpp index b3310a5f1..713b320a7 100644 --- a/engine/src/cython/io.cpp +++ b/engine/src/cython/io.cpp @@ -294,3 +294,12 @@ std::pair, error_code_t> registerFileSystemLocal_C( return std::make_pair(result, E_EXCEPTION); } } + +std::vector list_files(std::string path){ + auto uri = Uri(path); + auto files = BlazingContext::getInstance()->getFileSystemManager()->listResourceNames(uri); + for(auto file : files){ + std::cout< LocalFileSystem::Private::listResourceNames( if(skip) { continue; } - - const bool pass = WildcardFilter::match(name, finalWildcard); + const Path listedPath = uriWithRoot.getPath() + name; + const bool pass = WildcardFilter::match(listedPath.toString(), finalWildcard); if(pass) { response.push_back(name);