diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3f7c631..dfc6ed7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: cachix/install-nix-action@v7 + - uses: cachix/install-nix-action@v10 - run: nix-channel --add ${{ matrix.channel }} nixpkgs - run: nix-channel --update - - run: nix-build \ No newline at end of file + - run: nix-build diff --git a/CMakeLists.txt b/CMakeLists.txt index a77a326..8743850 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ enable_testing() set (ALPINOCORPUS_VERSION "2.6.0") -set (CMAKE_CXX_STANDARD 11) +set (CMAKE_CXX_STANDARD 17) if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) set (CMAKE_BUILD_TYPE Release) @@ -41,7 +41,7 @@ endif() include(GNUInstallDirs REQUIRED) -find_package(Boost 1.50 COMPONENTS system filesystem REQUIRED) +find_package(Boost 1.50 COMPONENTS system REQUIRED) if(Boost_FOUND) include_directories(${Boost_INCLUDE_DIRS}) diff --git a/src/CompactCorpusReaderPrivate.cpp b/src/CompactCorpusReaderPrivate.cpp index 222229a..1efce03 100644 --- a/src/CompactCorpusReaderPrivate.cpp +++ b/src/CompactCorpusReaderPrivate.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,8 +8,6 @@ #include #include -#include - #include #include "DzIstream.hh" @@ -20,7 +19,7 @@ namespace { char const * const INDEX_EXT = ".index"; } -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace alpinocorpus { @@ -51,12 +50,12 @@ void CompactCorpusReaderPrivate::construct(std::string const &canonical, std::string const &indexPath) { // XXX race condition up ahead - bf::path dataP(dataPath); - if (!bf::is_regular_file(dataP)) + fs::path dataP(dataPath); + if (!fs::is_regular_file(dataP)) throw OpenError(dataPath, "not a regular file"); - bf::path indexP(indexPath); - if (!bf::is_regular_file(indexP)) + fs::path indexP(indexPath); + if (!fs::is_regular_file(indexP)) throw OpenError(indexPath, "not a regular file"); open(dataPath, indexPath); diff --git a/src/DbCorpusWriter.cpp b/src/DbCorpusWriter.cpp index be13905..1e28089 100644 --- a/src/DbCorpusWriter.cpp +++ b/src/DbCorpusWriter.cpp @@ -1,11 +1,10 @@ #include #include #include +#include #include #include -#include - #include #include @@ -13,7 +12,7 @@ #include #include -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace db = DbXml; namespace alpinocorpus { @@ -76,7 +75,7 @@ namespace alpinocorpus { db::XmlContainer ::NodeContainer); } else { - if (bf::exists(path)) + if (fs::exists(path)) d_container = d_mgr.openContainer(path, config); else d_container = d_mgr.createContainer(path, config, @@ -149,7 +148,7 @@ namespace alpinocorpus { db::XmlUpdateContext &ctx) { try { - std::string canonical(bf::path(name).generic_string()); + std::string canonical(fs::path(name).generic_string()); d_container.putDocument(canonical, content, ctx, db::DBXML_WELL_FORMED_ONLY); } catch (db::XmlException const &e) { diff --git a/src/DirectoryCorpusReaderPrivate.cpp b/src/DirectoryCorpusReaderPrivate.cpp index a48bca7..7130c20 100644 --- a/src/DirectoryCorpusReaderPrivate.cpp +++ b/src/DirectoryCorpusReaderPrivate.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,8 +8,6 @@ #include #include -#include -#include #include #include @@ -18,17 +17,17 @@ #include "util/NameCompare.hh" #include "util/textfile.hh" -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace { class DirIter : public alpinocorpus::IterImpl { - boost::filesystem::recursive_directory_iterator iter; - boost::filesystem::path d_directory; + fs::recursive_directory_iterator iter; + fs::path d_directory; public: - DirIter(boost::filesystem::path const &path, - boost::filesystem::recursive_directory_iterator i); + DirIter(fs::path const &path, + fs::recursive_directory_iterator i); alpinocorpus::IterImpl *copy() const; bool hasNext(); alpinocorpus::Entry next(alpinocorpus::CorpusReader const &rdr); @@ -37,7 +36,7 @@ namespace { }; DirIter::DirIter( - bf::path const &path, bf::recursive_directory_iterator i) : + fs::path const &path, fs::recursive_directory_iterator i) : d_directory(path), iter(i) { } @@ -51,7 +50,7 @@ namespace { bool DirIter::isValid() { // End is a correct iterator state. - if (iter == bf::recursive_directory_iterator()) + if (iter == fs::recursive_directory_iterator()) return true; return iter->path().extension() == ".xml"; @@ -64,7 +63,7 @@ namespace { ++iter; } - return iter != bf::recursive_directory_iterator(); + return iter != fs::recursive_directory_iterator(); } alpinocorpus::Entry DirIter::next(alpinocorpus::CorpusReader const &rdr) @@ -78,7 +77,7 @@ namespace { if (entryPathStr[0] == '/') entryPathStr.erase(0, 1); - bf::path entryPath(entryPathStr); + fs::path entryPath(entryPathStr); // Move the iterator. ++iter; @@ -94,8 +93,8 @@ namespace { std::vector::const_iterator d_iter; public: - SortedDirIter(boost::filesystem::path const &path, - boost::filesystem::recursive_directory_iterator i); + SortedDirIter(fs::path const &path, + fs::recursive_directory_iterator i); alpinocorpus::IterImpl *copy() const; bool hasNext(); alpinocorpus::Entry next(alpinocorpus::CorpusReader const &rdr); @@ -106,9 +105,9 @@ namespace { }; SortedDirIter::SortedDirIter( - bf::path const &path, bf::recursive_directory_iterator i) + fs::path const &path, fs::recursive_directory_iterator i) { - for (; i != bf::recursive_directory_iterator(); i++) + for (; i != fs::recursive_directory_iterator(); i++) { std::string entryPathStr = i->path().string(); entryPathStr.erase(0, path.string().size()); @@ -161,7 +160,7 @@ namespace { { // We assume the iterator is valid, since hasNext() should be called - bf::path entryPath(*d_iter); + fs::path entryPath(*d_iter); // Move the iterator. ++d_iter; @@ -180,12 +179,12 @@ DirectoryCorpusReaderPrivate::DirectoryCorpusReaderPrivate( d_nEntries(std::numeric_limits::max()) { if (directory[directory.size() - 1] == '/') - d_directory = bf::path(directory).parent_path(); + d_directory = fs::path(directory).parent_path(); else - d_directory = bf::path(directory); + d_directory = fs::path(directory); - if (!bf::exists(d_directory) || - !bf::is_directory(d_directory)) + if (!fs::exists(d_directory) || + !fs::is_directory(d_directory)) throw OpenError(directory, "non-existent or not a directory"); } @@ -197,10 +196,12 @@ CorpusReader::EntryIterator DirectoryCorpusReaderPrivate::getEntries(SortOrder s switch (sortOrder) { case NaturalOrder: return EntryIterator(new DirIter(d_directory, - bf::recursive_directory_iterator(d_directory, bf::symlink_option::recurse))); + fs::recursive_directory_iterator(d_directory, + fs::directory_options::follow_directory_symlink))); case NumericalOrder: return EntryIterator(new SortedDirIter(d_directory, - bf::recursive_directory_iterator(d_directory, bf::symlink_option::recurse))); + fs::recursive_directory_iterator(d_directory, + fs::directory_options::follow_directory_symlink))); default: throw NotImplemented("Unexpected sort order."); } @@ -232,12 +233,12 @@ size_t DirectoryCorpusReaderPrivate::getSize() const std::string DirectoryCorpusReaderPrivate::readEntry(std::string const &entry) const { - bf::path p(d_directory); + fs::path p(d_directory); p /= entry; return util::readFile(p.string()); } -bf::path DirectoryCorpusReaderPrivate::cachePath() const +fs::path DirectoryCorpusReaderPrivate::cachePath() const { return d_directory.parent_path() / d_directory.filename().replace_extension(".dir_index"); } diff --git a/src/DirectoryCorpusReaderPrivate.hh b/src/DirectoryCorpusReaderPrivate.hh index eea1986..75ee8ec 100644 --- a/src/DirectoryCorpusReaderPrivate.hh +++ b/src/DirectoryCorpusReaderPrivate.hh @@ -1,11 +1,10 @@ #ifndef ALPINO_DIRECTORYCORPUSREADER_PRIVATE_HH #define ALPINO_DIRECTORYCORPUSREADER_PRIVATE_HH +#include #include #include -#include - #include #include #include @@ -32,9 +31,9 @@ public: virtual size_t getSize() const; private: - boost::filesystem::path cachePath() const; + std::filesystem::path cachePath() const; - boost::filesystem::path d_directory; + std::filesystem::path d_directory; mutable size_t d_nEntries; bool d_entriesRead; }; diff --git a/src/DzOstreamBuf.cpp b/src/DzOstreamBuf.cpp index a0f21dc..c2eb0a4 100644 --- a/src/DzOstreamBuf.cpp +++ b/src/DzOstreamBuf.cpp @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -11,14 +13,12 @@ #include -#include - #include "DzOstreamBuf.hh" #include "gzip.hh" #include "util/bufutil.hh" -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace { @@ -44,13 +44,18 @@ DzOstreamBuf::DzOstreamBuf(char const *filename) : d_size(0), d_crc32(crc32(0L, return; } - // XXX - There is a race condition here, but Boost does not seem to - // provide a variant that returns a file descriptor. We used mkstemp - // previously, but it is not portable. - std::string tmpFilename = - bf::unique_path(std::string(filename) + "-%%%%-%%%%-%%%%-%%%%").string(); - d_tmpFilename = tmpFilename; - d_zDataStream = fopen(tmpFilename.c_str(), "w"); + // XXX - Seems like C++17 does not provide an alternative to mkstemp + // yet. But since we haven't recently built any Windows versions and + // Windows has WSL anyway, this should be fine. + std::string tmpFilename = + std::string(filename) + "-XXXX-XXXX-XXXX-XXXX"; + int fd; + if ((fd = mkstemp(tmpFilename.data())) == -1) + throw std::runtime_error(std::string("DzOstreamBuf::DzOstreamBuf: Could not create temporary file") + + d_tmpFilename); + + d_tmpFilename = tmpFilename; + d_zDataStream = fdopen(fd, "w"); if (d_zDataStream == NULL) throw std::runtime_error(std::string("DzOstreamBuf::DzOstreamBuf: Could not open ") + @@ -114,7 +119,7 @@ DzOstreamBuf::~DzOstreamBuf() fclose(d_dzStream); - bf::remove(d_tmpFilename); + fs::remove(d_tmpFilename); } void DzOstreamBuf::flushBuffer() @@ -202,14 +207,14 @@ void DzOstreamBuf::writeHeader() auto secsSinceEpoch = std::chrono::duration_cast( clock.time_since_epoch()).count(); - if (secsSinceEpoch > std::numeric_limits::max()) + if (secsSinceEpoch > std::numeric_limits::max()) secsSinceEpoch = 0; header[GZ_HEADER_ID1] = gzipId1; header[GZ_HEADER_ID2] = gzipId2; header[GZ_HEADER_CM] = GZ_CM_DEFLATE; header[GZ_HEADER_FLG] = GZ_FLG_EXTRA; - util::writeToBuf(&header[0] + GZ_HEADER_MTIME, secsSinceEpoch); + util::writeToBuf(&header[0] + GZ_HEADER_MTIME, secsSinceEpoch); header[GZ_HEADER_XFL] = GZ_XFL_MAX; header[GZ_HEADER_OS] = GZ_OS_UNIX; @@ -220,8 +225,8 @@ void DzOstreamBuf::writeTrailer() { std::vector trailer(GZ_TRAILER_SIZE); - util::writeToBuf(&trailer[0] + GZ_TRAILER_CRC32, d_crc32); - util::writeToBuf(&trailer[0] + GZ_TRAILER_ISIZE, d_size); + util::writeToBuf(&trailer[0] + GZ_TRAILER_CRC32, d_crc32); + util::writeToBuf(&trailer[0] + GZ_TRAILER_ISIZE, d_size); fwrite(&trailer[0], 1, GZ_TRAILER_SIZE, d_dzStream); } diff --git a/src/MultiCorpusReaderPrivate.cpp b/src/MultiCorpusReaderPrivate.cpp index bb64ece..4baa9a4 100644 --- a/src/MultiCorpusReaderPrivate.cpp +++ b/src/MultiCorpusReaderPrivate.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,8 +6,6 @@ #include #include -#include - #include #include #include @@ -21,7 +20,7 @@ #include "MultiCorpusReaderPrivate.hh" -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace alpinocorpus { @@ -80,8 +79,8 @@ size_t MultiCorpusReaderPrivate::getSize() const void MultiCorpusReaderPrivate::push_back(std::string const &name, std::string const &filename, bool recursive) { - bf::path corpusPath(filename); - if (!bf::exists(corpusPath)) + fs::path corpusPath(filename); + if (!fs::exists(corpusPath)) throw OpenError(filename); d_corpora.push_back(std::make_pair(filename, recursive)); diff --git a/src/MultiCorpusReaderPrivate.hh b/src/MultiCorpusReaderPrivate.hh index f4945cc..0746edc 100644 --- a/src/MultiCorpusReaderPrivate.hh +++ b/src/MultiCorpusReaderPrivate.hh @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,8 +6,6 @@ #include #include -#include - #include #include #include @@ -92,7 +91,7 @@ private: std::pair corpusFromPath(std::string const &path) const; std::string entryFromPath(std::string const &path) const; - boost::filesystem::path d_directory; + std::filesystem::path d_directory; std::list > d_corpora; Corpora d_corporaMap; #ifdef USE_DBXML diff --git a/src/RecursiveCorpusReader.cpp b/src/RecursiveCorpusReader.cpp index 837e82d..8dfcfd3 100644 --- a/src/RecursiveCorpusReader.cpp +++ b/src/RecursiveCorpusReader.cpp @@ -1,15 +1,14 @@ +#include #include -#include - #include -#include +#include #include #include #include #include -namespace bf = boost::filesystem; +namespace fs = std::filesystem; namespace alpinocorpus { @@ -30,7 +29,7 @@ class RecursiveCorpusReaderPrivate : public CorpusReader Either validQuery(QueryDialect d, bool variables, std::string const &query) const; private: - bf::path d_directory; + fs::path d_directory; std::shared_ptr d_multiReader; }; @@ -95,23 +94,24 @@ RecursiveCorpusReaderPrivate::RecursiveCorpusReaderPrivate(std::string const &di d_multiReader(new MultiCorpusReader) { if (directory[directory.size() - 1] == '/') - d_directory = bf::path(directory).parent_path(); + d_directory = fs::path(directory).parent_path(); else - d_directory = bf::path(directory); + d_directory = fs::path(directory); - if (!bf::exists(d_directory) || - !bf::is_directory(d_directory)) + if (!fs::exists(d_directory) || + !fs::is_directory(d_directory)) throw OpenError(directory, "non-existent or not a directory"); - for (bf::recursive_directory_iterator iter(d_directory, bf::symlink_option::recurse); - iter != bf::recursive_directory_iterator(); + for (fs::recursive_directory_iterator iter(d_directory, + fs::directory_options::follow_directory_symlink); + iter != fs::recursive_directory_iterator(); ++iter) { if (iter->path().extension() != ".dact" && (dactOnly || iter->path().extension() != ".index")) continue; - bf::path namePath = iter->path(); + fs::path namePath = iter->path(); namePath.replace_extension(""); std::string name = namePath.string(); diff --git a/src/util/NameCompare.cpp b/src/util/NameCompare.cpp index 3d34609..472e31c 100644 --- a/src/util/NameCompare.cpp +++ b/src/util/NameCompare.cpp @@ -1,10 +1,9 @@ #include +#include #include #include #include -#include - #include "InfInt.h" #include "NameCompare.hh" #include "parseString.hh" @@ -76,8 +75,8 @@ namespace alpinocorpus { return false; } - bool PathCompare::operator()(boost::filesystem::path const &p1, - boost::filesystem::path const &p2) const + bool PathCompare::operator()(std::filesystem::path const &p1, + std::filesystem::path const &p2) const { return d_nameCompare(p1.string(), p2.string()); } diff --git a/src/util/NameCompare.hh b/src/util/NameCompare.hh index 007f5d1..7f1e36d 100644 --- a/src/util/NameCompare.hh +++ b/src/util/NameCompare.hh @@ -1,10 +1,9 @@ #ifndef ALPINOCORPUS_NAME_COMPARE #define ALPINOCORPUS_NAME_COMPARE +#include #include -#include - namespace alpinocorpus { @@ -15,8 +14,8 @@ struct NameCompare struct PathCompare { - bool operator()(boost::filesystem::path const &p1, - boost::filesystem::path const &p2) const; + bool operator()(std::filesystem::path const &p1, + std::filesystem::path const &p2) const; private: NameCompare d_nameCompare; }; diff --git a/src/util/base64.hh b/src/util/base64.hh index 1ad7eea..08d0969 100644 --- a/src/util/base64.hh +++ b/src/util/base64.hh @@ -10,8 +10,6 @@ #include #include -#include - namespace { std::string b64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -45,7 +43,8 @@ namespace util { template std::string b64_encode(T val) { - BOOST_STATIC_ASSERT(std::numeric_limits::is_integer); + static_assert(std::numeric_limits::is_integer, + "b64_encode can only encode integral types"); // Find number of 6-bit chunks size_t chunks = 1; @@ -69,7 +68,8 @@ std::string b64_encode(T val) template T b64_decode(std::string const &val) { - BOOST_STATIC_ASSERT(std::numeric_limits::is_integer); + static_assert(std::numeric_limits::is_integer, + "b64_decode can only decode integral types"); T result = 0; size_t offset = 0; diff --git a/src/util/textfile.cpp b/src/util/textfile.cpp index 4ff9e17..60c3db3 100644 --- a/src/util/textfile.cpp +++ b/src/util/textfile.cpp @@ -1,20 +1,19 @@ #include +#include #include #include #include #include -#include - #include "textfile.hh" namespace alpinocorpus { namespace util { std::string readFile(std::string const &filename) { - boost::filesystem::path p(filename); + std::filesystem::path p(filename); - if (!boost::filesystem::is_regular_file(p)) + if (!std::filesystem::is_regular_file(p)) throw std::runtime_error(std::string("readFile: '") + filename + "' is not a regular file"); diff --git a/util/common/EqualsPrevious.hh b/util/common/EqualsPrevious.hh index ca5ac26..e7bda0f 100644 --- a/util/common/EqualsPrevious.hh +++ b/util/common/EqualsPrevious.hh @@ -4,8 +4,6 @@ #include #include -#include - template class EqualsPrevious : public std::unary_function { diff --git a/util/common/util.cpp b/util/common/util.cpp index c5f6a81..2f50c8a 100644 --- a/util/common/util.cpp +++ b/util/common/util.cpp @@ -1,15 +1,15 @@ +#include #include #include #include #include -#include #include #include #include -namespace bf = boost::filesystem; +namespace fs = std::filesystem; using alpinocorpus::CorpusReader; using alpinocorpus::CorpusReaderFactory; @@ -18,7 +18,7 @@ using alpinocorpus::MultiCorpusReader; std::shared_ptr openCorpus(std::string const &path, bool recursive) { - if (recursive && bf::is_directory(bf::path(path))) + if (recursive && fs::is_directory(fs::path(path))) return std::shared_ptr(CorpusReaderFactory::openRecursive(path, false)); else return std::shared_ptr(CorpusReaderFactory::open(path)); @@ -36,12 +36,12 @@ std::shared_ptr openCorpora( { // If we are dealing with a directory, and the path ends with a trailing // slash, we remove the slash. - bf::path p = bf::path(*iter); + fs::path p = fs::path(*iter); - bool isDir = bf::is_directory(p); + bool isDir = fs::is_directory(p); if (isDir && iter->rfind('/') == iter->size() - 1) - p = bf::path(iter->substr(0, iter->size() - 1)); + p = fs::path(iter->substr(0, iter->size() - 1)); // Kill the extension, if there is any. p.replace_extension(""); diff --git a/util/create/main.cpp b/util/create/main.cpp index 521e45f..ad1a6b4 100644 --- a/util/create/main.cpp +++ b/util/create/main.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,8 +7,6 @@ #include #include -#include - #include #include #include @@ -44,7 +43,7 @@ using alpinocorpus::SortOrder; using alpinocorpus::DbCorpusWriter; #endif -namespace bf = boost::filesystem; +namespace fs = std::filesystem; void usage(std::string const &programName) { @@ -167,7 +166,7 @@ int main(int argc, char *argv[]) for (std::vector::const_iterator iter = opts->arguments().begin(); iter != opts->arguments().end(); ++iter) - if (bf::equivalent(treebankOut, *iter)) + if (fs::equivalent(treebankOut, *iter)) throw std::runtime_error("Attempting to write to the source treebank."); #if defined(USE_DBXML) @@ -194,7 +193,7 @@ int main(int argc, char *argv[]) for (std::vector::const_iterator iter = opts->arguments().begin(); iter != opts->arguments().end(); ++iter) - if (bf::equivalent(outIndex, *iter) || bf::equivalent(outDataDz, *iter)) + if (fs::equivalent(outIndex, *iter) || fs::equivalent(outDataDz, *iter)) throw std::runtime_error("Attempting to write to the source treebank."); std::shared_ptr wr(new CompactCorpusWriter(treebankOut)); diff --git a/util/extract/main.cpp b/util/extract/main.cpp index f5bd502..f4b8c2c 100644 --- a/util/extract/main.cpp +++ b/util/extract/main.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,14 +8,11 @@ #include #include -#include - #include #include #include #include -#include using alpinocorpus::CorpusReader; using alpinocorpus::Either; @@ -47,17 +45,17 @@ int main(int argc, char *argv[]) { return 1; } - boost::filesystem::path targetPath = opts->arguments().at(1); + std::filesystem::path targetPath = opts->arguments().at(1); CorpusReader::EntryIterator iter = reader->entries(); while (iter.hasNext()) { alpinocorpus::Entry entry = iter.next(*reader); - boost::filesystem::path entryPath(entry.name); - boost::filesystem::path filePath = targetPath / entryPath; + std::filesystem::path entryPath(entry.name); + std::filesystem::path filePath = targetPath / entryPath; - if (!boost::filesystem::exists(filePath.parent_path())) { - boost::filesystem::create_directories(filePath.parent_path()); + if (!std::filesystem::exists(filePath.parent_path())) { + std::filesystem::create_directories(filePath.parent_path()); } std::string content = reader->read(entry.name, diff --git a/util/get/main.cpp b/util/get/main.cpp index 8ff22b1..a52c64a 100644 --- a/util/get/main.cpp +++ b/util/get/main.cpp @@ -12,7 +12,6 @@ #include #include -#include using alpinocorpus::CorpusReader; using alpinocorpus::Either; diff --git a/util/xpath/main.cpp b/util/xpath/main.cpp index b885f79..daafbbe 100644 --- a/util/xpath/main.cpp +++ b/util/xpath/main.cpp @@ -7,8 +7,6 @@ #include #include -#include - #include #include #include @@ -30,8 +28,6 @@ using alpinocorpus::Either; using alpinocorpus::Entry; using alpinocorpus::LexItem; -namespace bf = boost::filesystem; - template std::set unique_to_first(std::set const &a, std::set const &b) {