Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add reflink FileCopyMethod (copy-on-write) #166

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
88 changes: 88 additions & 0 deletions .github/workflows/filecopymethod-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Some platform-specific file copy syscalls (e.g. creating reflinks) are only
# supported on some platforms, and only with specific filesystems. These
# syscalls are used by different FileCopyMethod implementations.
#
# This workflow sets up the conditions needed for those syscalls to work,
# and then runs the tests with the different FileCopyMethods.

name: FileCopyMethod

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

jobs:
  test:
    name: Test
    runs-on: ${{ matrix.environment.runner }}
    strategy:
      matrix:
        # Each environment pairs a runner with the filesystem image to create
        # and the FileCopyMethod the test suite should use by default.
        environment:
          - runner: macos-latest
            filesystem: APFS
            copymethod: ReflinkCopy
          - runner: ubuntu-latest
            filesystem: btrfs
            copymethod: ReflinkCopy
    steps:
      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: 'stable'
        id: go

      - name: Check out code into the Go module directory
        uses: actions/checkout@v3

      - name: Get dependencies
        run: go get -v -t -d ./...

      - name: Build
        run: go build -v .

      - # Sets the TEST_PATH environment variable to the repository root.
        # A later "Set up filesystem" step overrides it with the mount point
        # of the filesystem image.
        name: Set up testing environment
        run: |-
          mkdir ./test/filesystems
          echo "TEST_PATH=." >> $GITHUB_ENV

      - name: Set up filesystem (MacOS)
        if: ${{ matrix.environment.filesystem != '' && startsWith(matrix.environment.runner, 'macos-') }}
        run: |-
          IMAGE_PATH="./test/filesystems/${{matrix.environment.filesystem}}.dmg"
          MOUNT_PATH="./test/filesystems/${{matrix.environment.filesystem}}.mount"
          echo "TEST_PATH=${MOUNT_PATH}" >> $GITHUB_ENV

          hdiutil create -size 500m -fs APFS "$IMAGE_PATH"
          hdiutil attach -mountpoint "$MOUNT_PATH" "$IMAGE_PATH"

      - name: Set up filesystem (Linux)
        if: ${{ matrix.environment.filesystem != '' && startsWith(matrix.environment.runner, 'ubuntu-') }}
        run: |-
          IMAGE_PATH="./test/filesystems/${{matrix.environment.filesystem}}.img"
          MOUNT_PATH="./test/filesystems/${{matrix.environment.filesystem}}.mount"
          echo "TEST_PATH=${MOUNT_PATH}" >> $GITHUB_ENV

          truncate -s 500m "$IMAGE_PATH"
          mkfs -t "${{matrix.environment.filesystem}}" "$IMAGE_PATH"
          mkdir "$MOUNT_PATH"
          sudo mount -o loop "$IMAGE_PATH" "$MOUNT_PATH"
          # The mount is created as root; hand it to the runner user so the
          # later rsync and test steps can write to it without sudo.
          sudo chown -R "$(id -u):$(id -g)" "$MOUNT_PATH"

      - name: Copy files to mounted filesystem
        if: ${{ matrix.environment.filesystem != '' }}
        run: |-
          rsync -av --exclude=".*" --exclude "test/filesystems" . "$TEST_PATH"

      - name: Test
        working-directory: ${{ env.TEST_PATH }}
        env:
          TEST_FILESYSTEM: ${{ matrix.environment.filesystem }}
          # The matrix key is `environment`. `matrix.os` is not defined, so it
          # would expand to an empty string and the tests would silently run
          # with the default copy method instead of the one under test.
          TEST_FILECOPYMETHOD: ${{ matrix.environment.copymethod }}
        run: go test -v
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
test/data.copy
test/filesystems
test/owned-by-root
coverage.txt
vendor
Expand Down
36 changes: 32 additions & 4 deletions all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,27 @@ import (
//go:embed test/data/case18/assets
var assets embed.FS

// supportsWrapReaderOption and supportsFSOption record whether the
// FileCopyMethod under test is expected to work with the WrapReader and FS
// options. Both default to true; setupFileCopyMethod sets them to false when
// the tests run with ReflinkCopy.
var supportsWrapReaderOption = true
var supportsFSOption = true

func setupFileCopyMethod(m *testing.M) {
// Allow running all the tests with a different FileCopyMethod.
// We want to be able to have full coverage no matter the method.
switch os.Getenv("TEST_FILECOPYMETHOD") {
case "CopyBytes":
defaultCopyMethod = CopyBytes
supportsWrapReaderOption = true
supportsFSOption = true
case "ReflinkCopy":
defaultCopyMethod = ReflinkCopy
supportsWrapReaderOption = false
supportsFSOption = false
}
}

func TestMain(m *testing.M) {
setup(m)
setupFileCopyMethod(m)
code := m.Run()
teardown(m)
os.Exit(code)
Expand Down Expand Up @@ -351,7 +370,6 @@ func TestOptions_PreserveOwner(t *testing.T) {
}

func TestOptions_CopyRateLimit(t *testing.T) {

file, err := os.Create("test/data/case16/large.file")
if err != nil {
t.Errorf("failed to create test file: %v", err)
Expand All @@ -372,8 +390,13 @@ func TestOptions_CopyRateLimit(t *testing.T) {
start := time.Now()
err = Copy("test/data/case16", "test/data.copy/case16", opt)
elapsed := time.Since(start)
Expect(t, err).ToBe(nil)
Expect(t, elapsed > 5*time.Second).ToBe(true)
if supportsWrapReaderOption {
Expect(t, err).ToBe(nil)
Expect(t, elapsed > 5*time.Second).ToBe(true)
} else {
Expect(t, err).Not().ToBe(nil)
Expect(t, errors.Is(err, ErrUnsupportedCopyMethod)).ToBe(true)
}
}

func TestOptions_OnFileError(t *testing.T) {
Expand Down Expand Up @@ -422,7 +445,12 @@ func TestOptions_FS(t *testing.T) {
FS: assets,
PermissionControl: AddPermission(200), // FIXME
})
Expect(t, err).ToBe(nil)
// This test exercises the FS option, so it must branch on the FS capability
// flag rather than the WrapReader one.
if supportsFSOption {
	Expect(t, err).ToBe(nil)
} else {
	// Copy methods that cannot read from an fs.FS must report it explicitly.
	Expect(t, err).Not().ToBe(nil)
	Expect(t, errors.Is(err, ErrUnsupportedCopyMethod)).ToBe(true)
}
}

type SleepyReader struct {
Expand Down
87 changes: 87 additions & 0 deletions copy_methods_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
//go:build darwin

package copy

import (
"errors"
"fmt"
"os"
"time"

"golang.org/x/sys/unix"
)

// ReflinkCopy copies a file by asking the filesystem to create a reflink from
// the source file to the destination file, so that both share their contents
// using a copy-on-write method.
//
// Reflinks are the fastest way to copy large files, but come with limitations:
//
//   - The filesystem must support them (btrfs, xfs, apfs).
//   - Source and destination must be on the same filesystem.
//
// See: https://btrfs.readthedocs.io/en/latest/Reflink.html
//
// -------------------- PLATFORM SPECIFIC INFORMATION --------------------
//
// The Darwin implementation uses the `clonefile` syscall:
// https://www.manpagez.com/man/2/clonefile/
//
// Support:
//   - MacOS 10.14 or newer
//   - APFS filesystem
//
// Considerations:
//   - Ownership is not preserved.
//   - Setuid and Setgid are not preserved.
//   - Times are copied by default.
//   - Flag CLONE_NOFOLLOW is not used, we use lcopy instead of fcopy for
//     symbolic links.
//
// The FS and WrapReader options are rejected with an error wrapping
// ErrUnsupportedCopyMethod. A missing source file is reported as skipped
// rather than as an error.
var ReflinkCopy = FileCopyMethod{
	fcopy: func(src, dest string, info os.FileInfo, opt Options) (err error, skipFile bool) {
		// A reflink needs real paths on disk and cannot pass data through a reader.
		switch {
		case opt.FS != nil:
			return fmt.Errorf("%w: cannot create reflink from Go's fs.FS interface", ErrUnsupportedCopyMethod), false
		case opt.WrapReader != nil:
			return fmt.Errorf("%w: cannot create reflink when WrapReader option is used", ErrUnsupportedCopyMethod), false
		}

		const flags = 0
		cloneErr := unix.Clonefile(src, dest, flags)

		// An existing destination makes the clone fail; remove it and retry once.
		if errors.Is(cloneErr, os.ErrExist) {
			if rmErr := os.Remove(dest); rmErr != nil {
				return rmErr, false
			}
			cloneErr = unix.Clonefile(src, dest, flags)
		}

		switch {
		case cloneErr == nil:
			// Cloned successfully; continue with the timestamp handling below.
		case os.IsNotExist(cloneErr):
			// A source file that does not exist is skipped, not reported.
			return nil, true
		default:
			return &os.PathError{
				Op:   "create reflink",
				Path: src,
				Err:  cloneErr,
			}, false
		}

		// The clone carries over the source's modtime. When the caller did not
		// ask to preserve times, stamp the destination with the current time.
		if opt.PreserveTimes {
			return nil, false
		}

		now := time.Now()
		if chErr := os.Chtimes(dest, now, now); chErr != nil {
			return chErr, false
		}

		return nil, false
	},
}
79 changes: 79 additions & 0 deletions copy_methods_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//go:build linux

package copy

import (
"fmt"
"os"

"golang.org/x/sys/unix"
)

// ReflinkCopy copies a file by asking the filesystem to create a reflink from
// the source file to the destination file, so that both share their contents
// using a copy-on-write method.
//
// Reflinks are the fastest way to copy large files, but come with limitations:
//
//   - The filesystem must support them (btrfs, xfs, apfs).
//   - Source and destination must be on the same filesystem.
//
// See: https://btrfs.readthedocs.io/en/latest/Reflink.html
//
// -------------------- PLATFORM SPECIFIC INFORMATION --------------------
//
// The Linux implementation uses the `ficlone` ioctl:
// https://manpages.debian.org/testing/manpages-dev/ioctl_ficlone.2.en.html
//
// Support:
//   - BTRFS or XFS filesystem
//
// Considerations:
//   - Ownership is not preserved.
//   - Setuid and Setgid are not preserved.
//   - Times are not preserved.
//
// The FS and WrapReader options are rejected with an error wrapping
// ErrUnsupportedCopyMethod. A missing source file is reported as skipped
// rather than as an error.
var ReflinkCopy = FileCopyMethod{
	fcopy: func(src, dest string, info os.FileInfo, opt Options) (err error, skipFile bool) {
		// A reflink needs real files on disk and cannot pass data through a reader.
		switch {
		case opt.FS != nil:
			return fmt.Errorf("%w: cannot create reflink from Go's fs.FS interface", ErrUnsupportedCopyMethod), false
		case opt.WrapReader != nil:
			return fmt.Errorf("%w: cannot create reflink when WrapReader option is used", ErrUnsupportedCopyMethod), false
		}

		// Source side. A source file that does not exist is skipped, not reported.
		in, err := os.Open(src)
		if os.IsNotExist(err) {
			return nil, true
		}
		if err != nil {
			return err, false
		}
		// fclose receives &err so it can act on the function's named result.
		defer fclose(in, &err)

		// Destination side.
		out, err := os.Create(dest)
		if err != nil {
			return err, false
		}
		defer fclose(out, &err)

		// Ask the filesystem to make dest share src's contents.
		srcFD, destFD := in.Fd(), out.Fd()
		if cloneErr := unix.IoctlFileClone(int(destFD), int(srcFD)); cloneErr != nil {
			// Best effort: do not leave the empty destination file behind.
			_ = os.Remove(dest)
			return &os.PathError{
				Op:   "create reflink",
				Path: src,
				Err:  cloneErr,
			}, false
		}

		return nil, false
	},
}
24 changes: 24 additions & 0 deletions copy_methods_x.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//go:build !darwin && !linux

package copy

import (
"os"
)

// ReflinkCopy copies a file by asking the filesystem to create a reflink from
// the source file to the destination file, so that both share their contents
// using a copy-on-write method.
//
// Reflinks are the fastest way to copy large files, but come with limitations:
//
//   - The filesystem must support them (btrfs, xfs, apfs).
//   - Source and destination must be on the same filesystem.
//
// See: https://btrfs.readthedocs.io/en/latest/Reflink.html
//
// This build (neither darwin nor linux) has no reflink implementation, so
// every call fails with ErrUnsupportedCopyMethod.
var ReflinkCopy = FileCopyMethod{
	fcopy: func(_, _ string, _ os.FileInfo, _ Options) (error, bool) {
		// Unsupported operating system: report it and do not skip the file.
		return ErrUnsupportedCopyMethod, false
	},
}
7 changes: 6 additions & 1 deletion options.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type Options struct {
//
// Available implementations:
// - CopyBytes (best compatibility)
// - ReflinkCopy (best performance)
//
// Some implementations may not be supported on the target GOOS, or on
// the user's filesystem. When these fail, an error will be returned.
Expand Down Expand Up @@ -134,6 +135,10 @@ type FileCopyMethod struct {
fcopy func(src, dest string, info os.FileInfo, opt Options) (err error, skipFile bool)
}

// defaultCopyMethod is the FileCopyMethod that getDefaultOptions assigns to
// Options.FileCopyMethod.
// It is only changed during tests.
var defaultCopyMethod = CopyBytes

// getDefaultOptions provides default options,
// which would be modified by usage-side.
func getDefaultOptions(src, dest string) Options {
Expand All @@ -149,7 +154,7 @@ func getDefaultOptions(src, dest string) Options {
Sync: false, // Do not sync
Specials: false, // Do not copy special files
PreserveTimes: false, // Do not preserve the modification time
FileCopyMethod: CopyBytes, // Copy by bytes
FileCopyMethod: defaultCopyMethod, // Copy by bytes, unless testing this package
CopyBufferSize: 0, // Do not specify, use default bufsize (32*1024)
WrapReader: nil, // Do not wrap src files, use them as they are.
intent: intent{src, dest, nil, nil},
Expand Down
Loading