Skip to content

Commit

Permalink
Skip big files in create_wd_tree()
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jan 5, 2025
1 parent d28d7e6 commit 95e62c6
Show file tree
Hide file tree
Showing 9 changed files with 209 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use gitbutler_error::error::Marker;
use gitbutler_oplog::SnapshotExt;
use gitbutler_oxidize::GixRepositoryExt;
use gitbutler_project::access::WorktreeWritePermission;
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
use gitbutler_reference::{Refname, RemoteRefname};
use gitbutler_repo::logging::{LogUntil, RepositoryExt as _};
use gitbutler_repo::{
Expand Down Expand Up @@ -305,7 +306,7 @@ impl BranchManager<'_> {

// We don't support having two branches applied that conflict with each other
{
let uncommited_changes_tree_id = repo.create_wd_tree()?.id();
let uncommited_changes_tree_id = repo.create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?.id();
let gix_repo = self.ctx.gix_repository_for_merging_non_persisting()?;
let merges_cleanly = gix_repo
.merges_cleanly_compat(
Expand Down
3 changes: 2 additions & 1 deletion crates/gitbutler-branch-actions/src/virtual.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use gitbutler_oxidize::{
git2_signature_to_gix_signature, git2_to_gix_object_id, gix_to_git2_oid, GixRepositoryExt,
};
use gitbutler_project::access::WorktreeWritePermission;
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
use gitbutler_reference::{normalize_branch_name, Refname, RemoteRefname};
use gitbutler_repo::{
logging::{LogUntil, RepositoryExt as _},
Expand Down Expand Up @@ -1089,7 +1090,7 @@ pub fn is_remote_branch_mergeable(

let base_tree = find_base_tree(ctx.repo(), &branch_commit, &target_commit)?;

let wd_tree = ctx.repo().create_wd_tree()?;
let wd_tree = ctx.repo().create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?;

let branch_tree = branch_commit.tree().context("failed to find branch tree")?;
let gix_repo_in_memory = ctx.gix_repository_for_merging()?.with_object_memory();
Expand Down
3 changes: 2 additions & 1 deletion crates/gitbutler-edit-mode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use gitbutler_operating_modes::{
};
use gitbutler_oxidize::{git2_to_gix_object_id, gix_to_git2_index, GixRepositoryExt};
use gitbutler_project::access::{WorktreeReadPermission, WorktreeWritePermission};
use gitbutler_project::AUTO_TRACK_LIMIT_BYTES;
use gitbutler_reference::{ReferenceName, Refname};
use gitbutler_repo::{rebase::cherry_rebase, RepositoryExt};
use gitbutler_repo::{signature, SignaturePurpose};
Expand Down Expand Up @@ -234,7 +235,7 @@ pub(crate) fn save_and_return_to_workspace(
let parents = commit.parents().collect::<Vec<_>>();

// Recommit commit
let tree = repository.create_wd_tree()?;
let tree = repository.create_wd_tree(AUTO_TRACK_LIMIT_BYTES)?;

let (_, committer) = repository.signatures()?;
let commit_headers = commit
Expand Down
10 changes: 4 additions & 6 deletions crates/gitbutler-oplog/src/oplog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use gitbutler_oxidize::{
};
use gitbutler_project::{
access::{WorktreeReadPermission, WorktreeWritePermission},
Project,
Project, AUTO_TRACK_LIMIT_BYTES,
};
use gitbutler_repo::RepositoryExt;
use gitbutler_repo::SignaturePurpose;
Expand All @@ -30,8 +30,6 @@ use gix::object::tree::diff::Change;
use gix::prelude::ObjectIdExt;
use tracing::instrument;

const SNAPSHOT_FILE_LIMIT_BYTES: u64 = 32 * 1024 * 1024;

/// The Oplog allows for creating snapshots of the current state of the project as well as restoring to a previous snapshot.
/// Snapshots include the state of the working directory as well as all additional GitButler state (e.g. virtual branches, conflict state).
/// The data is stored as git trees in the following shape:
Expand Down Expand Up @@ -312,7 +310,7 @@ impl OplogExt for Project {
let old_wd_tree_id = tree_from_applied_vbranches(&gix_repo, commit.parent(0)?.id())?;
let old_wd_tree = repo.find_tree(old_wd_tree_id)?;

repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;

let mut diff_opts = git2::DiffOptions::new();
diff_opts
Expand Down Expand Up @@ -602,7 +600,7 @@ fn restore_snapshot(
let workdir_tree_id = tree_from_applied_vbranches(&gix_repo, snapshot_commit_id)?;
let workdir_tree = repo.find_tree(workdir_tree_id)?;

repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;

// Define the checkout builder
let mut checkout_builder = git2::build::CheckoutBuilder::new();
Expand Down Expand Up @@ -739,7 +737,7 @@ fn lines_since_snapshot(project: &Project, repo: &git2::Repository) -> Result<us
// This looks at the diff between the tree of the currently selected as 'default' branch (where new changes go)
// and that same tree in the last snapshot. For some reason, comparing workdir to the workdir subtree from
// the snapshot simply does not give us what we need here, so instead using tree to tree comparison.
repo.ignore_large_files_in_diffs(SNAPSHOT_FILE_LIMIT_BYTES)?;
repo.ignore_large_files_in_diffs(AUTO_TRACK_LIMIT_BYTES)?;

let oplog_state = OplogHandle::new(&project.gb_dir());
let Some(oplog_commit_id) = oplog_state.oplog_head()? else {
Expand Down
3 changes: 3 additions & 0 deletions crates/gitbutler-project/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ pub fn configure_git2() {
// These settings are only changed from `main` of applications.
git2::opts::strict_object_creation(false);
}

/// The maximum size of files to automatically start tracking, i.e. untracked files we pick up for tree-creation.
pub const AUTO_TRACK_LIMIT_BYTES: u64 = 32 * 1024 * 1024;
178 changes: 95 additions & 83 deletions crates/gitbutler-repo/src/repository_ext.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::Config;
use crate::SignaturePurpose;
use anyhow::{anyhow, bail, Context, Result};
use bstr::BString;
use bstr::{BStr, BString};
use git2::Tree;
use gitbutler_commit::commit_headers::CommitHeadersV2;
use gitbutler_config::git::{GbConfig, GitConfig};
Expand All @@ -12,6 +12,7 @@ use gitbutler_oxidize::{
use gitbutler_reference::{Refname, RemoteRefname};
use gix::filter::plumbing::pipeline::convert::ToGitOutcome;
use gix::objs::WriteTo;
use gix::status::index_worktree;
use std::collections::HashSet;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
Expand Down Expand Up @@ -42,8 +43,15 @@ pub trait RepositoryExt {
fn sign_buffer(&self, buffer: &[u8]) -> Result<BString>;
fn checkout_tree_builder<'a>(&'a self, tree: &'a git2::Tree<'a>) -> CheckoutTreeBuidler<'a>;
fn maybe_find_branch_by_refname(&self, name: &Refname) -> Result<Option<git2::Branch>>;
/// Based on the index, add all data similar to `git add .` and create a tree from it, which is returned.
fn create_wd_tree(&self) -> Result<Tree>;
/// Add all untracked and modified files in the worktree to
/// the object database, and create a tree from it.
///
/// Use `untracked_limit_in_bytes` to control the maximum file size for untracked files
/// before we stop tracking them automatically. Set it to 0 to disable the limit.
///
/// It should also be noted that this will fail if run on an empty branch
/// or if the HEAD branch has no commits.
fn create_wd_tree(&self, untracked_limit_in_bytes: u64) -> Result<Tree>;

/// Returns the `gitbutler/workspace` branch if the head currently points to it, or fail otherwise.
/// Use it before any modification to the repository, or extra defensively each time the
Expand Down Expand Up @@ -105,15 +113,8 @@ impl RepositoryExt for git2::Repository {
Ok(branch)
}

/// Add all untracked and modified files in the worktree to
/// the object database, and create a tree from it.
///
/// Note that right now, it doesn't skip big files.
///
/// It should also be noted that this will fail if run on an empty branch
/// or if the HEAD branch has no commits.
#[instrument(level = tracing::Level::DEBUG, skip(self), err(Debug))]
fn create_wd_tree(&self) -> Result<Tree> {
#[instrument(level = tracing::Level::DEBUG, skip(self, untracked_limit_in_bytes), err(Debug))]
fn create_wd_tree(&self, untracked_limit_in_bytes: u64) -> Result<Tree> {
use bstr::ByteSlice;
use gix::dir::walk::EmissionMode;
use gix::status;
Expand All @@ -133,6 +134,57 @@ impl RepositoryExt for git2::Repository {
)?;
let (mut pipeline, index) = repo.filter_pipeline(None)?;
let workdir = repo.work_dir().context("Need non-bare repository")?;
// Writes the worktree entry at `rela_path` into the object database and records it in
// `head_tree_editor`.
//
// Returns `Ok(true)` if the editor was changed (entry upserted, or removed because the path
// is neither a regular file nor a symlink), and `Ok(false)` if the path was skipped because
// its metadata could not be read or its size exceeds `untracked_limit_in_bytes`.
//
// A limit of `0` disables the size check, as documented on `create_wd_tree()`.
let mut added_worktree_file = |rela_path: &BStr,
                               head_tree_editor: &mut gix::object::tree::Editor<'_>|
 -> anyhow::Result<bool> {
    let rela_path_as_path = gix::path::from_bstr(rela_path);
    let path = workdir.join(&rela_path_as_path);
    // A path that vanished (or can't be stat'ed) between status and now is silently skipped.
    let Ok(md) = std::fs::symlink_metadata(&path) else {
        return Ok(false);
    };
    // Without the `!= 0` guard a limit of zero would skip every non-empty file,
    // which is the opposite of "no limit".
    // NOTE(review): this check applies to every path handed to the closure, not only
    // untracked ones - confirm that skipping oversized *tracked* modifications is intended.
    let exceeds_limit = untracked_limit_in_bytes != 0 && md.len() > untracked_limit_in_bytes;
    if exceeds_limit {
        return Ok(false);
    }
    let (id, kind) = if md.is_symlink() {
        // Symlinks are stored as a blob containing the link target.
        let target = std::fs::read_link(&path).with_context(|| {
            format!(
                "Failed to read link at '{}' for adding to the object database",
                path.display()
            )
        })?;
        let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
        (id, gix::object::tree::EntryKind::Link)
    } else if md.is_file() {
        let file = std::fs::File::open(&path).with_context(|| {
            format!(
                "Could not open file at '{}' for adding it to the object database",
                path.display()
            )
        })?;
        // Run the worktree content through the to-git filter pipeline before storing it.
        let file_for_git = pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
        let id = match file_for_git {
            ToGitOutcome::Unchanged(mut file) => repo.write_blob_stream(&mut file)?,
            ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
            ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
        };

        let kind = if gix::fs::is_executable(&md) {
            gix::object::tree::EntryKind::BlobExecutable
        } else {
            gix::object::tree::EntryKind::Blob
        };
        (id, kind)
    } else {
        // This is probably a type-change to something we can't track. Instead of keeping
        // what's in `HEAD^{tree}` we remove the entry.
        head_tree_editor.remove(rela_path)?;
        return Ok(true);
    };

    head_tree_editor.upsert(rela_path, kind, id)?;
    Ok(true)
};
let mut head_tree_editor = repo.edit_tree(repo.head_tree_id()?)?;
let status_changes = repo
.status(gix::progress::Discard)?
Expand All @@ -154,6 +206,8 @@ impl RepositoryExt for git2::Repository {
.into_iter(None)?;

let mut worktreepaths_changed = HashSet::new();
// We have to apply untracked items last, but don't have ordering here so impose it ourselves.
let mut untracked_items = Vec::new();
for change in status_changes {
let change = change?;
match change {
Expand Down Expand Up @@ -193,7 +247,7 @@ impl RepositoryExt for git2::Repository {
)?;
}
}
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
status::Item::IndexWorktree(index_worktree::Item::Modification {
rela_path,
status: EntryStatus::Change(Change::Removed),
..
Expand All @@ -203,73 +257,29 @@ impl RepositoryExt for git2::Repository {
}
// modified or untracked files are unconditionally added as blob.
// Note that this implementation will re-read the whole blob even on type-change
status::Item::IndexWorktree(
gix::status::index_worktree::Item::Modification {
rela_path,
status:
EntryStatus::Change(Change::Type | Change::Modification { .. })
| EntryStatus::IntentToAdd,
..
}
| gix::status::index_worktree::Item::DirectoryContents {
entry:
gix::dir::Entry {
rela_path,
status: gix::dir::entry::Status::Untracked,
..
},
..
},
) => {
let rela_path_as_path = gix::path::from_bstr(&rela_path);
let path = workdir.join(&rela_path_as_path);
let Ok(md) = std::fs::symlink_metadata(&path) else {
continue;
};
let (id, kind) = if md.is_symlink() {
let target = std::fs::read_link(&path).with_context(|| {
format!(
"Failed to read link at '{}' for adding to the object database",
path.display()
)
})?;
let id = repo.write_blob(gix::path::into_bstr(target).as_bytes())?;
(id, gix::object::tree::EntryKind::Link)
} else if md.is_file() {
let file = std::fs::File::open(&path).with_context(|| {
format!(
"Could not open file at '{}' for adding it to the object database",
path.display()
)
})?;
let file_for_git =
pipeline.convert_to_git(file, rela_path_as_path.as_ref(), &index)?;
let id = match file_for_git {
ToGitOutcome::Unchanged(mut file) => {
repo.write_blob_stream(&mut file)?
}
ToGitOutcome::Buffer(buf) => repo.write_blob(buf)?,
ToGitOutcome::Process(mut read) => repo.write_blob_stream(&mut read)?,
};

let kind = if gix::fs::is_executable(&md) {
gix::object::tree::EntryKind::BlobExecutable
} else {
gix::object::tree::EntryKind::Blob
};
(id, kind)
} else {
// This is probably a type-change to something we can't track. Instead of keeping
// what's in `HEAD^{tree}` we remove the entry.
head_tree_editor.remove(rela_path.as_bstr())?;
status::Item::IndexWorktree(index_worktree::Item::Modification {
rela_path,
status:
EntryStatus::Change(Change::Type | Change::Modification { .. })
| EntryStatus::IntentToAdd,
..
}) => {
if added_worktree_file(rela_path.as_ref(), &mut head_tree_editor)? {
worktreepaths_changed.insert(rela_path);
continue;
};

head_tree_editor.upsert(rela_path.as_bstr(), kind, id)?;
worktreepaths_changed.insert(rela_path);
}
}
status::Item::IndexWorktree(gix::status::index_worktree::Item::Modification {
status::Item::IndexWorktree(index_worktree::Item::DirectoryContents {
entry:
gix::dir::Entry {
rela_path,
status: gix::dir::entry::Status::Untracked,
..
},
..
}) => {
untracked_items.push(rela_path);
}
status::Item::IndexWorktree(index_worktree::Item::Modification {
rela_path,
status: EntryStatus::Change(Change::SubmoduleModification(change)),
..
Expand All @@ -283,18 +293,16 @@ impl RepositoryExt for git2::Repository {
worktreepaths_changed.insert(rela_path);
}
}
status::Item::IndexWorktree(gix::status::index_worktree::Item::Rewrite {
..
})
status::Item::IndexWorktree(index_worktree::Item::Rewrite { .. })
| status::Item::TreeIndex(gix::diff::index::Change::Rewrite { .. }) => {
unreachable!("disabled")
}
status::Item::IndexWorktree(
gix::status::index_worktree::Item::Modification {
index_worktree::Item::Modification {
status: EntryStatus::Conflict(_) | EntryStatus::NeedsUpdate(_),
..
}
| gix::status::index_worktree::Item::DirectoryContents {
| index_worktree::Item::DirectoryContents {
entry:
gix::dir::Entry {
status:
Expand All @@ -309,6 +317,10 @@ impl RepositoryExt for git2::Repository {
}
}

for rela_path in untracked_items {
added_worktree_file(rela_path.as_ref(), &mut head_tree_editor)?;
}

let tree_oid = gix_to_git2_oid(head_tree_editor.write()?);
Ok(self.find_tree(tree_oid)?)
}
Expand Down
Loading

0 comments on commit 95e62c6

Please sign in to comment.