Handle region snapshot replacement volume deletes #7046

Merged
31 commits
92da0a9  Handle region snapshot replacement volume deletes (jmpesp, Oct 9, 2024)
bee3cca  slow CI machines may not have started the sagas after running backgro… (jmpesp, Nov 14, 2024)
309742d  THIS WAS A TREMENDOUS OVERSIGHT (jmpesp, Nov 15, 2024)
c9455ec  emit VolumeReplaceResult as saga node data (jmpesp, Nov 15, 2024)
4630e03  fix a bunch of tests that were locking non-existent volumes (jmpesp, Nov 15, 2024)
4c729e6  fix a bunch more tests that were locking non-existent volumes (jmpesp, Nov 15, 2024)
1c50f37  fix another test that was locking non-existent volumes (jmpesp, Nov 15, 2024)
63cb56e  the TREMENDOUS oversight continues (jmpesp, Nov 18, 2024)
0ce2029  Merge branch 'main' into region_snapshot_replacement_account_for_dele… (jmpesp, Nov 19, 2024)
79f8ad9  fix after merge (jmpesp, Nov 19, 2024)
8b0a677  account for relocking a volume (jmpesp, Nov 22, 2024)
b2a740f  add comment about validating volume exists (jmpesp, Nov 22, 2024)
18efdd3  disambiguate between soft and hard delete (jmpesp, Nov 22, 2024)
640e678  cover the case that the region snapshot finish saga kicks off in the … (jmpesp, Nov 22, 2024)
1a60b15  wait for state to transition to complete (jmpesp, Nov 22, 2024)
9fc46ab  just an optimization (jmpesp, Nov 25, 2024)
2cd7881  add missing unwind edge (jmpesp, Nov 25, 2024)
a78cabf  remove likely (jmpesp, Nov 25, 2024)
c869cad  handle if region snapshot is hard deleted while replacement request i… (jmpesp, Nov 27, 2024)
231764d  Merge branch 'main' into region_snapshot_replacement_account_for_dele… (jmpesp, Dec 9, 2024)
7b93e2e  fmt (jmpesp, Dec 9, 2024)
cb1c2fe  fix compile time errors from merge (jmpesp, Dec 9, 2024)
fdf800a  conflicts are not errors! (jmpesp, Dec 10, 2024)
abf522e  add dummy region snapshot (jmpesp, Dec 10, 2024)
715c9f7  fmt (jmpesp, Dec 10, 2024)
e4d0844  comment for volume_repair_insert_in_txn (jmpesp, Dec 17, 2024)
518a86c  cargo fmt missed this one! (jmpesp, Dec 17, 2024)
ec9bb9a  when completing a region snapshot replacement from the start background (jmpesp, Dec 18, 2024)
34db989  address the unfinished comment (jmpesp, Dec 18, 2024)
4931f58  rework wait_for_request_state to be clearer (jmpesp, Dec 18, 2024)
2a37e9c  mroe -> more! (jmpesp, Dec 19, 2024)
12 changes: 6 additions & 6 deletions nexus/db-model/src/region_snapshot_replacement.rs
@@ -81,12 +81,12 @@ impl std::str::FromStr for RegionSnapshotReplacementState {
 ///          |                   |
 ///          v                  ---
 ///                             ---
-///       Running                |
-///          |
-///          |                   |
-///          v                   |
-///          |       responsibility of region snapshot
-///       Completing             |       replacement finish saga
+///       Running  <--           |
+///          |       |
+///          |       |           |
+///          v       |           |
+///          |       |   responsibility of region snapshot
+///       Completing --          |   replacement finish saga
 ///          |
 ///          |                   |
 ///          v                   |
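The added arrows give the finish saga an unwind edge: the saga moves a request from Running to Completing, and if the saga unwinds, the request steps back to Running. A minimal sketch of that transition rule (only the variants visible in this hunk — the real enum has more states — and the helper itself is hypothetical, not omicron code):

#[derive(Clone, Copy, Debug, PartialEq)]
enum ReplacementState {
    Running,
    Completing,
    Complete,
}

// Hypothetical transition check mirroring the doc-comment diagram: the
// finish saga drives Running -> Completing -> Complete, and an unwinding
// saga may step Completing back to Running (the edge added above).
fn transition_allowed(from: ReplacementState, to: ReplacementState) -> bool {
    use ReplacementState::*;
    matches!(
        (from, to),
        (Running, Completing) | (Completing, Complete) | (Completing, Running)
    )
}

fn main() {
    use ReplacementState::*;
    assert!(transition_allowed(Completing, Running)); // unwind is legal
    assert!(!transition_allowed(Complete, Completing)); // Complete is terminal
}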
6 changes: 4 additions & 2 deletions nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(115, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(117, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
@@ -29,7 +29,9 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
-    KnownVersion::new(115, "add-completing-and-new-region-volume"),
+    KnownVersion::new(117, "add-completing-and-new-region-volume"),
+    KnownVersion::new(116, "bp-physical-disk-disposition"),
+    KnownVersion::new(115, "inv-omicron-physical-disks-generation"),
KnownVersion::new(114, "crucible-ref-count-records"),
KnownVersion::new(113, "add-tx-eq"),
KnownVersion::new(112, "blueprint-dataset"),
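Side note on the renumbering above: main picked up versions 115 (inv-omicron-physical-disks-generation) and 116 (bp-physical-disk-disposition) while this PR was open, so the PR's migration moved from 115 to 117. KNOWN_VERSIONS is kept in reverse order, so consecutive entries descend by one; a hypothetical sanity check for that invariant (not part of the PR):

// Hypothetical check, not from the PR: KNOWN_VERSIONS is ordered
// newest-first, so consecutive major versions must descend by exactly one.
fn check_versions_descend(majors: &[u64]) {
    for pair in majors.windows(2) {
        assert_eq!(pair[0], pair[1] + 1, "KNOWN_VERSIONS must be contiguous");
    }
}

fn main() {
    // Mirrors the head of the list after this change.
    check_versions_descend(&[117, 116, 115, 114, 113, 112]);
}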
9 changes: 5 additions & 4 deletions nexus/db-queries/src/db/datastore/region_replacement.rs
@@ -22,7 +22,6 @@ use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use crate::db::TransactionError;
use crate::transaction_retry::OptionalError;
-use async_bb8_diesel::AsyncConnection;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use omicron_common::api::external::Error;
@@ -54,9 +53,11 @@ impl DataStore {
request: RegionReplacement,
) -> Result<(), Error> {
         let err = OptionalError::new();
-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| {
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        self.transaction_retry_wrapper("insert_region_replacement_request")
+            .transaction(&conn, |conn| {
+                let request = request.clone();
                 let err = err.clone();
                 async move {
                     use db::schema::region_replacement::dsl;
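This is the recurring refactor in the PR: `transaction_async` closures move onto `transaction_retry_wrapper`, which can rerun the closure on retryable database errors, so application errors are smuggled out through an `OptionalError` handle via `err.bail(...)` and recovered afterwards in `map_err`. A toy model of that shape, under stated assumptions (plain Rust, no database; `OptionalError`, `bail`, and the retry loop are stand-ins for the omicron versions, not their real signatures):

use std::sync::{Arc, Mutex};

// Stand-in for omicron's OptionalError: a shared slot that lets a custom
// application error escape a transaction closure that may be retried.
#[derive(Clone)]
struct OptionalError<E>(Arc<Mutex<Option<E>>>);

#[derive(Debug)]
enum DbError {
    Retryable, // e.g. a CRDB serialization failure; safe to rerun
    Custom,    // the closure bailed with an application error
}

impl<E> OptionalError<E> {
    fn new() -> Self {
        Self(Arc::new(Mutex::new(None)))
    }
    // Stash the application error and return a non-retryable sentinel.
    fn bail(&self, e: E) -> DbError {
        *self.0.lock().unwrap() = Some(e);
        DbError::Custom
    }
    fn take(&self) -> Option<E> {
        self.0.lock().unwrap().take()
    }
}

// Toy version of transaction_retry_wrapper(...).transaction(...):
// rerun the closure on retryable errors, pass everything else through.
fn retry_transaction<T>(
    mut attempt: impl FnMut() -> Result<T, DbError>,
) -> Result<T, DbError> {
    loop {
        match attempt() {
            Err(DbError::Retryable) => continue,
            other => return other,
        }
    }
}

fn main() {
    let err = OptionalError::new();
    let mut tries = 0;

    let result: Result<(), DbError> = retry_transaction(|| {
        let err = err.clone();
        tries += 1;
        if tries < 3 {
            Err(DbError::Retryable) // transient failure: closure reruns
        } else {
            Err(err.bail("request in unexpected state")) // real conflict
        }
    });

    // Mirrors the PR's map_err: prefer the bailed application error.
    let msg = match result {
        Ok(()) => "ok",
        Err(_) => err.take().unwrap_or("server error"),
    };
    assert_eq!(msg, "request in unexpected state");
}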
210 changes: 141 additions & 69 deletions nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs
@@ -22,7 +22,6 @@ use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use crate::db::TransactionError;
use crate::transaction_retry::OptionalError;
-use async_bb8_diesel::AsyncConnection;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::prelude::*;
use omicron_common::api::external::Error;
@@ -94,44 +93,48 @@
volume_id: Uuid,
) -> Result<(), Error> {
         let err = OptionalError::new();
-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| {
-                let err = err.clone();
-                async move {
-                    use db::schema::region_snapshot_replacement::dsl;
+        let conn = self.pool_connection_authorized(opctx).await?;
+
-                    // An associated volume repair record isn't _strictly_
-                    // needed: snapshot volumes should never be directly
-                    // constructed, and therefore won't ever have an associated
-                    // Upstairs that receives a volume replacement request.
-                    // However it's being done in an attempt to be overly
-                    // cautious, and it validates that the volume exist:
-                    // otherwise it would be possible to create a region
-                    // snapshot replacement request for a volume that didn't
-                    // exist!
-
-                    Self::volume_repair_insert_in_txn(
-                        &conn, err, volume_id, request.id,
-                    )
-                    .await?;
+        self.transaction_retry_wrapper(
+            "insert_region_snapshot_replacement_request_with_volume_id",
+        )
+        .transaction(&conn, |conn| {
+            let request = request.clone();
+            let err = err.clone();
+            async move {
+                use db::schema::region_snapshot_replacement::dsl;
+
-                    diesel::insert_into(dsl::region_snapshot_replacement)
-                        .values(request)
-                        .execute_async(&conn)
-                        .await?;
+                // An associated volume repair record isn't _strictly_
+                // needed: snapshot volumes should never be directly
Review thread on lines +107 to +108:

Contributor: I follow the bit about snapshots not being directly accessed by an upstairs, but I thought the repair record was still needed for mutual exclusion?

jmpesp (author): It's not strictly speaking necessary - many replacements could occur on the snapshot volume at the same time, and because it's never constructed there wouldn't be any repair operation required.

Contributor: They wouldn't contend on the snapshot volume's database record?

jmpesp (author): They would only contend around the volume repair record. If there was no lock for the snapshot volume, then the individual replacement transactions could all fire in whatever order they're going to serialize in, and it would probably work.

+                // constructed, and therefore won't ever have an associated
+                // Upstairs that receives a volume replacement request.
+                // However it's being done in an attempt to be overly
+                // cautious, and it validates that the volume exist:
+                // otherwise it would be possible to create a region
+                // snapshot replacement request for a volume that didn't
+                // exist!
+
-                    Ok(())
-                }
-            })
-            .await
-            .map_err(|e| {
-                if let Some(err) = err.take() {
-                    err
-                } else {
-                    public_error_from_diesel(e, ErrorHandler::Server)
-                }
-            })
+                Self::volume_repair_insert_in_txn(
+                    &conn, err, volume_id, request.id,
+                )
+                .await?;
+
+                diesel::insert_into(dsl::region_snapshot_replacement)
+                    .values(request)
+                    .execute_async(&conn)
+                    .await?;
+
+                Ok(())
+            }
+        })
+        .await
+        .map_err(|e| {
+            if let Some(err) = err.take() {
+                err
+            } else {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+        })
     }
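A note on the review thread above: the `volume_repair` record functions as an advisory per-volume lock — at most one repair row per volume — so concurrent replacement requests against the same volume conflict at insert time. A toy in-memory model of that behavior (hypothetical structure; the real lock is a CockroachDB table written inside the transaction, and the idempotent-relock behavior is suggested by the PR's "account for relocking a volume" commit):

use std::collections::HashMap;

// Toy model of the volume_repair table: volume_id -> repair_id, at most
// one row per volume. (Hypothetical; the real thing is a database table.)
struct VolumeRepairLock {
    rows: HashMap<u64, u64>,
}

impl VolumeRepairLock {
    fn new() -> Self {
        Self { rows: HashMap::new() }
    }

    fn lock(&mut self, volume_id: u64, repair_id: u64) -> Result<(), String> {
        match self.rows.get(&volume_id) {
            // The same repair taking its own lock again is a no-op.
            Some(&held) if held == repair_id => Ok(()),
            // A different repair holds the volume: a conflict, not a server
            // bug (cf. the "conflicts are not errors!" commit).
            Some(&held) => {
                Err(format!("volume {volume_id} locked by repair {held}"))
            }
            None => {
                self.rows.insert(volume_id, repair_id);
                Ok(())
            }
        }
    }

    fn unlock(&mut self, volume_id: u64) {
        self.rows.remove(&volume_id);
    }
}

fn main() {
    let mut table = VolumeRepairLock::new();
    assert!(table.lock(1, 100).is_ok());
    assert!(table.lock(1, 100).is_ok()); // relock by the same repair
    assert!(table.lock(1, 200).is_err()); // contention, as in the thread
    table.unlock(1); // the "complete" paths delete the row
    assert!(table.lock(1, 200).is_ok());
}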

pub async fn get_region_snapshot_replacement_request_by_id(
@@ -685,16 +688,25 @@
region_snapshot_replacement_id: Uuid,
operating_saga_id: Uuid,
) -> Result<(), Error> {
-        type TxnError = TransactionError<Error>;
-
         let err = OptionalError::new();
         let conn = self.pool_connection_authorized(opctx).await?;

-        self.transaction_retry_wrapper("set_region_snapshot_replacement_complete")
-            .transaction(&conn, |conn| {
-                let err = err.clone();
-                async move {
-                    use db::schema::volume_repair::dsl as volume_repair_dsl;
+        self.transaction_retry_wrapper(
+            "set_region_snapshot_replacement_complete",
+        )
+        .transaction(&conn, |conn| {
+            let err = err.clone();
+            async move {
+                use db::schema::volume_repair::dsl as volume_repair_dsl;
+
+                diesel::delete(
+                    volume_repair_dsl::volume_repair.filter(
+                        volume_repair_dsl::repair_id
+                            .eq(region_snapshot_replacement_id),
+                    ),
+                )
+                .execute_async(&conn)
+                .await?;

use db::schema::region_snapshot_replacement::dsl;

@@ -716,22 +728,81 @@
.execute_and_check(&conn)
.await?;

match result.status {
UpdateStatus::Updated => Ok(()),
UpdateStatus::NotUpdatedButExists => {
let record = result.found;

if record.replacement_state
== RegionSnapshotReplacementState::Complete
{
Ok(())
} else {
Err(err.bail(Error::conflict(format!(
"region snapshot replacement {} set to {:?} \
(operating saga id {:?})",
region_snapshot_replacement_id,
record.replacement_state,
record.operating_saga_id,
))))
Review thread on lines +741 to +747:

Contributor: This is unexpected, correct? (It means either that the operating saga ID was wrong, or the caller called this on a replacement that wasn't Completing.)

jmpesp (author): It's unexpected, yeah. Even in the case where the saga node is rerun the state should be set to Complete already and the saga shouldn't unwind.
}
}
}
}
})
.await
.map_err(|e| match err.take() {
Some(error) => error,
None => public_error_from_diesel(e, ErrorHandler::Server),
})
}

/// Transition a RegionSnapshotReplacement record from Requested to Complete
/// - this is required when the region snapshot is hard-deleted, which means
/// that all volume references are gone and no replacement is required. Also
/// removes the `volume_repair` record that is taking a "lock" on the
/// Volume.
pub async fn set_region_snapshot_replacement_complete_from_requested(
&self,
opctx: &OpContext,
region_snapshot_replacement_id: Uuid,
) -> Result<(), Error> {
-        type TxnError = TransactionError<Error>;
-
let err = OptionalError::new();
let conn = self.pool_connection_authorized(opctx).await?;

self.transaction_retry_wrapper("set_region_snapshot_replacement_complete")
.transaction(&conn, |conn| {
let err = err.clone();
async move {
use db::schema::volume_repair::dsl as volume_repair_dsl;
use db::schema::region_snapshot_replacement::dsl;

diesel::delete(
volume_repair_dsl::volume_repair.filter(
volume_repair_dsl::repair_id
.eq(region_snapshot_replacement_id),
),
)
.execute_async(&conn)
.await?;

let result = diesel::update(dsl::region_snapshot_replacement)
.filter(dsl::id.eq(region_snapshot_replacement_id))
.filter(
dsl::replacement_state
-                            .eq(RegionSnapshotReplacementState::Running),
+                            .eq(RegionSnapshotReplacementState::Requested),
)
.filter(dsl::operating_saga_id.is_null())
-                    .set((dsl::replacement_state
-                        .eq(RegionSnapshotReplacementState::Complete),))
+                    .filter(dsl::new_region_volume_id.is_null())
+                    .set(dsl::replacement_state
+                        .eq(RegionSnapshotReplacementState::Complete))
.check_if_exists::<RegionSnapshotReplacement>(
region_snapshot_replacement_id,
)
.execute_and_check(&conn)
                    .await?;
leftwo marked this conversation as resolved.

match result.status {
UpdateStatus::Updated => Ok(()),
@@ -750,7 +821,7 @@ impl DataStore {
region_snapshot_replacement_id,
record.replacement_state,
record.operating_saga_id,
-                        ),
+                        )
))))
}
}
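For orientation: the new function above exists because a region snapshot can be hard-deleted while its replacement request is still in Requested — at that point no volume references remain, so nothing needs replacing and the request can jump straight to Complete. A sketch of the dispatch this implies, drawn from the doc comment and the "disambiguate between soft and hard delete" commit (the types and the soft-delete handling here are illustrative assumptions, not omicron's real API):

// Illustrative only: models the situations the start background task
// has to distinguish for a request still in Requested.
enum SnapshotRow {
    Present,     // snapshot still live: run the start saga as usual
    SoftDeleted, // row flagged deleted, but references may remain
    HardDeleted, // row gone entirely: no references, nothing to replace
}

fn action_for(row: SnapshotRow) -> &'static str {
    match row {
        SnapshotRow::Present | SnapshotRow::SoftDeleted => {
            // Volumes may still reference the snapshot's regions, so the
            // replacement still has to run.
            "start the region snapshot replacement saga"
        }
        SnapshotRow::HardDeleted => {
            // Jump Requested -> Complete and drop the volume_repair lock.
            "set_region_snapshot_replacement_complete_from_requested"
        }
    }
}

fn main() {
    assert_eq!(
        action_for(SnapshotRow::Present),
        "start the region snapshot replacement saga"
    );
    assert_eq!(
        action_for(SnapshotRow::SoftDeleted),
        "start the region snapshot replacement saga"
    );
    assert_eq!(
        action_for(SnapshotRow::HardDeleted),
        "set_region_snapshot_replacement_complete_from_requested"
    );
}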
@@ -1220,11 +1291,16 @@
opctx: &OpContext,
region_snapshot_replacement_step: RegionSnapshotReplacementStep,
) -> Result<(), Error> {
-        type TxnError = TransactionError<Error>;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let err = OptionalError::new();

-        self.pool_connection_authorized(opctx)
-            .await?
-            .transaction_async(|conn| async move {
+        self.transaction_retry_wrapper(
+            "set_region_snapshot_replacement_complete",
+        )
+        .transaction(&conn, |conn| {
+            let err = err.clone();
+
+            async move {
use db::schema::volume_repair::dsl as volume_repair_dsl;

diesel::delete(
@@ -1267,27 +1343,23 @@
{
Ok(())
} else {
-                        Err(TxnError::CustomError(Error::conflict(
-                            format!(
-                                "region snapshot replacement step {} set \
+                        Err(err.bail(Error::conflict(format!(
+                            "region snapshot replacement step {} set \
                                 to {:?} (operating saga id {:?})",
-                                region_snapshot_replacement_step.id,
-                                record.replacement_state,
-                                record.operating_saga_id,
-                            ),
-                        )))
+                            region_snapshot_replacement_step.id,
+                            record.replacement_state,
+                            record.operating_saga_id,
+                        ))))

[leftwo marked this conversation as resolved]
}
}
}
-            })
-            .await
-            .map_err(|e| match e {
-                TxnError::CustomError(error) => error,
-
-                TxnError::Database(error) => {
-                    public_error_from_diesel(error, ErrorHandler::Server)
-                }
-            })
-    }
+        })
+        .await
+        .map_err(|e| match err.take() {
+            Some(error) => error,
+            None => public_error_from_diesel(e, ErrorHandler::Server),
+        })
}
}
