Skip to content

Commit

Permalink
[sled-agent] Encrypt a specific set of U.2 datasets (oxidecomputer#4853)
Browse files Browse the repository at this point in the history
This PR does the following:

- As a part of processing U.2s during initialization,
`ensure_zpool_datasets_are_encrypted` is invoked. This identifies all
datasets which should be encrypted (`cockroachdb`, `clickhouse`,
`internal_dns`, `external_dns`, `clickhouse_keeper`) and performs a
one-way migration from unencrypted to encrypted dataset.
- Additionally, during zone launching, the sled agent verifies
properties about datasets that it expects should be encrypted. This
helps prevent these encrypted dataset from being used before their
transfer has finished, and also prevents these zones from ever using
unencrypted datasets in the future.
- Furthermore, for all new deployments, this PR uses encryption on these
datasets by default.
  • Loading branch information
smklein authored Jan 22, 2024
1 parent 205382f commit e5a3caa
Show file tree
Hide file tree
Showing 6 changed files with 532 additions and 30 deletions.
63 changes: 43 additions & 20 deletions illumos-utils/src/zfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,13 @@ enum GetValueErrorRaw {
MissingValue,
}

/// Error returned by [`Zfs::get_oxide_value`].
/// Error returned by [`Zfs::get_oxide_value`] or [`Zfs::get_value`].
#[derive(thiserror::Error, Debug)]
#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")]
#[error("Failed to get value '{name}' from filesystem {filesystem}")]
pub struct GetValueError {
filesystem: String,
name: String,
#[source]
err: GetValueErrorRaw,
}

Expand Down Expand Up @@ -464,28 +465,13 @@ impl Zfs {
Zfs::get_value(filesystem_name, &format!("oxide:{}", name))
}

/// Calls "zfs get" with a single value
pub fn get_value(
filesystem_name: &str,
name: &str,
) -> Result<String, GetValueError> {
let mut command = std::process::Command::new(PFEXEC);
let cmd =
command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]);
let output = execute(cmd).map_err(|err| GetValueError {
filesystem: filesystem_name.to_string(),
name: name.to_string(),
err: err.into(),
})?;
let stdout = String::from_utf8_lossy(&output.stdout);
let value = stdout.trim();
if value == "-" {
return Err(GetValueError {
filesystem: filesystem_name.to_string(),
name: name.to_string(),
err: GetValueErrorRaw::MissingValue,
});
}
Ok(value.to_string())
let [value] = Self::get_values(filesystem_name, &[name])?;
Ok(value)
}

/// List all extant snapshots.
Expand Down Expand Up @@ -549,6 +535,43 @@ impl Zfs {
}
}

// These methods don't work with mockall, so they exist in a separate impl block
impl Zfs {
/// Calls "zfs get" to acquire multiple values
pub fn get_values<const N: usize>(
filesystem_name: &str,
names: &[&str; N],
) -> Result<[String; N], GetValueError> {
let mut cmd = std::process::Command::new(PFEXEC);
let all_names =
names.into_iter().map(|n| *n).collect::<Vec<&str>>().join(",");
cmd.args(&[ZFS, "get", "-Ho", "value", &all_names, filesystem_name]);
let output = execute(&mut cmd).map_err(|err| GetValueError {
filesystem: filesystem_name.to_string(),
name: format!("{:?}", names),
err: err.into(),
})?;
let stdout = String::from_utf8_lossy(&output.stdout);
let values = stdout.trim();

const EMPTY_STRING: String = String::new();
let mut result: [String; N] = [EMPTY_STRING; N];

for (i, value) in values.lines().enumerate() {
let value = value.trim();
if value == "-" {
return Err(GetValueError {
filesystem: filesystem_name.to_string(),
name: names[i].to_string(),
err: GetValueErrorRaw::MissingValue,
});
}
result[i] = value.to_string();
}
Ok(result)
}
}

/// A read-only snapshot of a ZFS filesystem.
#[derive(Clone, Debug)]
pub struct Snapshot {
Expand Down
51 changes: 49 additions & 2 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,24 @@ pub enum Error {
#[error("Failed to get address: {0}")]
GetAddressFailure(#[from] illumos_utils::zone::GetAddressError),

#[error(
"Failed to launch zone {zone} because ZFS value cannot be accessed"
)]
GetZfsValue {
zone: String,
#[source]
source: illumos_utils::zfs::GetValueError,
},

#[error("Cannot launch {zone} with {dataset} (saw {prop_name} = {prop_value}, expected {prop_value_expected})")]
DatasetNotReady {
zone: String,
dataset: String,
prop_name: String,
prop_value: String,
prop_value_expected: String,
},

#[error("NTP zone not ready")]
NtpZoneNotReady,

Expand Down Expand Up @@ -1474,7 +1492,7 @@ impl ServiceManager {
ZoneArgs::Omicron(zone_config) => zone_config
.zone
.dataset_name()
.map(|n| zone::Dataset { name: n.full() })
.map(|n| zone::Dataset { name: n.full_name() })
.into_iter()
.collect(),
ZoneArgs::Switch(_) => vec![],
Expand Down Expand Up @@ -1711,7 +1729,7 @@ impl ServiceManager {
dataset.pool_name.clone(),
DatasetKind::Crucible,
)
.full();
.full_name();
let uuid = &Uuid::new_v4().to_string();
let config = PropertyGroupBuilder::new("config")
.add_property("datalink", "astring", datalink)
Expand Down Expand Up @@ -2930,6 +2948,35 @@ impl ServiceManager {
// Currently, the zone filesystem should be destroyed between
// reboots, so it's fine to make this decision locally.
let root = if let Some(dataset) = zone.dataset_name() {
// Check that the dataset is actually ready to be used.
let [zoned, canmount, encryption] =
illumos_utils::zfs::Zfs::get_values(
&dataset.full_name(),
&["zoned", "canmount", "encryption"],
)
.map_err(|err| Error::GetZfsValue {
zone: zone.zone_name(),
source: err,
})?;

let check_property = |name, actual, expected| {
if actual != expected {
return Err(Error::DatasetNotReady {
zone: zone.zone_name(),
dataset: dataset.full_name(),
prop_name: String::from(name),
prop_value: actual,
prop_value_expected: String::from(expected),
});
}
return Ok(());
};
check_property("zoned", zoned, "on")?;
check_property("canmount", canmount, "on")?;
if dataset.dataset().dataset_should_be_encrypted() {
check_property("encryption", encryption, "aes-256-gcm")?;
}

// If the zone happens to already manage a dataset, then
// we co-locate the zone dataset on the same zpool.
//
Expand Down
2 changes: 1 addition & 1 deletion sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ impl SledAgent {
warn!(
self.log,
"Failed to load services, will retry in {:?}", delay;
"error" => %err,
"error" => ?err,
);
},
)
Expand Down
Loading

0 comments on commit e5a3caa

Please sign in to comment.