Skip to content

Commit

Permalink
refactor: parse integrity eagerly (#180)
Browse files Browse the repository at this point in the history
* refactor: parse integrity eagerly

The parsing of integrity has been moved from tarball to serde

* docs: clarify

* docs: remove the 3rd option

* refactor: remove unnecessary `pipe_ref`
  • Loading branch information
KSXGitHub authored Nov 4, 2023
1 parent 6265c74 commit 2fb4bd4
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 43 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/lockfile/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ node-semver = { workspace = true }
pipe-trait = { workspace = true }
serde = { workspace = true }
serde_yaml = { workspace = true }
ssri = { workspace = true }
split-first-char = { workspace = true }

[dev-dependencies]
Expand Down
23 changes: 14 additions & 9 deletions crates/lockfile/src/resolution.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
use derive_more::{From, TryInto};
use pipe_trait::Pipe;
use serde::{Deserialize, Serialize};
use ssri::Integrity;

/// For tarball hosted remotely or locally.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct TarballResolution {
pub tarball: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub integrity: Option<String>,
pub integrity: Option<Integrity>,
}

/// For standard package specification, with package name and version range.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct RegistryResolution {
pub integrity: String,
pub integrity: Integrity,
}

/// For local directory on a filesystem.
Expand Down Expand Up @@ -45,10 +46,10 @@ pub enum LockfileResolution {

impl LockfileResolution {
/// Get the integrity field if available.
pub fn integrity(&self) -> Option<&'_ str> {
pub fn integrity(&self) -> Option<&'_ Integrity> {
match self {
LockfileResolution::Tarball(resolution) => resolution.integrity.as_deref(),
LockfileResolution::Registry(resolution) => resolution.integrity.as_str().pipe(Some),
LockfileResolution::Tarball(resolution) => resolution.integrity.as_ref(),
LockfileResolution::Registry(resolution) => Some(&resolution.integrity),
LockfileResolution::Directory(_) | LockfileResolution::Git(_) => None,
}
}
Expand Down Expand Up @@ -101,6 +102,10 @@ mod tests {
use pretty_assertions::assert_eq;
use text_block_macros::text_block;

fn integrity(integrity_str: &str) -> Integrity {
integrity_str.parse().expect("parse integrity string")
}

#[test]
fn deserialize_tarball_resolution() {
eprintln!("CASE: without integrity");
Expand All @@ -124,7 +129,7 @@ mod tests {
dbg!(&received);
let expected = LockfileResolution::Tarball(TarballResolution {
tarball: "file:ts-pipe-compose-0.2.1.tgz".to_string(),
integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string().into()
integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==").into()
});
assert_eq!(received, expected);
}
Expand All @@ -147,7 +152,7 @@ mod tests {
eprintln!("CASE: with integrity");
let resolution = LockfileResolution::Tarball(TarballResolution {
tarball: "file:ts-pipe-compose-0.2.1.tgz".to_string(),
integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string().into()
integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==").into()
});
let received = serde_yaml::to_string(&resolution).unwrap();
let received = received.trim();
Expand All @@ -167,15 +172,15 @@ mod tests {
let received: LockfileResolution = serde_yaml::from_str(yaml).unwrap();
dbg!(&received);
let expected = LockfileResolution::Registry(RegistryResolution {
integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string()
integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==")
});
assert_eq!(received, expected);
}

#[test]
fn serialize_registry_resolution() {
let resolution = LockfileResolution::Registry(RegistryResolution {
integrity: "sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==".to_string()
integrity: integrity("sha512-gf6ZldcfCDyNXPRiW3lQjEP1Z9rrUM/4Cn7BZbv3SdTA82zxWRP8OmLwvGR974uuENhGCFgFdN11z3n1Ofpprg==")
});
let received = serde_yaml::to_string(&resolution).unwrap();
let received = received.trim();
Expand Down
4 changes: 2 additions & 2 deletions crates/package-manager/src/install_package_by_snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl<'a> InstallPackageBySnapshot<'a> {

let (tarball_url, integrity) = match resolution {
LockfileResolution::Tarball(tarball_resolution) => {
let integrity = tarball_resolution.integrity.as_deref().unwrap_or_else(|| {
let integrity = tarball_resolution.integrity.as_ref().unwrap_or_else(|| {
// TODO: how to handle the absent of integrity field?
panic!("Current implementation requires integrity, but {dependency_path} doesn't have it");
});
Expand All @@ -54,7 +54,7 @@ impl<'a> InstallPackageBySnapshot<'a> {
let version = ver_peer.version();
let bare_name = name.bare.as_str();
let tarball_url = format!("{registry}/{name}/-/{bare_name}-{version}.tgz");
let integrity = registry_resolution.integrity.as_str();
let integrity = &registry_resolution.integrity;
(Cow::Owned(tarball_url), integrity)
}
LockfileResolution::Directory(_) | LockfileResolution::Git(_) => {
Expand Down
1 change: 1 addition & 0 deletions crates/registry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ node-semver = { workspace = true }
pipe-trait = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
ssri = { workspace = true }
tokio = { workspace = true }
miette = { workspace = true }

Expand Down
3 changes: 2 additions & 1 deletion crates/registry/src/package_distribution.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use serde::{Deserialize, Serialize};
use ssri::Integrity;

#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq)]
#[serde(rename_all = "camelCase")]
pub struct PackageDistribution {
pub integrity: Option<String>,
pub integrity: Option<Integrity>,
pub shasum: Option<String>,
pub tarball: String,
pub file_count: Option<usize>,
Expand Down
33 changes: 12 additions & 21 deletions crates/tarball/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,6 @@ pub struct NetworkError {
pub error: reqwest::Error,
}

#[derive(Debug, Display, Error, Diagnostic)]
#[display("Cannot parse {integrity:?} from {url} as an integrity: {error}")]
pub struct ParseIntegrityError {
pub url: String,
pub integrity: String,
#[error(source)]
pub error: ssri::Error,
}

#[derive(Debug, Display, Error, Diagnostic)]
#[display("Failed to verify the integrity of {url}: {error}")]
pub struct VerifyChecksumError {
Expand All @@ -57,9 +48,6 @@ pub enum TarballError {
#[diagnostic(code(pacquet_tarball::io_error))]
ReadTarballEntries(std::io::Error),

#[diagnostic(code(pacquet_tarball::parse_integrity_error))]
ParseIntegrity(ParseIntegrityError),

#[diagnostic(code(pacquet_tarball::verify_checksum_error))]
Checksum(VerifyChecksumError),

Expand Down Expand Up @@ -128,7 +116,7 @@ pub struct DownloadTarballToStore<'a> {
pub tarball_cache: &'a Cache,
pub http_client: &'a Client,
pub store_dir: &'static StoreDir,
pub package_integrity: &'a str,
pub package_integrity: &'a Integrity,
pub package_unpacked_size: Option<usize>,
pub package_url: &'a str,
}
Expand Down Expand Up @@ -199,12 +187,11 @@ impl<'a> DownloadTarballToStore<'a> {

tracing::info!(target: "pacquet::download", ?package_url, "Download completed");

let package_integrity: Integrity =
package_integrity.parse().map_err(|error| ParseIntegrityError {
url: package_url.to_string(),
integrity: package_integrity.to_string(),
error,
})?;
// TODO: Cloning here is less than desirable, there are 2 possible solutions for this problem:
// 1. Use an Arc and convert this line to Arc::clone.
// 2. Replace ssri with base64 and serde magic (which supports Copy).
let package_integrity = package_integrity.clone();

#[derive(Debug, From)]
enum TaskError {
Checksum(ssri::Error),
Expand Down Expand Up @@ -303,6 +290,10 @@ mod tests {

use super::*;

fn integrity(integrity_str: &str) -> Integrity {
integrity_str.parse().expect("parse integrity string")
}

/// **Problem:**
/// The tested function requires `'static` paths, leaking would prevent
/// temporary files from being cleaned up.
Expand All @@ -328,7 +319,7 @@ mod tests {
tarball_cache: &Default::default(),
http_client: &Default::default(),
store_dir: store_path,
package_integrity: "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==",
package_integrity: &integrity("sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w=="),
package_unpacked_size: Some(16697),
package_url: "https://registry.npmjs.org/@fastify/error/-/error-3.3.0.tgz"
}
Expand Down Expand Up @@ -368,7 +359,7 @@ mod tests {
tarball_cache: &Default::default(),
http_client: &Default::default(),
store_dir: store_path,
package_integrity: "sha512-aaaan1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==",
package_integrity: &integrity("sha512-aaaan1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w=="),
package_unpacked_size: Some(16697),
package_url: "https://registry.npmjs.org/@fastify/error/-/error-3.3.0.tgz",
}
Expand Down
1 change: 1 addition & 0 deletions tasks/micro-benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ tempfile = { workspace = true }
pipe-trait = { workspace = true }
project-root = { workspace = true }
reqwest = { workspace = true }
ssri = { workspace = true }
node-semver = { workspace = true }
25 changes: 15 additions & 10 deletions tasks/micro-benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use pacquet_tarball::DownloadTarballToStore;
use pipe_trait::Pipe;
use project_root::get_project_root;
use reqwest::Client;
use ssri::Integrity;
use tempfile::tempdir;

#[derive(Debug, Parser)]
Expand All @@ -24,24 +25,28 @@ fn bench_tarball(c: &mut Criterion, server: &mut ServerGuard, fixtures_folder: &
let rt = tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap();

let url = &format!("{0}/@fastify+error-3.3.0.tgz", server.url());
let package_integrity: Integrity = "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==".parse().expect("parse integrity string");

group.throughput(Throughput::Bytes(file.len() as u64));
group.bench_function("download_dependency", |b| {
b.to_async(&rt).iter(|| async {
// NOTE: the tempdir is being leaked, meaning the cleanup would be postponed until the end of the benchmark
let dir = tempdir().unwrap();
let store_dir = dir.path().to_path_buf().pipe(StoreDir::from).pipe(Box::new).pipe(Box::leak);
let store_dir =
dir.path().to_path_buf().pipe(StoreDir::from).pipe(Box::new).pipe(Box::leak);
let http_client = Client::new();

let cas_map =
DownloadTarballToStore{
tarball_cache: &Default::default(),
http_client: &http_client,
store_dir,
package_integrity: "sha512-dj7vjIn1Ar8sVXj2yAXiMNCJDmS9MQ9XMlIecX2dIzzhjSHCyKo4DdXjXMs7wKW2kj6yvVRSpuQjOZ3YLrh56w==",
package_unpacked_size: Some(16697),
package_url: url,
}.run().await.unwrap();
let cas_map = DownloadTarballToStore {
tarball_cache: &Default::default(),
http_client: &http_client,
store_dir,
package_integrity: &package_integrity,
package_unpacked_size: Some(16697),
package_url: url,
}
.run()
.await
.unwrap();
cas_map.len()
});
});
Expand Down

0 comments on commit 2fb4bd4

Please sign in to comment.