Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle case mismatches when looking up env vars in the Config snapshot #11824

Merged
merged 7 commits into from
Mar 17, 2023
2 changes: 1 addition & 1 deletion src/cargo/util/config/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ impl<'config> ValueDeserializer<'config> {
let definition = {
let env = de.key.as_env_key();
let env_def = Definition::Environment(env.to_string());
match (de.config.env_has_key(env), de.config.get_cv(&de.key)?) {
match (de.config.env.contains_key(env), de.config.get_cv(&de.key)?) {
(true, Some(cv)) => {
// Both, pick highest priority.
if env_def.is_higher_priority(cv.definition()) {
Expand Down
147 changes: 147 additions & 0 deletions src/cargo/util/config/environment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
//! Encapsulates snapshotting of environment variables.

use std::collections::HashMap;
use std::ffi::{OsStr, OsString};

use crate::util::errors::CargoResult;
use anyhow::{anyhow, bail};

/// Generate `case_insensitive_env` and `normalized_env` from the `env`.
fn make_case_insensitive_and_normalized_env(
env: &HashMap<OsString, OsString>,
) -> (HashMap<String, String>, HashMap<String, String>) {
let case_insensitive_env: HashMap<_, _> = env
.keys()
.filter_map(|k| k.to_str())
.map(|k| (k.to_uppercase(), k.to_owned()))
.collect();
let normalized_env = env
.iter()
// Only keep entries where both the key and value are valid UTF-8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this comment also mention why it is skipping over non-utf-8 values? Perhaps something like "Because the config env vars only support utf-8, this needs to be kept in sync with that, otherwise the normalized map warning could incorrectly warn about entries that can't be read by the config system." Or something shorter if you can word it better.

.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
.map(|(k, _)| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
(case_insensitive_env, normalized_env)
}

#[derive(Debug)]
pub struct Env {
/// A snapshot of the process's environment variables.
env: HashMap<OsString, OsString>,
/// A map from normalized (upper case and with "-" replaced by "_") env keys to the actual key
/// in the environment.
/// The "normalized" key is the format expected by Cargo.
/// This is used to warn users when env keys are not provided in this format.
normalized_env: HashMap<String, String>,
/// A map from uppercased env keys to the actual key in the environment.
/// This is relevant on Windows, where env keys are case-insensitive.
/// For example, this might hold a pair `("PATH", "Path")`.
case_insensitive_env: HashMap<String, String>,
}

impl Env {
/// Create a new `Env` from process's environment variables.
pub fn new() -> Self {
let env: HashMap<_, _> = std::env::vars_os().collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Set the env directly from a `HashMap`.
/// This should be used for debugging purposes only.
pub(super) fn from_map(env: HashMap<String, String>) -> Self {
let env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub fn iter_str(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
}

/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
pub fn keys_str(&self) -> impl Iterator<Item = &str> {
self.env.keys().filter_map(|k| k.to_str())
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
match self.env.get(key.as_ref()) {
Some(s) => Some(s.clone()),
None => {
if cfg!(windows) {
self.get_env_case_insensitive(key).cloned()
} else {
None
}
}
}
}

/// Get the value of environment variable `key` through the `self.env` snapshot.
///
/// This can be used similarly to `std::env::var`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = self
.get_env_os(key)
.ok_or_else(|| anyhow!("{key:?} could not be found in the environment snapshot"))?;

match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
}

/// Performs a case-insensitive lookup of `key` in the environment.
///
/// This is relevant on Windows, where environment variables are case-insensitive.
/// Note that this only works on keys that are valid UTF-8 and it uses Unicode uppercase,
/// which may differ from the OS's notion of uppercase.
fn get_env_case_insensitive(&self, key: impl AsRef<OsStr>) -> Option<&OsString> {
let upper_case_key = key.as_ref().to_str()?.to_uppercase();
let env_key: &OsStr = self.case_insensitive_env.get(&upper_case_key)?.as_ref();
self.env.get(env_key)
}

/// Get the value of environment variable `key` as a `&str`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
Comment on lines +150 to +151
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this include a comment explaining why it is case-sensitive?

I was looking over the history, and it looks like this was an unintended regression in 1.28 via #5552. I think I somehow forgot about windows case sensitivity at that time, and have been confused on a few occasions (like #9169) due to the fact that Command used to force keys to be uppercase on Windows (fixed in 1.55 via rust-lang/rust#85270). When launching cargo from within cargo's testsuite, it used Command which ended up mucking with the environment keys.

I don't think this needs to be fixed here in this PR. I don't think anyone has complained about this since then, so maybe it's best just to leave it.

So, a comment could explain something like:

This is case-sensitive on Windows (even though Windows is usually case-insensitive) due to an unintended regression in 1.28 (via #5552). This should only affect keys used for cargo's config-system env variables (CARGO_ prefixed ones) which are currently all uppercase. We may want to consider rectifying it if users report issues. One thing that adds a wrinkle here is the unstable advanced-env option which requires case-sensitive keys.

Do not use this for any other purposes. Use [Env::get_env_os] or [Env::get_env] instead, which properly handle case insensitivity on Windows.

pub(super) fn get_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

/// Check if the environment contains `key`.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
pub(super) fn contains_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
}

/// Looks up a normalized `key` in the `normalized_env`.
/// Returns the corresponding (non-normalized) env key if it exists, else `None`.
///
/// This is used by `Config::check_environment_key_mismatch`.
pub(super) fn get_normalized(&self, key: &str) -> Option<&str> {
self.normalized_env.get(key).map(|s| s.as_ref())
}
}
68 changes: 21 additions & 47 deletions src/cargo/util/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ pub use path::{ConfigRelativePath, PathAndArgs};
mod target;
pub use target::{TargetCfgConfig, TargetConfig};

mod environment;
use environment::Env;

// Helper macro for creating typed access methods.
macro_rules! get_value_typed {
($name:ident, $ty:ty, $variant:ident, $expected:expr) => {
Expand Down Expand Up @@ -199,10 +202,8 @@ pub struct Config {
creation_time: Instant,
/// Target Directory via resolved Cli parameter
target_dir: Option<Filesystem>,
/// Environment variables, separated to assist testing.
env: HashMap<OsString, OsString>,
/// Environment variables, converted to uppercase to check for case mismatch
upper_case_env: HashMap<String, String>,
/// Environment variable snapshot.
env: Env,
/// Tracks which sources have been updated to avoid multiple updates.
updated_sources: LazyCell<RefCell<HashSet<SourceId>>>,
/// Cache of credentials from configuration or credential providers.
Expand Down Expand Up @@ -260,16 +261,10 @@ impl Config {
}
});

let env: HashMap<_, _> = env::vars_os().collect();

let upper_case_env = env
.iter()
.filter_map(|(k, _)| k.to_str()) // Only keep valid UTF-8
.map(|k| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
let env = Env::new();

let cache_key: &OsStr = "CARGO_CACHE_RUSTC_INFO".as_ref();
let cache_rustc_info = match env.get(cache_key) {
let cache_key = "CARGO_CACHE_RUSTC_INFO";
let cache_rustc_info = match env.get_env_os(cache_key) {
Some(cache) => cache != "0",
_ => true,
};
Expand Down Expand Up @@ -303,7 +298,6 @@ impl Config {
creation_time: Instant::now(),
target_dir: None,
env,
upper_case_env,
updated_sources: LazyCell::new(),
credential_cache: LazyCell::new(),
package_cache_lock: RefCell::new(None),
Expand Down Expand Up @@ -658,7 +652,7 @@ impl Config {
// Root table can't have env value.
return Ok(cv);
}
let env = self.get_env_str(key.as_env_key());
let env = self.env.get_str(key.as_env_key());
let env_def = Definition::Environment(key.as_env_key().to_string());
let use_env = match (&cv, env) {
// Lists are always merged.
Expand Down Expand Up @@ -729,28 +723,26 @@ impl Config {

/// Helper primarily for testing.
pub fn set_env(&mut self, env: HashMap<String, String>) {
self.env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
self.env = Env::from_map(env);
}

/// Returns all environment variables as an iterator, filtering out entries
/// that are not valid UTF-8.
/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub(crate) fn env(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
self.env.iter_str()
}

/// Returns all environment variable keys, filtering out entries that are not valid UTF-8.
/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
fn env_keys(&self) -> impl Iterator<Item = &str> {
self.env.iter().filter_map(|(k, _)| k.to_str())
self.env.keys_str()
}

fn get_config_env<T>(&self, key: &ConfigKey) -> Result<OptValue<T>, ConfigError>
where
T: FromStr,
<T as FromStr>::Err: fmt::Display,
{
match self.get_env_str(key.as_env_key()) {
match self.env.get_str(key.as_env_key()) {
Some(value) => {
let definition = Definition::Environment(key.as_env_key().to_string());
Ok(Some(Value {
Expand All @@ -771,39 +763,21 @@ impl Config {
///
/// This can be used similarly to `std::env::var`.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = match self.env.get(key) {
Some(s) => s,
None => bail!("{key:?} could not be found in the environment snapshot",),
};
match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
self.env.get_env(key)
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
self.env.get(key.as_ref()).cloned()
}

/// Get the value of environment variable `key`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
fn get_env_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

fn env_has_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
self.env.get_env_os(key)
}

/// Check if the [`Config`] contains a given [`ConfigKey`].
///
/// See `ConfigMapAccess` for a description of `env_prefix_ok`.
fn has_key(&self, key: &ConfigKey, env_prefix_ok: bool) -> CargoResult<bool> {
if self.env_has_key(key.as_env_key()) {
if self.env.contains_key(key.as_env_key()) {
return Ok(true);
}
if env_prefix_ok {
Expand All @@ -821,7 +795,7 @@ impl Config {
}

fn check_environment_key_case_mismatch(&self, key: &ConfigKey) {
if let Some(env_key) = self.upper_case_env.get(key.as_env_key()) {
if let Some(env_key) = self.env.get_normalized(key.as_env_key()) {
let _ = self.shell().warn(format!(
"Environment variables are expected to use uppercase letters and underscores, \
the variable `{}` will be ignored and have no effect",
Expand Down Expand Up @@ -920,7 +894,7 @@ impl Config {
key: &ConfigKey,
output: &mut Vec<(String, Definition)>,
) -> CargoResult<()> {
let env_val = match self.get_env_str(key.as_env_key()) {
let env_val = match self.env.get_str(key.as_env_key()) {
Some(v) => v,
None => {
self.check_environment_key_case_mismatch(key);
Expand Down
26 changes: 26 additions & 0 deletions tests/testsuite/cargo_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,32 @@ fn list_command_looks_at_path() {
);
}

#[cfg(windows)]
#[cargo_test]
fn list_command_looks_at_path_case_mismatch() {
let proj = project()
.executable(Path::new("path-test").join("cargo-1"), "")
.build();

let mut path = path();
path.push(proj.root().join("path-test"));
let path = env::join_paths(path.iter()).unwrap();

// See issue #11814: Environment variable names are case-insensitive on Windows.
// We need to check that having "Path" instead of "PATH" is okay.
let output = cargo_process("-v --list")
.env("Path", &path)
.env_remove("PATH")
.exec_with_output()
.unwrap();
let output = str::from_utf8(&output.stdout).unwrap();
assert!(
output.contains("\n 1 "),
"missing 1: {}",
output
);
}

#[cargo_test]
fn list_command_handles_known_external_commands() {
let p = project()
Expand Down
25 changes: 25 additions & 0 deletions tests/testsuite/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,31 @@ f1 = 123
assert_eq!(s, S { f1: Some(456) });
}

#[cfg(windows)]
#[cargo_test]
fn environment_variable_casing() {
// Issue #11814: Environment variable names are case-insensitive on Windows.
let config = ConfigBuilder::new()
.env("Path", "abc")
.env("Two-Words", "abc")
.env("two_words", "def")
.build();

let var = config.get_env("PATH").unwrap();
assert_eq!(var, String::from("abc"));

let var = config.get_env("path").unwrap();
assert_eq!(var, String::from("abc"));

let var = config.get_env("TWO-WORDS").unwrap();
assert_eq!(var, String::from("abc"));

// Make sure that we can still distinguish between dashes and underscores
// in variable names.
let var = config.get_env("Two_Words").unwrap();
assert_eq!(var, String::from("def"));
}

#[cargo_test]
fn config_works_with_extension() {
write_config_toml(
Expand Down