diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs index 3b2ded301c26c..63f9be7b7e350 100644 --- a/library/std/src/sys/windows/c.rs +++ b/library/std/src/sys/windows/c.rs @@ -74,6 +74,10 @@ pub type ADDRESS_FAMILY = USHORT; pub const TRUE: BOOL = 1; pub const FALSE: BOOL = 0; +pub const CSTR_LESS_THAN: c_int = 1; +pub const CSTR_EQUAL: c_int = 2; +pub const CSTR_GREATER_THAN: c_int = 3; + pub const FILE_ATTRIBUTE_READONLY: DWORD = 0x1; pub const FILE_ATTRIBUTE_DIRECTORY: DWORD = 0x10; pub const FILE_ATTRIBUTE_REPARSE_POINT: DWORD = 0x400; @@ -970,6 +974,14 @@ extern "system" { pub fn ReleaseSRWLockShared(SRWLock: PSRWLOCK); pub fn TryAcquireSRWLockExclusive(SRWLock: PSRWLOCK) -> BOOLEAN; pub fn TryAcquireSRWLockShared(SRWLock: PSRWLOCK) -> BOOLEAN; + + pub fn CompareStringOrdinal( + lpString1: LPCWSTR, + cchCount1: c_int, + lpString2: LPCWSTR, + cchCount2: c_int, + bIgnoreCase: BOOL, + ) -> c_int; } #[link(name = "ws2_32")] diff --git a/library/std/src/sys/windows/process.rs b/library/std/src/sys/windows/process.rs index b3caf23206430..b082e21ab3bcc 100644 --- a/library/std/src/sys/windows/process.rs +++ b/library/std/src/sys/windows/process.rs @@ -4,6 +4,7 @@ mod tests; use crate::borrow::Borrow; +use crate::cmp; use crate::collections::BTreeMap; use crate::convert::{TryFrom, TryInto}; use crate::env; @@ -34,32 +35,95 @@ use libc::{c_void, EXIT_FAILURE, EXIT_SUCCESS}; // Command //////////////////////////////////////////////////////////////////////////////// -#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +#[derive(Clone, Debug, Eq)] #[doc(hidden)] -pub struct EnvKey(OsString); +pub struct EnvKey { + os_string: OsString, + // This stores a UTF-16 encoded string to workaround the mismatch between + // Rust's OsString (WTF-8) and the Windows API string type (UTF-16). + // Normally converting on every API call is acceptable but here + // `c::CompareStringOrdinal` will be called for every use of `==`. + utf16: Vec, +} + +// Comparing Windows environment variable keys[1] are behaviourally the +// composition of two operations[2]: +// +// 1. Case-fold both strings. This is done using a language-independent +// uppercase mapping that's unique to Windows (albeit based on data from an +// older Unicode spec). It only operates on individual UTF-16 code units so +// surrogates are left unchanged. This uppercase mapping can potentially change +// between Windows versions. +// +// 2. Perform an ordinal comparison of the strings. A comparison using ordinal +// is just a comparison based on the numerical value of each UTF-16 code unit[3]. +// +// Because the case-folding mapping is unique to Windows and not guaranteed to +// be stable, we ask the OS to compare the strings for us. This is done by +// calling `CompareStringOrdinal`[4] with `bIgnoreCase` set to `TRUE`. +// +// [1] https://docs.microsoft.com/en-us/dotnet/standard/base-types/best-practices-strings#choosing-a-stringcomparison-member-for-your-method-call +// [2] https://docs.microsoft.com/en-us/dotnet/standard/base-types/best-practices-strings#stringtoupper-and-stringtolower +// [3] https://docs.microsoft.com/en-us/dotnet/api/system.stringcomparison?view=net-5.0#System_StringComparison_Ordinal +// [4] https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-comparestringordinal +impl Ord for EnvKey { + fn cmp(&self, other: &Self) -> cmp::Ordering { + unsafe { + let result = c::CompareStringOrdinal( + self.utf16.as_ptr(), + self.utf16.len() as _, + other.utf16.as_ptr(), + other.utf16.len() as _, + c::TRUE, + ); + match result { + c::CSTR_LESS_THAN => cmp::Ordering::Less, + c::CSTR_EQUAL => cmp::Ordering::Equal, + c::CSTR_GREATER_THAN => cmp::Ordering::Greater, + // `CompareStringOrdinal` should never fail so long as the parameters are correct. + _ => panic!("comparing environment keys failed: {}", Error::last_os_error()), + } + } + } +} +impl PartialOrd for EnvKey { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl PartialEq for EnvKey { + fn eq(&self, other: &Self) -> bool { + if self.utf16.len() != other.utf16.len() { + false + } else { + self.cmp(other) == cmp::Ordering::Equal + } + } +} +// Environment variable keys should preserve their original case even though +// they are compared using a caseless string mapping. impl From for EnvKey { - fn from(mut k: OsString) -> Self { - k.make_ascii_uppercase(); - EnvKey(k) + fn from(k: OsString) -> Self { + EnvKey { utf16: k.encode_wide().collect(), os_string: k } } } impl From for OsString { fn from(k: EnvKey) -> Self { - k.0 + k.os_string } } impl Borrow for EnvKey { fn borrow(&self) -> &OsStr { - &self.0 + &self.os_string } } impl AsRef for EnvKey { fn as_ref(&self) -> &OsStr { - &self.0 + &self.os_string } } @@ -537,7 +601,8 @@ fn make_envp(maybe_env: Option>) -> io::Result<(*mut } for (k, v) in env { - blk.extend(ensure_no_nuls(k.0)?.encode_wide()); + ensure_no_nuls(k.os_string)?; + blk.extend(k.utf16); blk.push('=' as u16); blk.extend(ensure_no_nuls(v)?.encode_wide()); blk.push(0); diff --git a/library/std/src/sys/windows/process/tests.rs b/library/std/src/sys/windows/process/tests.rs index 8830ae049c65d..ff3f9131cc889 100644 --- a/library/std/src/sys/windows/process/tests.rs +++ b/library/std/src/sys/windows/process/tests.rs @@ -1,5 +1,7 @@ use super::make_command_line; +use crate::env; use crate::ffi::{OsStr, OsString}; +use crate::process::Command; #[test] fn test_make_command_line() { @@ -41,3 +43,62 @@ fn test_make_command_line() { "\"\u{03c0}\u{042f}\u{97f3}\u{00e6}\u{221e}\"" ); } + +// On Windows, environment args are case preserving but comparisons are case-insensitive. +// See: #85242 +#[test] +fn windows_env_unicode_case() { + let test_cases = [ + ("ä", "Ä"), + ("ß", "SS"), + ("Ä", "Ö"), + ("Ä", "Ö"), + ("I", "İ"), + ("I", "i"), + ("I", "ı"), + ("i", "I"), + ("i", "İ"), + ("i", "ı"), + ("İ", "I"), + ("İ", "i"), + ("İ", "ı"), + ("ı", "I"), + ("ı", "i"), + ("ı", "İ"), + ("ä", "Ä"), + ("ß", "SS"), + ("Ä", "Ö"), + ("Ä", "Ö"), + ("I", "İ"), + ("I", "i"), + ("I", "ı"), + ("i", "I"), + ("i", "İ"), + ("i", "ı"), + ("İ", "I"), + ("İ", "i"), + ("İ", "ı"), + ("ı", "I"), + ("ı", "i"), + ("ı", "İ"), + ]; + // Test that `cmd.env` matches `env::set_var` when setting two strings that + // may (or may not) be case-folded when compared. + for (a, b) in test_cases.iter() { + let mut cmd = Command::new("cmd"); + cmd.env(a, "1"); + cmd.env(b, "2"); + env::set_var(a, "1"); + env::set_var(b, "2"); + + for (key, value) in cmd.get_envs() { + assert_eq!( + env::var(key).ok(), + value.map(|s| s.to_string_lossy().into_owned()), + "command environment mismatch: {} {}", + a, + b + ); + } + } +}