Skip to content

Commit

Permalink
dns: fix search lookup (#1099)
Browse files Browse the repository at this point in the history
* dns: fix search lookup

Fixes #1074 (issue was incorrect
basically).

Before our search domain was always ztunnel's (istio-system). This is incorrect; we
should know the search domain of the request.

We don't know the per-pod custom settings
(#555), but we can use the
correct defaults.

* Fix unwrap
  • Loading branch information
howardjohn authored Jun 4, 2024
1 parent f18be9c commit 2e41538
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 14 deletions.
115 changes: 102 additions & 13 deletions src/dns/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use hickory_proto::error::ProtoErrorKind;
use hickory_proto::op::ResponseCode;
use hickory_proto::rr::rdata::{A, AAAA, CNAME};
use hickory_proto::rr::{Name, RData, Record, RecordType};
use hickory_resolver::config::ResolverConfig;
use hickory_resolver::config::{NameServerConfig, ResolverConfig, ResolverOpts};
use hickory_resolver::system_conf::read_system_conf;
use hickory_server::authority::LookupError;
use hickory_server::server::Request;
Expand Down Expand Up @@ -331,6 +331,10 @@ impl Store {

// Iterate over all possible aliases for the requested hostname from the perspective of
// the client (e.g. <svc>, <svc>.<ns>, <svc>.<ns>.svc, <svc>.<ns>.svc.cluster.local).
trace!(
"checking aliases {:#?}",
self.get_aliases(client, requested_name)
);
for alias in self.get_aliases(client, requested_name) {
// For each alias, try matching against all possible wildcards.
//
Expand All @@ -340,6 +344,10 @@ impl Store {
// 'www.example.com' (the alias, itself)
// '*.example.com'
// '*.com'
trace!(
"checking against wildcards {:#?}",
get_wildcards(&alias.name)
);
for mut search_name in get_wildcards(&alias.name) {
// Convert the name to a string for lookup, removing the trailing '.'.
search_name.set_fqdn(false);
Expand All @@ -360,7 +368,8 @@ impl Store {
// its better than *ALL* ServiceEntry doing this
.filter(|service| {
// Domain will be like `.svc.cluster.local.` (trailing .), so ignore the last character.
let domain = self.svc_domain.to_utf8();
let domain = ".".to_string() + &self.svc_domain.to_utf8();

let domain = domain
.strip_suffix('.')
.expect("the svc domain must have a trailing '.'");
Expand Down Expand Up @@ -538,6 +547,7 @@ impl Resolver for Store {

// Find the service for the requested host.
let requested_name = Name::from(request.query().name().clone());
trace!("incoming request {requested_name:?}");
let Some(service_match) = self.find_server(&client, &requested_name) else {
trace!("unknown host, forwarding");
// Unknown host. Forward to the upstream resolver.
Expand Down Expand Up @@ -607,6 +617,7 @@ impl Resolver for Store {
}

/// An alias for the requested hostname.
#[derive(Debug)]
struct Alias {
/// The name to be used in the search.
name: Name,
Expand All @@ -623,6 +634,7 @@ impl Display for Alias {
}

/// Created for an alias generated by stripping a search domain from the requested host.
#[derive(Debug)]
struct Stripped {
/// The requested hostname with the `search_domain` removed.
name: Name,
Expand All @@ -632,6 +644,7 @@ struct Stripped {
}

/// Returned when a server was successfully found for the requested hostname.
#[derive(Debug)]
struct ServerMatch {
/// The hostname that was used to find the service. This is identical to the
/// service hostname, except that it is an FQDN [Name].
Expand Down Expand Up @@ -715,13 +728,17 @@ pub trait Forwarder: Send + Sync {
}

/// Creates the appropriate DNS forwarder for the proxy mode.
pub fn forwarder_for_mode(proxy_mode: ProxyMode) -> Result<Arc<dyn Forwarder>, Error> {
pub fn forwarder_for_mode(
proxy_mode: ProxyMode,
cluster_domain: String,
) -> Result<Arc<dyn Forwarder>, Error> {
Ok(match proxy_mode {
ProxyMode::Shared => {
// TODO(https://github.com/istio/ztunnel/issues/555): Use pod settings if available.
Arc::new(SystemForwarder::new()?)
// Today, we only support the basic namespace awareness
Arc::new(SystemForwarder::new(true, cluster_domain)?)
}
ProxyMode::Dedicated => Arc::new(SystemForwarder::new()?),
ProxyMode::Dedicated => Arc::new(SystemForwarder::new(false, cluster_domain)?),
})
}

Expand All @@ -730,20 +747,43 @@ pub fn forwarder_for_mode(proxy_mode: ProxyMode) -> Result<Arc<dyn Forwarder>, E
/// that would have been used by the client. For shared proxy mode, this will be the resolver
/// configuration for the ztunnel DaemonSet (i.e. node-level resolver settings).
struct SystemForwarder {
search_domains: Vec<Name>,
search_domains: SearchDomains,
resolver: Arc<dyn Resolver>,
}

enum SearchDomains {
Static(Vec<Name>),
Dynamic(String),
}

impl SystemForwarder {
fn new() -> Result<Self, Error> {
// Get the resolver config from /etc/resolv.conf.
fn new(per_pod: bool, cluster_domain: String) -> Result<Self, Error> {
// Get the resolver config from `ztunnel's` /etc/resolv.conf.
let (cfg, opts) = read_system_conf().map_err(|e| Error::Generic(Box::new(e)))?;

// Extract the parts.
let domain = cfg.domain().cloned();
let search_domains = cfg.search().to_vec();
let name_servers = cfg.name_servers().to_vec();

Self::from_parts(
per_pod,
cluster_domain,
opts,
domain,
search_domains,
name_servers,
)
}

fn from_parts(
per_pod: bool,
cluster_domain: String,
opts: ResolverOpts,
domain: Option<Name>,
search_domains: Vec<Name>,
name_servers: Vec<NameServerConfig>,
) -> Result<Self, Error> {
// Remove the search list before passing to the resolver. The local resolver that
// sends the original request will already have search domains applied. We want
// this resolver to simply use the request host rather than re-adding search domains.
Expand All @@ -753,6 +793,13 @@ impl SystemForwarder {
let resolver = Arc::new(
dns::forwarder::Forwarder::new(cfg, opts).map_err(|e| Error::Generic(Box::new(e)))?,
);
let search_domains = if per_pod {
// Standard Kubernetes search is 'istio-system.svc.cluster.local svc.cluster.local cluster.local'
// But we need a *per-pod* one, or we will search for the wrong thing
SearchDomains::Dynamic(cluster_domain)
} else {
SearchDomains::Static(search_domains)
};

Ok(Self {
search_domains,
Expand All @@ -763,8 +810,19 @@ impl SystemForwarder {

#[async_trait::async_trait]
impl Forwarder for SystemForwarder {
fn search_domains(&self, _: &Workload) -> Vec<Name> {
self.search_domains.clone()
fn search_domains(&self, wl: &Workload) -> Vec<Name> {
// TODO: https://github.com/istio/ztunnel/issues/555 really get this from pods
// Today we hardcode the default search assumptions
match &self.search_domains {
SearchDomains::Static(s) => s.clone(),
SearchDomains::Dynamic(cluster_domain) => vec![
Name::from_utf8(format!("{}.svc.{cluster_domain}", wl.namespace))
.expect("inputs must be valid DNS labels"),
Name::from_utf8(format!("svc.{cluster_domain}"))
.expect("inputs must be valid DNS labels"),
Name::from_utf8(cluster_domain).expect("inputs must be valid DNS labels"),
],
}
}

async fn forward(
Expand All @@ -787,7 +845,6 @@ mod tests {
use prometheus_client::registry::Registry;

use super::*;
use crate::metrics;
use crate::strng;
use crate::test_helpers::dns::{
a, aaaa, cname, ip, ipv4, ipv6, n, new_message, new_tcp_client, new_udp_client, run_dns,
Expand All @@ -800,6 +857,7 @@ mod tests {
use crate::xds::istio::workload::PortList as XdsPortList;
use crate::xds::istio::workload::Service as XdsService;
use crate::xds::istio::workload::Workload as XdsWorkload;
use crate::{metrics, test_helpers};

const NS1: &str = "ns1";
const NS2: &str = "ns2";
Expand Down Expand Up @@ -937,8 +995,6 @@ mod tests {
assert_eq!(expected, actual);
}

// TODO(nmittler): Test headless services (https://github.com/istio/ztunnel/issues/554).
// TODO(nmittler): Test truncation once fixed (https://github.com/bluejekyll/trust-dns/issues/1973).
#[tokio::test]
async fn lookup() {
initialize_telemetry();
Expand Down Expand Up @@ -1425,6 +1481,39 @@ mod tests {
assert_eq!(75, resp.answers().len(), "expected UDP to be truncated");
}

#[test]
fn search_domains() {
let opts = ResolverOpts::default();
let search = vec![
as_name("istio-system.svc.cluster.local"),
as_name("svc.cluster.local"),
as_name("cluster.local"),
];
let f = SystemForwarder::from_parts(
true,
"cluster.local".to_string(),
opts,
None,
search,
vec![],
)
.unwrap();
let make_workload = |ns: &str| Workload {
name: "something".into(),
namespace: ns.into(),
..test_helpers::test_default_workload()
};

assert_eq!(
f.search_domains(&make_workload("default-ns")),
vec![
as_name("default-ns.svc.cluster.local"),
as_name("svc.cluster.local"),
as_name("cluster.local"),
]
);
}

/// Sort the IP records so that we can directly compare them to the expected. The resulting
/// list will contain CNAME first, followed by A, and then by AAAA. Within each record type,
/// records will be sorted in ascending order by their string representation.
Expand Down
5 changes: 4 additions & 1 deletion src/proxyfactory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ impl ProxyFactory {
self.config.dns_proxy_addr,
self.config.network.clone(),
self.state.clone(),
dns::forwarder_for_mode(self.config.proxy_mode)?,
dns::forwarder_for_mode(
self.config.proxy_mode,
self.config.cluster_domain.clone(),
)?,
self.dns_metrics.clone().unwrap(),
drain.clone(),
socket_factory.as_ref(),
Expand Down

0 comments on commit 2e41538

Please sign in to comment.