diff --git a/.gitattributes b/.gitattributes index 28d84bb5..a19fc681 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,12 +1,13 @@ * text=auto -/.editorconfig export-ignore -/.gitattributes export-ignore -/.github export-ignore -/.gitignore export-ignore -/.php_cs export-ignore -/.travis.yml export-ignore -/README.md export-ignore -/CHANGELOG.md export-ignore -/phpunit.xml.dist export-ignore -/tests export-ignore +/.editorconfig export-ignore +/.gitattributes export-ignore +/.github export-ignore +/.gitignore export-ignore +/.php_cs export-ignore +/.phpstan.src.neon export-ignore +/.phpstan.tests.neon export-ignore +/.travis.yml export-ignore +/README.md export-ignore +/phpunit.xml.dist export-ignore +/tests export-ignore diff --git a/.travis.yml b/.travis.yml index b393c0a1..04c351dc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,13 +5,13 @@ sudo: false matrix: include: - php: 7.0 - env: COLLECT_COVERAGE=true VALIDATE_CODING_STYLE=true IGNORE_PLATFORMS=false + env: VALIDATE_CODING_STYLE=false RUN_PHPSTAN=false IGNORE_PLATFORMS=false - php: 7.1 - env: COLLECT_COVERAGE=true VALIDATE_CODING_STYLE=true IGNORE_PLATFORMS=false + env: VALIDATE_CODING_STYLE=false RUN_PHPSTAN=false IGNORE_PLATFORMS=false - php: 7.2 - env: COLLECT_COVERAGE=false VALIDATE_CODING_STYLE=false IGNORE_PLATFORMS=true + env: VALIDATE_CODING_STYLE=true RUN_PHPSTAN=true IGNORE_PLATFORMS=false - php: nightly - env: COLLECT_COVERAGE=false VALIDATE_CODING_STYLE=false IGNORE_PLATFORMS=true + env: VALIDATE_CODING_STYLE=false RUN_PHPSTAN=false IGNORE_PLATFORMS=true allow_failures: - php: nightly fast_finish: true @@ -31,5 +31,5 @@ script: - composer phpunit after_script: - - if [ "$COLLECT_COVERAGE" == "true" ]; then wget https://scrutinizer-ci.com/ocular.phar && php ocular.phar code-coverage:upload --format=php-clover build/clover.xml; fi - - if [ "$VALIDATE_CODING_STYLE" == "true" ]; then composer phpcs; fi \ No newline at end of file + - if [ "$VALIDATE_CODING_STYLE" == "true" ]; then composer 
phpcs; fi + - if [ "$RUN_PHPSTAN" == "true" ]; then composer phpstan; fi \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b9c163..f26b25d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,32 @@ # Changelog -All Notable changes to `PHP Domain Parser` will be documented in this file +All Notable changes to `PHP Domain Parser` **5.x** series will be documented in this file + +## 5.2.0 - 2018-02-23 + +### Added + +- `Pdp\Rules::getPublicSuffix` returns a `Pdp\PublicSuffix` value object +- `Pdp\Rules::__set_state` is implemented +- `Pdp\Domain::toUnicode` returns a `Pdp\Domain` with its value converted to its Unicode form +- `Pdp\Domain::toAscii` returns a `Pdp\Domain` with its value converted to its ASCII form +- `Pdp\PublicSuffix::toUnicode` returns a `Pdp\PublicSuffix` with its value converted to its Unicode form +- `Pdp\PublicSuffix::toAscii` returns a `Pdp\PublicSuffix` with its value converted to its ASCII form + +### Fixed + +- `Pdp\Domain::getDomain` returns the normalized form of the domain name +- `Pdp\PublicSuffix` is no longer internal. +- Normalizes IDN conversion using an internal `IDNAConverterTrait` +- Internal code improved by requiring PHPStan for development + +### Deprecated + +- None + +### Removed + +- None ## 5.1.0 - 2017-12-18 diff --git a/README.md b/README.md index e18d0a03..f8674d7c 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,6 @@ System Requirements You need: - **PHP >= 7.0** but the latest stable version of PHP is recommended -- the `mbstring` extension - the `intl` extension Dependencies @@ -48,27 +47,7 @@ Documentation ### Domain name resolution -In order to resolve a domain name one we must: - -- Convert the Public Suffix List (PSL) into a structure usable in PHP -- Resolve the domain name against the PSL rules - -PSL Conversion is done using the `Pdp\Converter` class.
- -~~~php -resolve('www.ulb.ac.be'); //using Rules::ALL_DOMAINS +$domain = $rules->resolve('www.Ulb.AC.be'); //using Rules::ALL_DOMAINS $domain->getDomain(); //returns 'www.ulb.ac.be' $domain->getPublicSuffix(); //returns 'ac.be' $domain->getRegistrableDomain(); //returns 'ulb.ac.be' @@ -175,7 +152,7 @@ echo json_encode($domain, JSON_PRETTY_PRINT); //The same domain will yield a different result using the PSL PRIVATE DOMAIN SECTION only -$domain = $rules->resolve('www.ulb.ac.be', Rules::PRIVATE_DOMAINS); +$domain = $rules->resolve('www.Ulb.AC.be', Rules::PRIVATE_DOMAINS); echo json_encode($domain, JSON_PRETTY_PRINT); // returns // { @@ -189,11 +166,49 @@ echo json_encode($domain, JSON_PRETTY_PRINT); // } ~~~ +The `Pdp\Domain` getter methods return: + +- the submitted domain name in its normalized (lowercased) form using `Pdp\Domain::getDomain` +- the public suffix part normalized according to the domain using `Pdp\Domain::getPublicSuffix` +- the registrable domain part using `Pdp\Domain::getRegistrableDomain` +- the subdomain part using `Pdp\Domain::getSubDomain`. + +If the domain name or some of its parts are seriously malformed or unrecognized, the getter methods will return `null`. + +**The Domain name status depends on the PSL section used to resolve it:** + +- `Pdp\Domain::isKnown` returns `true` if the public suffix is found in the selected PSL; +- `Pdp\Domain::isICANN` returns `true` if the public suffix is found using a PSL which includes the ICANN DOMAINS section; +- `Pdp\Domain::isPrivate` returns `true` if the public suffix is found using a PSL which includes the PRIVATE DOMAINS section; + +The `Rules::getPublicSuffix` method expects the same arguments as `Rules::resolve` but returns a `Pdp\PublicSuffix` object instead.
+ +~~~php + + * @author Ignace Nyamagana Butera */ final class Converter { + use IDNAConverterTrait; + /** * Convert the Public Suffix List into * an associative, multidimensional array @@ -99,7 +104,7 @@ private function addRule(array $list, array $rule_parts): array // "The domain and all rules must be canonicalized in the normal way // for hostnames - lower-case, Punycode (RFC 3492)." - $part = idn_to_ascii($part, 0, INTL_IDNA_VARIANT_UTS46); + $part = $this->idnToAscii($part); $isDomain = true; if (0 === strpos($part, '!')) { $part = substr($part, 1); diff --git a/src/Domain.php b/src/Domain.php index 24c88944..687a324a 100644 --- a/src/Domain.php +++ b/src/Domain.php @@ -29,6 +29,8 @@ */ final class Domain implements JsonSerializable { + use IDNAConverterTrait; + /** * @var string|null */ @@ -49,6 +51,14 @@ final class Domain implements JsonSerializable */ private $subDomain; + /** + * {@inheritdoc} + */ + public static function __set_state(array $properties): self + { + return new self($properties['domain'], $properties['publicSuffix']); + } + /** * New instance. * @@ -57,12 +67,36 @@ final class Domain implements JsonSerializable */ public function __construct($domain = null, PublicSuffix $publicSuffix = null) { + if (false !== strpos((string) $domain, '%')) { + $domain = rawurldecode($domain); + } + + if (null !== $domain) { + $domain = strtolower($domain); + } + $this->domain = $domain; - $this->publicSuffix = $publicSuffix ?? 
new PublicSuffix(); + $this->publicSuffix = $this->setPublicSuffix($publicSuffix); $this->registrableDomain = $this->setRegistrableDomain(); $this->subDomain = $this->setSubDomain(); } + /** + * Filter the PublicSuffix + * + * @param PublicSuffix|null $publicSuffix + * + * @return PublicSuffix + */ + private function setPublicSuffix(PublicSuffix $publicSuffix = null): PublicSuffix + { + if (null === $publicSuffix || null === $this->domain) { + return new PublicSuffix(); + } + + return $publicSuffix; + } + /** * Compute the registrable domain part. * @@ -82,29 +116,7 @@ private function setRegistrableDomain() $domainLabels = explode('.', $this->domain); $registrableDomain = implode('.', array_slice($domainLabels, count($domainLabels) - $nbLabelsToRemove)); - return $this->normalize($registrableDomain); - } - - /** - * Normalizes the domain according to its representation. - * - * @param string $domain - * - * @return string|null - */ - private function normalize(string $domain) - { - $func = 'idn_to_utf8'; - if (false !== strpos($domain, 'xn--')) { - $func = 'idn_to_ascii'; - } - - $domain = $func($domain, 0, INTL_IDNA_VARIANT_UTS46); - if (false === $domain) { - return null; - } - - return strtolower($domain); + return $registrableDomain; } /** @@ -127,7 +139,7 @@ private function setSubDomain() $subDomain = implode('.', array_slice($domainLabels, 0, $countLabels - $nbLabelsToRemove)); - return $this->normalize($subDomain); + return $subDomain; } /** @@ -135,15 +147,11 @@ private function setSubDomain() */ public function jsonSerialize() { - return [ + return array_merge([ 'domain' => $this->domain, 'registrableDomain' => $this->registrableDomain, 'subDomain' => $this->subDomain, - 'publicSuffix' => $this->publicSuffix->getContent(), - 'isKnown' => $this->publicSuffix->isKnown(), - 'isICANN' => $this->publicSuffix->isICANN(), - 'isPrivate' => $this->publicSuffix->isPrivate(), - ]; + ], $this->publicSuffix->jsonSerialize()); } /** @@ -154,14 +162,6 @@ public 
function __debugInfo() return $this->jsonSerialize(); } - /** - * {@inheritdoc} - */ - public static function __set_state(array $properties) - { - return new self($properties['domain'], $properties['publicSuffix']); - } - /** * Returns the full domain name. * @@ -214,7 +214,7 @@ public function getPublicSuffix() } /** - * Tells whether the public suffix has a matching rule in a Public Suffix List. + * Tells whether the public suffix has been matching rule in a Public Suffix List. * * @return bool */ @@ -242,4 +242,47 @@ public function isPrivate(): bool { return $this->publicSuffix->isPrivate(); } + + /** + * Converts the domain to its IDNA ASCII form. + * + * This method MUST retain the state of the current instance, and return + * an instance with is content converted to its IDNA ASCII form + * + * @throws Exception if the domain can not be converted to ASCII using IDN UTS46 algorithm + * + * @return self + */ + public function toAscii(): self + { + if (null === $this->domain || false !== strpos($this->domain, 'xn--')) { + return $this; + } + + $newDomain = $this->idnToAscii($this->domain); + if ($newDomain === $this->domain) { + return $this; + } + + return new self($newDomain, $this->publicSuffix->toAscii()); + } + + /** + * Converts the domain to its IDNA UTF8 form. 
+ * + * This method MUST retain the state of the current instance, and return + * an instance with is content converted to its IDNA UTF8 form + * + * @throws Exception if the domain can not be converted to Unicode using IDN UTS46 algorithm + * + * @return self + */ + public function toUnicode(): self + { + if (null === $this->domain || false === strpos($this->domain, 'xn--')) { + return $this; + } + + return new self($this->idnToUnicode($this->domain), $this->publicSuffix->toUnicode()); + } } diff --git a/src/IDNAConverterTrait.php b/src/IDNAConverterTrait.php new file mode 100644 index 00000000..d4f5c981 --- /dev/null +++ b/src/IDNAConverterTrait.php @@ -0,0 +1,110 @@ + + */ +trait IDNAConverterTrait +{ + /** + * Get and format IDN conversion error message + * + * @param int $error_bit + * + * @return string + */ + private static function getIdnErrors(int $error_bit): string + { + /** + * IDNA errors + * + * @see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/IDNA.Error.html + */ + static $idn_errors = [ + IDNA_ERROR_EMPTY_LABEL => 'a non-final domain name label (or the whole domain name) is empty', + IDNA_ERROR_LABEL_TOO_LONG => 'a domain name label is longer than 63 bytes', + IDNA_ERROR_DOMAIN_NAME_TOO_LONG => 'a domain name is longer than 255 bytes in its storage form', + IDNA_ERROR_LEADING_HYPHEN => 'a label starts with a hyphen-minus ("-")', + IDNA_ERROR_TRAILING_HYPHEN => 'a label ends with a hyphen-minus ("-")', + IDNA_ERROR_HYPHEN_3_4 => 'a label contains hyphen-minus ("-") in the third and fourth positions', + IDNA_ERROR_LEADING_COMBINING_MARK => 'a label starts with a combining mark', + IDNA_ERROR_DISALLOWED => 'a label or domain name contains disallowed characters', + IDNA_ERROR_PUNYCODE => 'a label starts with "xn--" but does not contain valid Punycode', + IDNA_ERROR_LABEL_HAS_DOT => 'a label contains a dot=full stop', + IDNA_ERROR_INVALID_ACE_LABEL => 'An ACE label does not contain a valid label string', + IDNA_ERROR_BIDI => 'a label does not 
meet the IDNA BiDi requirements (for right-to-left characters)', + IDNA_ERROR_CONTEXTJ => 'a label does not meet the IDNA CONTEXTJ requirements', + ]; + + $res = []; + foreach ($idn_errors as $error => $reason) { + if ($error_bit & $error) { + $res[] = $reason; + } + } + + return empty($res) ? 'Unknown IDNA conversion error.' : implode(', ', $res).'.'; + } + + /** + * Converts the input to its IDNA ASCII form. + * + * This method returns the string converted to IDN ASCII form + * + * @param string $host + * @throws Exception if the string can not be converted to ASCII using IDN UTS46 algorithm + * + * @return string + */ + private function idnToAscii(string $host): string + { + if (false !== strpos($host, '%')) { + $host = rawurldecode($host); + } + + $host = strtolower($host); + static $pattern = '/[\pL]+/u'; + if (!preg_match($pattern, $host)) { + return $host; + } + + $output = idn_to_ascii($host, 0, INTL_IDNA_VARIANT_UTS46, $arr); + if (!$arr['errors']) { + return $output; + } + + throw new Exception(sprintf('The host `%s` is invalid : %s', $host, self::getIdnErrors($arr['errors']))); + } + + /** + * Converts the input to its IDNA UNICODE form. 
+ * + * This method returns the string converted to IDN UNICODE form + * + * @param string $host + * @throws Exception if the string can not be converted to UNICODE using IDN UTS46 algorithm + * + * @return string + */ + private function idnToUnicode(string $host): string + { + $output = idn_to_utf8($host, 0, INTL_IDNA_VARIANT_UTS46, $arr); + if (!$arr['errors']) { + return $output; + } + + throw new Exception(sprintf('The host `%s` is invalid : %s', $host, self::getIdnErrors($arr['errors']))); + } +} diff --git a/src/Installer.php b/src/Installer.php index 49aff609..4da93e13 100644 --- a/src/Installer.php +++ b/src/Installer.php @@ -40,7 +40,7 @@ public static function updateLocalCache(Event $event = null) require $vendor.'/autoload.php'; - $io->write('Updating your Public Suffix List ICANN Section local cache.'); + $io->write('Updating your Public Suffix List local cache.'); if (!extension_loaded('curl')) { $io->writeError([ '😓 😓 😓 Your local cache could not be updated. 😓 😓 😓', @@ -67,7 +67,7 @@ public static function updateLocalCache(Event $event = null) $io->writeError([ '😓 😓 😓 Your local cache could not be updated. 
😓 😓 😓', 'An error occurred during the update.', - '----- Error Trace ----', + '----- Error Message ----', ]); $io->writeError($e->getMessage()); die(1); @@ -83,7 +83,7 @@ public static function updateLocalCache(Event $event = null) */ private static function getVendorPath(Event $event = null) { - if ($event instanceof Event) { + if (null !== $event) { return $event->getComposer()->getConfig()->get('vendor-dir'); } @@ -103,11 +103,11 @@ private static function getVendorPath(Event $event = null) * * @param Event|null $event * - * @return object + * @return mixed */ private static function getIO(Event $event = null) { - if ($event instanceof Event) { + if (null !== $event) { return $event->getIO(); } diff --git a/src/Manager.php b/src/Manager.php index a483e50a..119060e3 100644 --- a/src/Manager.php +++ b/src/Manager.php @@ -18,6 +18,9 @@ * * This class obtains, writes, caches, and returns PHP representations * of the Public Suffix List ICANN section + * + * @author Jeremy Kendall + * @author Ignace Nyamagana Butera */ final class Manager { @@ -78,9 +81,9 @@ public function getRules(string $source_url = self::PSL_URL): Rules */ private function getCacheKey(string $str): string { - static $cacheKeyPrefix = 'PSL-FULL'; + static $cacheKeyPrefix = 'PSL_FULL'; - return $cacheKeyPrefix.'-'.md5(strtolower($str)); + return $cacheKeyPrefix.'_'.md5(strtolower($str)); } /** diff --git a/src/PublicSuffix.php b/src/PublicSuffix.php index fb0af46e..dea2960b 100644 --- a/src/PublicSuffix.php +++ b/src/PublicSuffix.php @@ -12,6 +12,7 @@ namespace Pdp; use Countable; +use JsonSerializable; /** * Public Suffix Value Object @@ -25,10 +26,11 @@ * software with no update mechanism." 
* * @author Ignace Nyamagana Butera - * @internal used internally to represent a public suffix */ -final class PublicSuffix implements Countable +final class PublicSuffix implements Countable, JsonSerializable { + use IDNAConverterTrait; + /** * @var string|null */ @@ -37,18 +39,55 @@ final class PublicSuffix implements Countable /** * @var string */ - private $type; + private $section; + + /** + * {@inheritdoc} + */ + public static function __set_state(array $properties): self + { + return new self($properties['publicSuffix'], $properties['section']); + } /** * New instance. * * @param string|null $publicSuffix - * @param string $type + * @param string $section */ - public function __construct(string $publicSuffix = null, string $type = '') + public function __construct(string $publicSuffix = null, string $section = '') { + if (false !== strpos((string) $publicSuffix, '%')) { + $publicSuffix = rawurldecode($publicSuffix); + } + + if (null !== $publicSuffix) { + $publicSuffix = strtolower($publicSuffix); + } + $this->publicSuffix = $publicSuffix; - $this->type = $type; + $this->section = $section; + } + + /** + * {@inheritdoc} + */ + public function jsonSerialize() + { + return [ + 'publicSuffix' => $this->getContent(), + 'isKnown' => $this->isKnown(), + 'isICANN' => $this->isICANN(), + 'isPrivate' => $this->isPrivate(), + ]; + } + + /** + * {@inheritdoc} + */ + public function __debugInfo() + { + return $this->jsonSerialize(); } /** @@ -80,7 +119,7 @@ public function count() */ public function isKnown(): bool { - return '' !== $this->type; + return '' !== $this->section; } /** @@ -90,7 +129,7 @@ public function isKnown(): bool */ public function isICANN(): bool { - return Rules::ICANN_DOMAINS === $this->type; + return Rules::ICANN_DOMAINS === $this->section; } /** @@ -100,27 +139,49 @@ public function isICANN(): bool */ public function isPrivate(): bool { - return Rules::PRIVATE_DOMAINS === $this->type; + return Rules::PRIVATE_DOMAINS === $this->section; } /** - * 
{@inheritdoc} + * Converts the domain to its IDNA UTF8 form. + * + * This method MUST retain the state of the current instance, and return + * an instance with is content converted to its IDNA UTF8 form + * + * @throws Exception if the domain can not be converted to Unicode using IDN UTS46 algorithm + * + * @return self */ - public function __debugInfo() + public function toUnicode(): self { - return [ - 'publicSuffix' => $this->getContent(), - 'isKnown' => $this->isKnown(), - 'isICANN' => $this->isICANN(), - 'isPrivate' => $this->isPrivate(), - ]; + if (null === $this->publicSuffix || false === strpos($this->publicSuffix, 'xn--')) { + return $this; + } + + return new self($this->idnToUnicode($this->publicSuffix), $this->section); } /** - * {@inheritdoc} + * Converts the domain to its IDNA ASCII form. + * + * This method MUST retain the state of the current instance, and return + * an instance with is content converted to its IDNA ASCII form + * + * @throws Exception if the domain can not be converted to ASCII using IDN UTS46 algorithm + * + * @return self */ - public static function __set_state(array $properties) + public function toAscii(): self { - return new self($properties['publicSuffix'], $properties['type']); + if (null === $this->publicSuffix || false !== strpos($this->publicSuffix, 'xn--')) { + return $this; + } + + $newPublicSuffix = $this->idnToAscii($this->publicSuffix); + if ($newPublicSuffix === $this->publicSuffix) { + return $this; + } + + return new self($newPublicSuffix, $this->section); } } diff --git a/src/Rules.php b/src/Rules.php index db1dc4dc..8c721b87 100644 --- a/src/Rules.php +++ b/src/Rules.php @@ -19,6 +19,8 @@ */ final class Rules { + use IDNAConverterTrait; + const ALL_DOMAINS = 'ALL_DOMAINS'; const ICANN_DOMAINS = 'ICANN_DOMAINS'; const PRIVATE_DOMAINS = 'PRIVATE_DOMAINS'; @@ -67,6 +69,14 @@ public static function createFromString(string $content): self return new self((new Converter())->convert($content)); } + /** + * {@inheritdoc} 
+ */ + public static function __set_state(array $properties): self + { + return new self($properties['rules']); + } + /** * new instance. * @@ -78,69 +88,83 @@ public function __construct(array $rules) } /** - * Returns PSL ICANN public info for a given domain. + * Determines the public suffix for a given domain. * * @param string|null $domain * @param string $section * - * @return Domain + * @throws Exception + * If the Domain is invalid or malformed + * If the section is invalid or not supported + * If the PublicSuffix can not be converted using against the domain encoding. + * + * @return PublicSuffix */ - public function resolve(string $domain = null, string $section = self::ALL_DOMAINS): Domain + public function getPublicSuffix(string $domain = null, string $section = self::ALL_DOMAINS): PublicSuffix { - if (!in_array($section, [self::PRIVATE_DOMAINS, self::ICANN_DOMAINS, self::ALL_DOMAINS], true)) { - throw new Exception(sprintf('%s is an unknown Public Suffix List section', $section)); + if (null === $domain || !$this->isMatchable($domain)) { + throw new Exception(sprintf('The submitted domain `%s` is invalid or malformed', $domain)); } + $this->validateSection($section); - if (!$this->isMatchable($domain)) { + return $this->findPublicSuffix($domain, $section); + } + + /** + * Returns PSL info for a given domain. 
+ * + * @param string|null $domain + * @param string $section + * + * @return Domain + */ + public function resolve(string $domain = null, string $section = self::ALL_DOMAINS): Domain + { + $this->validateSection($section); + if (null === $domain || !$this->isMatchable($domain)) { return new Domain(); } - $publicSuffix = $this->findPublicSuffix($domain, $section); - if (null === $publicSuffix->getContent()) { - return new Domain($domain, $this->handleNoMatches($domain)); + try { + return new Domain($domain, $this->findPublicSuffix($domain, $section)); + } catch (Exception $e) { + return new Domain($domain); } - - return new Domain($domain, $this->handleMatches($domain, $publicSuffix)); } /** * Tells whether the given domain can be resolved. * - * @param string|null $domain + * @param string $domain * * @return bool */ private function isMatchable($domain): bool { - return null !== $domain - && strpos($domain, '.') > 0 + return strpos($domain, '.') > 0 && strlen($domain) === strcspn($domain, '][') && !filter_var($domain, FILTER_VALIDATE_IP); } /** - * Normalizes a domain name. - * - * "The domain must be canonicalized in the normal way for hostnames - lower-case, Punycode." - * - * @see https://tools.ietf.org/html/rfc3492 + * Assert the section status. * - * @param string $domain + * @param string $section * - * @return string + * @throws Exception if the submitted section is not supported */ - private function normalize(string $domain): string + private function validateSection(string $section) { - if (false !== strpos($domain, '%')) { - $domain = rawurldecode($domain); + if (self::ALL_DOMAINS === $section) { + return; } - $normalize = idn_to_ascii($domain, 0, INTL_IDNA_VARIANT_UTS46); - if (false === $normalize) { - return ''; + $rules = $this->rules[$section] ?? 
null; + if (null !== $rules && is_array($rules)) { + return; } - return strtolower($normalize); + throw new Exception(sprintf('%s is an unknown Public Suffix List section', $section)); } /** @@ -153,23 +177,42 @@ private function normalize(string $domain): string */ private function findPublicSuffix(string $domain, string $section): PublicSuffix { - $normalizedDomain = $this->normalize($domain); - $reverseLabels = array_reverse(explode('.', $normalizedDomain)); - $resultIcann = $this->findPublicSuffixFromSection($reverseLabels, self::ICANN_DOMAINS); + $reverseLabels = array_reverse(explode('.', $this->normalizeDomain($domain))); + $icann = $this->findPublicSuffixFromSection($reverseLabels, self::ICANN_DOMAINS); if (self::ICANN_DOMAINS === $section) { - return $resultIcann; + return $this->normalizePublicSuffix($icann, $domain); } - $resultPrivate = $this->findPublicSuffixFromSection($reverseLabels, self::PRIVATE_DOMAINS); - if (count($resultPrivate) > count($resultIcann)) { - return $resultPrivate; + $private = $this->findPublicSuffixFromSection($reverseLabels, self::PRIVATE_DOMAINS); + if (count($private) > count($icann)) { + return $this->normalizePublicSuffix($private, $domain); } if (self::ALL_DOMAINS === $section) { - return $resultIcann; + return $this->normalizePublicSuffix($icann, $domain); } - return new PublicSuffix(); + return $this->normalizePublicSuffix(new PublicSuffix(), $domain); + } + + /** + * Normalizes a domain name. + * + * "The domain must be canonicalized in the normal way for hostnames - lower-case, Punycode." 
+ * + * @see https://tools.ietf.org/html/rfc3492 + * + * @param string $domain + * + * @return string + */ + private function normalizeDomain(string $domain): string + { + try { + return $this->idnToAscii($domain); + } catch (Exception $e) { + return ''; + } } /** @@ -182,7 +225,7 @@ private function findPublicSuffix(string $domain, string $section): PublicSuffix */ private function findPublicSuffixFromSection(array $labels, string $section): PublicSuffix { - $rules = $this->rules[$section] ?? null; + $rules = $this->rules[$section] ?? []; $matches = []; foreach ($labels as $label) { //match exception rule @@ -214,57 +257,24 @@ private function findPublicSuffixFromSection(array $labels, string $section): Pu } /** - * Returns a PublicSuffix if none was found using the PSL. + * Normalize the found Public Suffix against its domain name. * - * @param string $domain + * @param PublicSuffix $publicSuffix + * @param string $domain * * @return PublicSuffix */ - private function handleNoMatches(string $domain): PublicSuffix + private function normalizePublicSuffix(PublicSuffix $publicSuffix, string $domain): PublicSuffix { - $labels = explode('.', $domain); - $publicSuffix = array_pop($labels); - if ($this->isPunycoded($domain)) { - return new PublicSuffix($publicSuffix); - } - - $publicSuffix = idn_to_utf8($publicSuffix, 0, INTL_IDNA_VARIANT_UTS46); - if (false !== $publicSuffix) { - return new PublicSuffix($publicSuffix); + if (null === $publicSuffix->getContent()) { + $labels = explode('.', $domain); + $publicSuffix = new PublicSuffix($this->idnToAscii(array_pop($labels))); } - return new PublicSuffix(); - } - - /** - * Tells whether the domain is punycoded. - * - * @param string $domain - * - * @return bool - */ - private function isPunycoded(string $domain): bool - { - return false !== strpos($domain, 'xn--'); - } - - /** - * Returns a PublicSuffix if one was found using the PSL. 
- * - * @param string $domain - * @param PublicSuffix $publicSuffix - * - * @return PublicSuffix - */ - private function handleMatches($domain, PublicSuffix $publicSuffix): PublicSuffix - { - if ($this->isPunycoded($domain)) { - return $publicSuffix; + if (false === strpos($domain, 'xn--')) { + return $publicSuffix->toUnicode(); } - return new PublicSuffix( - idn_to_utf8($publicSuffix->getContent(), 0, INTL_IDNA_VARIANT_UTS46), - $publicSuffix->isICANN() ? self::ICANN_DOMAINS : self::PRIVATE_DOMAINS - ); + return $publicSuffix; } } diff --git a/tests/CacheTest.php b/tests/CacheTest.php index 18e6802b..0e5bbb84 100644 --- a/tests/CacheTest.php +++ b/tests/CacheTest.php @@ -2,14 +2,14 @@ declare(strict_types=1); -namespace pdp\tests; +namespace Pdp\Tests; use DateInterval; +use Iterator; use org\bovigo\vfs\vfsStream; use Pdp\Cache; use PHPUnit\Framework\TestCase; use Psr\SimpleCache\InvalidArgumentException; -use Traversable; /** * Abstract PSR-16 tester. @@ -47,8 +47,7 @@ public function tearDown() /** * @dataProvider storableValuesProvider * - * @param mixed $expected - * @param string $key + * @param mixed $expected */ public function testSetGet($expected) { @@ -387,7 +386,7 @@ public function testGetMultipleInvalidArg() // first value is requested. 
// // This extra line is just a precaution for that - if ($result instanceof Traversable) { + if ($result instanceof Iterator) { $result->current(); } } diff --git a/tests/CurlHttpClientTest.php b/tests/CurlHttpClientTest.php index ac419d81..436b2701 100644 --- a/tests/CurlHttpClientTest.php +++ b/tests/CurlHttpClientTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace pdp\tests; +namespace Pdp\Tests; use Pdp\CurlHttpClient; use Pdp\Exception; diff --git a/tests/DomainTest.php b/tests/DomainTest.php index f39c4bfa..081f2149 100644 --- a/tests/DomainTest.php +++ b/tests/DomainTest.php @@ -2,9 +2,10 @@ declare(strict_types=1); -namespace pdp\tests; +namespace Pdp\Tests; use Pdp\Domain; +use Pdp\Exception; use Pdp\PublicSuffix; use Pdp\Rules; use PHPUnit\Framework\TestCase; @@ -24,13 +25,24 @@ public function testRegistrableDomainIsNullWithFoundDomain(string $domain, $publ $this->assertNull($domain->getSubDomain()); } + public function testToAsciiThrowsException() + { + $this->expectException(Exception::class); + (new Domain('_b%C3%A9bé.be-'))->toAscii(); + } + + public function testToUnicodeThrowsException() + { + $this->expectException(Exception::class); + (new Domain('xn--a-ecp.ru'))->toUnicode(); + } + public function invalidRegistrableDomainProvider() { return [ 'domain and suffix are the same' => ['co.uk', 'co.uk'], 'domain has no labels' => ['faketld', 'faketld'], 'public suffix is null' => ['faketld', null], - 'public suffix is invalid' => ['_b%C3%A9bé.be-', 'be-'], ]; } @@ -49,4 +61,148 @@ public function testPublicSuffixnternalPhpMethod() $this->assertInternalType('array', $publicSuffix->__debugInfo()); $this->assertEquals($publicSuffix, $generatePublicSuffix); } + + /** + * @dataProvider toUnicodeProvider + * @param null|string $domain + * @param null|string $publicSuffix + * @param null|string $expectedDomain + * @param null|string $expectedSuffix + * @param null|string $expectedIDNDomain + * @param null|string $expectedIDNSuffix + */ + public 
function testToIDN( + $domain, + $publicSuffix, + $expectedDomain, + $expectedSuffix, + $expectedIDNDomain, + $expectedIDNSuffix + ) { + $domain = new Domain($domain, new PublicSuffix($publicSuffix)); + $this->assertSame($expectedDomain, $domain->getDomain()); + $this->assertSame($expectedSuffix, $domain->getPublicSuffix()); + + $domainIDN = $domain->toUnicode(); + $this->assertSame($expectedIDNDomain, $domainIDN->getDomain()); + $this->assertSame($expectedIDNSuffix, $domainIDN->getPublicSuffix()); + } + + public function toUnicodeProvider() + { + return [ + 'simple domain' => [ + 'domain' => 'www.ulb.ac.be', + 'publicSuffix' => 'ac.be', + 'expectedDomain' => 'www.ulb.ac.be', + 'expectedSuffix' => 'ac.be', + 'expectedIDNDomain' => 'www.ulb.ac.be', + 'expectedIDNSuffix' => 'ac.be', + ], + 'ASCII to IDN domain' => [ + 'domain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'publicSuffix' => 'xn--85x722f.xn--55qx5d.cn', + 'expectedDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedSuffix' => 'xn--85x722f.xn--55qx5d.cn', + 'expectedIDNDomain' => 'www.食狮.公司.cn', + 'expectedIDNSuffix' => '食狮.公司.cn', + ], + 'IDN to IDN domain' => [ + 'domain' => 'www.食狮.公司.cn', + 'publicSuffix' => '食狮.公司.cn', + 'expectedDomain' => 'www.食狮.公司.cn', + 'expectedSuffix' => '食狮.公司.cn', + 'expectedIDNDomain' => 'www.食狮.公司.cn', + 'expectedIDNSuffix' => '食狮.公司.cn', + ], + 'null domain and suffix' => [ + 'domain' => null, + 'publicSuffix' => null, + 'expectedDomain' => null, + 'expectedSuffix' => null, + 'expectedIDNDomain' => null, + 'expectedIDNSuffix' => null, + ], + 'domain with null suffix' => [ + 'domain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'publicSuffix' => null, + 'expectedDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedSuffix' => null, + 'expectedIDNDomain' => 'www.食狮.公司.cn', + 'expectedIDNSuffix' => null, + ], + ]; + } + + /** + * @dataProvider toAsciiProvider + * @param null|string $domain + * @param null|string $publicSuffix + * @param null|string $expectedDomain + * @param 
null|string $expectedSuffix + * @param null|string $expectedAsciiDomain + * @param null|string $expectedAsciiSuffix + */ + public function testToAscii( + $domain, + $publicSuffix, + $expectedDomain, + $expectedSuffix, + $expectedAsciiDomain, + $expectedAsciiSuffix + ) { + $domain = new Domain($domain, new PublicSuffix($publicSuffix)); + $this->assertSame($expectedDomain, $domain->getDomain()); + $this->assertSame($expectedSuffix, $domain->getPublicSuffix()); + + $domainIDN = $domain->toAscii(); + $this->assertSame($expectedAsciiDomain, $domainIDN->getDomain()); + $this->assertSame($expectedAsciiSuffix, $domainIDN->getPublicSuffix()); + } + + public function toAsciiProvider() + { + return [ + 'simple domain' => [ + 'domain' => 'www.ulb.ac.be', + 'publicSuffix' => 'ac.be', + 'expectedDomain' => 'www.ulb.ac.be', + 'expectedSuffix' => 'ac.be', + 'expectedIDNDomain' => 'www.ulb.ac.be', + 'expectedIDNSuffix' => 'ac.be', + ], + 'ASCII to ASCII domain' => [ + 'domain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'publicSuffix' => 'xn--85x722f.xn--55qx5d.cn', + 'expectedDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedSuffix' => 'xn--85x722f.xn--55qx5d.cn', + 'expectedIDNDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedIDNSuffix' => 'xn--85x722f.xn--55qx5d.cn', + ], + 'ASCII to IDN domain' => [ + 'domain' => 'www.食狮.公司.cn', + 'publicSuffix' => '食狮.公司.cn', + 'expectedDomain' => 'www.食狮.公司.cn', + 'expectedSuffix' => '食狮.公司.cn', + 'expectedIDNDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedIDNSuffix' => 'xn--85x722f.xn--55qx5d.cn', + ], + 'null domain and suffix' => [ + 'domain' => null, + 'publicSuffix' => null, + 'expectedDomain' => null, + 'expectedSuffix' => null, + 'expectedIDNDomain' => null, + 'expectedIDNSuffix' => null, + ], + 'domain with null suffix' => [ + 'domain' => 'www.食狮.公司.cn', + 'publicSuffix' => null, + 'expectedDomain' => 'www.食狮.公司.cn', + 'expectedSuffix' => null, + 'expectedIDNDomain' => 'www.xn--85x722f.xn--55qx5d.cn', + 'expectedIDNSuffix' 
=> null, + ], + ]; + } } diff --git a/tests/ManagerTest.php b/tests/ManagerTest.php index e5f5925a..0a552b47 100644 --- a/tests/ManagerTest.php +++ b/tests/ManagerTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace pdp\tests; +namespace Pdp\Tests; use org\bovigo\vfs\vfsStream; use Pdp\Cache; @@ -13,9 +13,6 @@ class ManagerTest extends TestCase { - /** - * @var Manager - */ protected $manager; protected $cachePool; protected $cacheDir; diff --git a/tests/PublicSuffixTest.php b/tests/PublicSuffixTest.php new file mode 100644 index 00000000..71b26615 --- /dev/null +++ b/tests/PublicSuffixTest.php @@ -0,0 +1,38 @@ +assertEquals($publicSuffix, $generatePublicSuffix); + } + + public function testPSToUnicodeWithUrlEncode() + { + $this->assertSame('bébe', (new PublicSuffix('b%C3%A9be'))->toUnicode()->getContent()); + } + + public function testPSToAsciiThrowsException() + { + $this->expectException(Exception::class); + (new PublicSuffix('_b%C3%A9bé.be-'))->toAscii(); + } + + public function testConversionReturnsTheSameInstance() + { + $instance = new PublicSuffix('ac.be', Rules::ICANN_DOMAINS); + $this->assertSame($instance->toUnicode(), $instance); + $this->assertSame($instance->toAscii(), $instance); + } +} diff --git a/tests/RulesTest.php b/tests/RulesTest.php index f3667d31..50c81699 100644 --- a/tests/RulesTest.php +++ b/tests/RulesTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace pdp\tests; +namespace Pdp\Tests; use Pdp\Cache; use Pdp\CurlHttpClient; @@ -44,6 +44,12 @@ public function testCreateFromPathThrowsException() Rules::createFromPath('/foo/bar.dat'); } + public function testDomainInternalPhpMethod() + { + $generateRules = eval('return '.var_export($this->rules, true).';'); + $this->assertEquals($this->rules, $generateRules); + } + public function testNullWillReturnNullDomain() { $domain = $this->rules->resolve('COM'); @@ -96,7 +102,7 @@ public function testSudDomainIsNull() public function testWithInvalidDomainName() { $domain = 
$this->rules->resolve('_b%C3%A9bé.be-'); - $this->assertSame('_b%C3%A9bé.be-', $domain->getDomain()); + $this->assertSame('_bébé.be-', $domain->getDomain()); $this->assertFalse($domain->isKnown()); $this->assertFalse($domain->isICANN()); $this->assertFalse($domain->isPrivate()); @@ -213,12 +219,32 @@ public function parseDataProvider() ]; } - public function testGetPublicSuffixHandlesWrongCaseProperly() + /** + * @dataProvider invalidParseProvider + * @param mixed $domain + * @param mixed $section + */ + public function testDetermine($domain, $section) { - $publicSuffix = 'рф'; - $domain = 'Яндекс.РФ'; + $this->expectException(Exception::class); + $this->rules->getPublicSuffix($domain, $section); + } - $this->assertSame($publicSuffix, $this->rules->resolve($domain, Rules::ICANN_DOMAINS)->getPublicSuffix()); + public function invalidParseProvider() + { + return [ + 'IPv6' => ['[::1]', Rules::ICANN_DOMAINS], + 'IPv4' => ['192.168.1.2', Rules::ICANN_DOMAINS], + 'single label host' => ['localhost', Rules::ICANN_DOMAINS], + ]; + } + + public function testPublicSuffixSection() + { + $expected = 'рф'; + $domain = 'Яндекс.РФ'; + $publicSuffix = $this->rules->getPublicSuffix($domain); + $this->assertSame($expected, $publicSuffix->getContent()); } /** @@ -231,12 +257,12 @@ public function testGetPublicSuffixHandlesWrongCaseProperly() * * @see http://publicsuffix.org/list/ * - * @param string $input Domain and public suffix - * @param string $expected Expected result + * @param string|null $input Domain and public suffix + * @param string|null $expected Expected result */ public function checkPublicSuffix($input, $expected) { - $this->assertSame($expected, $this->rules->resolve($input, Rules::ICANN_DOMAINS)->getRegistrableDomain()); + $this->assertSame($expected, $this->rules->resolve($input)->getRegistrableDomain()); } /**