diff --git a/src/unearth/collector.py b/src/unearth/collector.py
index 8fb422f..beafa4f 100644
--- a/src/unearth/collector.py
+++ b/src/unearth/collector.py
@@ -176,9 +176,14 @@ def collect_links_from_location(
             index_html = Link(path_to_url(path.joinpath("index.html").as_posix()))
             yield from _collect_links_from_index(session, index_html, headers)
         else:
-            yield from _collect_links_from_index(session, location, headers)
+            if _is_html_file(str(path)):
+                yield from _collect_links_from_index(session, location, headers)
+            else:
+                yield location
 
-    else:
+    else:  # remote url, can be either a remote file or an index URL containing files
+        if is_secure_origin(session, location) and not location.is_vcs:
+            yield location
         yield from _collect_links_from_index(session, location)
 
 
diff --git a/src/unearth/finder.py b/src/unearth/finder.py
index 0a3a845..579ef4d 100644
--- a/src/unearth/finder.py
+++ b/src/unearth/finder.py
@@ -7,11 +7,11 @@
 import itertools
 import os
 import pathlib
+import posixpath
 import warnings
 from datetime import datetime
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Sequence
-from urllib.parse import urljoin
 
 import packaging.requirements
 from packaging.utils import BuildTag, canonicalize_name, parse_wheel_filename
@@ -190,9 +190,8 @@ def build_evaluator(
         )
 
     def _build_index_page_link(self, index_url: str, package_name: str) -> Link:
-        return Link(
-            urljoin(index_url.rstrip("/") + "/", canonicalize_name(package_name) + "/")
-        )
+        url = posixpath.join(index_url, canonicalize_name(package_name)) + "/"
+        return self._build_find_link(url)
 
     def _build_find_link(self, find_link: str) -> Link:
         if os.path.exists(find_link):
diff --git a/tests/test_collector.py b/tests/test_collector.py
index d08bf7c..72740a8 100644
--- a/tests/test_collector.py
+++ b/tests/test_collector.py
@@ -25,22 +25,15 @@ def test_collector_skip_vcs_link(pypi_session, caplog):
 
 
 def test_collect_links_from_404_page(pypi_session):
-    collected = list(
-        collect_links_from_location(
-            pypi_session, Link("https://test.pypi.org/simple/not-found")
-        )
-    )
-    assert not collected
+    link = Link("https://test.pypi.org/simple/not-found")
+    collected = list(collect_links_from_location(pypi_session, link))
+    assert collected == [link]
 
 
 def test_skip_non_html_archive(pypi_session, caplog):
-    collected = list(
-        collect_links_from_location(
-            pypi_session,
-            Link("https://test.pypi.org/files/click-8.1.3-py3-none-any.whl"),
-        )
-    )
-    assert not collected
+    link = Link("https://test.pypi.org/files/click-8.1.3-py3-none-any.whl")
+    collected = list(collect_links_from_location(pypi_session, link))
+    assert collected == [link]
     assert "Content-Type unsupported" in caplog.records[0].message
 
 
@@ -52,7 +45,7 @@ def test_collect_links_from_index_page(pypi_session):
         ),
         key=lambda link: link.filename,
     )
-    assert len(collected) == 4
+    assert len(collected) == 5
     assert all(link.url.startswith("https://test.pypi.org") for link in collected)
 
 