# Names exported by ``from <this module> import *``: the exception raised when
# range requests are unavailable, and the lazy wheel-metadata entry point.
__all__ = ["HTTPRangeRequestUnsupported", "dist_from_wheel_url"]
from bisect import bisect_left, bisect_right
from contextlib import contextmanager
from tempfile import NamedTemporaryFile
from typing import Any, Dict, Generator, List, Optional, Tuple
from zipfile import BadZipFile, ZipFile
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._internal.metadata import BaseDistribution, MemoryWheel, get_wheel_distribution
from pip._internal.network.session import PipSession
from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks
class HTTPRangeRequestUnsupported(Exception):
    """Raised when the server's HEAD response does not list ``bytes`` in
    its ``Accept-Ranges`` header, so the file cannot be fetched lazily.
    """
def dist_from_wheel_url(name: str, url: str, session: PipSession) -> BaseDistribution:
    """Build a distribution object for the wheel located at ``url``.

    The wheel is opened through ``LazyZipOverHTTP`` rather than being
    downloaded up front, and wrapped in a ``MemoryWheel`` that is handed to
    ``get_wheel_distribution`` together with the canonicalized ``name``.

    Any exception raised while constructing ``LazyZipOverHTTP`` (for example
    ``HTTPRangeRequestUnsupported``) propagates to the caller.
    """
    with LazyZipOverHTTP(url, session) as lazy_zip:
        # The lazy file object doubles as the wheel's "file"; its name is the
        # path of the backing temporary file.
        memory_wheel = MemoryWheel(lazy_zip.name, lazy_zip)
        project_name = canonicalize_name(name)
        distribution = get_wheel_distribution(memory_wheel, project_name)
    return distribution
class LazyZipOverHTTP:
    """Read-only, seekable file-like object backed by a remote file.

    The remote file is mirrored into a local temporary file of the same
    length.  Byte ranges are fetched on demand with HTTP range requests in
    units of at least ``chunk_size`` bytes, and the ranges already fetched
    are tracked so that no byte is requested twice.

    Raises:
        HTTPRangeRequestUnsupported: if the HEAD response does not list
            ``bytes`` in its ``Accept-Ranges`` header.
    """

    def __init__(
        self, url: str, session: PipSession, chunk_size: int = CONTENT_CHUNK_SIZE
    ) -> None:
        """Probe ``url`` with a HEAD request and prepare the local mirror.

        Args:
            url: Location of the remote file.
            session: Session used for the HEAD and all later range requests.
            chunk_size: Minimum number of bytes requested per download.
        """
        head = session.head(url, headers=HEADERS)
        raise_for_status(head)
        # NOTE(review): ``assert`` is stripped under ``python -O``; it is kept
        # so that the exception type seen by existing callers does not change.
        assert head.status_code == 200
        self._session, self._url, self._chunk_size = session, url, chunk_size
        self._length = int(head.headers["Content-Length"])
        # Parallel sorted lists holding the inclusive start/end offsets of
        # the byte intervals that have already been downloaded.
        self._left: List[int] = []
        self._right: List[int] = []
        # Reject unsupported servers *before* allocating the temporary file,
        # so that this failure path cannot leak an open file.
        if "bytes" not in head.headers.get("Accept-Ranges", "none"):
            raise HTTPRangeRequestUnsupported("range request is not supported")
        self._file = NamedTemporaryFile()
        try:
            self.truncate(self._length)
            self._check_zip()
        except BaseException:
            # The caller never receives this instance, so nobody else can
            # close the temporary file; release it here and re-raise.
            self._file.close()
            raise

    @property
    def mode(self) -> str:
        """Opening mode, which is always ``"rb"``."""
        return "rb"

    @property
    def name(self) -> str:
        """Name of the backing temporary file."""
        return self._file.name

    def seekable(self) -> bool:
        """Return whether random access is supported, which is always True."""
        return True

    def close(self) -> None:
        """Close the backing temporary file."""
        self._file.close()

    @property
    def closed(self) -> bool:
        """Whether the backing temporary file is closed."""
        return self._file.closed

    def read(self, size: int = -1) -> bytes:
        """Read up to ``size`` bytes from the current position.

        If ``size`` is negative, everything up to the end of the file is
        returned.  The bytes needed are downloaded first; at least
        ``chunk_size`` bytes are requested, extending the window backwards
        when the read is close to the end of the file.
        """
        download_size = max(size, self._chunk_size)
        start, length = self.tell(), self._length
        stop = length if size < 0 else min(start + download_size, length)
        # Near EOF the window is widened backwards to keep its size, but it
        # must never begin *after* the current position: for ``size < 0``
        # that would leave the bytes between the position and the last chunk
        # undownloaded and return the zero fill instead of real data.
        start = max(0, min(start, stop - download_size))
        self._download(start, stop - 1)
        return self._file.read(size)

    def readable(self) -> bool:
        """Return whether the file is readable, which is always True."""
        return True

    def seek(self, offset: int, whence: int = 0) -> int:
        """Move the position of the backing file and return the new offset.

        ``offset`` and ``whence`` are passed straight through to the
        backing file's ``seek``.
        """
        return self._file.seek(offset, whence)

    def tell(self) -> int:
        """Return the current position in the backing file."""
        return self._file.tell()

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the backing file via its ``truncate`` and return the result."""
        return self._file.truncate(size)

    def writable(self) -> bool:
        """Return False: this object does not advertise write support."""
        return False

    def __enter__(self) -> "LazyZipOverHTTP":
        """Enter the backing file's context and return this object."""
        self._file.__enter__()
        return self

    def __exit__(self, *exc: Any) -> None:
        """Leave the backing file's context, forwarding the exception info."""
        self._file.__exit__(*exc)

    @contextmanager
    def _stay(self) -> Generator[None, None, None]:
        """Context manager that restores the current position on exit."""
        pos = self.tell()
        try:
            yield
        finally:
            self.seek(pos)

    def _check_zip(self) -> None:
        """Download from the end of the file until it opens as a ZIP.

        Progressively larger tails of the file are fetched, one chunk
        further back each round, until ``ZipFile`` accepts the content.  If
        no round succeeds the loop simply ends without raising.
        """
        end = self._length - 1
        for start in reversed(range(0, end, self._chunk_size)):
            self._download(start, end)
            with self._stay():
                try:
                    ZipFile(self)
                except BadZipFile:
                    # Not enough of the tail fetched yet; go one chunk back.
                    pass
                else:
                    break

    def _stream_response(
        self, start: int, end: int, base_headers: Dict[str, str] = HEADERS
    ) -> Response:
        """Return the streaming response to a range request.

        Args:
            start: First byte offset requested (inclusive).
            end: Last byte offset requested (inclusive).
            base_headers: Headers to send; copied before being extended, so
                the mapping passed in is never modified.
        """
        headers = base_headers.copy()
        headers["Range"] = f"bytes={start}-{end}"
        headers["Cache-Control"] = "no-cache"
        return self._session.get(self._url, headers=headers, stream=True)

    def _merge(
        self, start: int, end: int, left: int, right: int
    ) -> Generator[Tuple[int, int], None, None]:
        """Yield the sub-intervals of ``[start, end]`` not yet downloaded.

        Args:
            start: First byte offset wanted (inclusive).
            end: Last byte offset wanted (inclusive).
            left: Index of the first recorded interval to merge with.
            right: Index one past the last recorded interval to merge with.

        Once the generator is exhausted, the recorded intervals in
        ``[left, right)`` are replaced by the single merged interval.
        """
        lslice, rslice = self._left[left:right], self._right[left:right]
        # Grow the requested interval to cover every recorded interval in
        # the slice, so the merged record spans all of them.
        i = start = min([start] + lslice[:1])
        end = max([end] + rslice[-1:])
        for j, k in zip(lslice, rslice):
            if j > i:
                # Gap in front of the recorded interval [j, k].
                yield i, j - 1
            i = k + 1
        if i <= end:
            # Gap after the last recorded interval.
            yield i, end
        self._left[left:right], self._right[left:right] = [start], [end]

    def _download(self, start: int, end: int) -> None:
        """Download the bytes from ``start`` to ``end`` inclusive.

        Only the parts not already recorded as downloaded are requested.
        The current position is restored afterwards.
        """
        with self._stay():
            # Recorded intervals whose end is >= start and whose start is
            # <= end are the ones to merge with.
            left = bisect_left(self._right, start)
            right = bisect_right(self._left, end)
            for gap_start, gap_end in self._merge(start, end, left, right):
                response = self._stream_response(gap_start, gap_end)
                response.raise_for_status()
                self.seek(gap_start)
                for chunk in response_chunks(response, self._chunk_size):
                    self._file.write(chunk)