import functools
import importlib.metadata
import logging
import os
import pathlib
import sys
import zipfile
import zipimport
from typing import Iterator, List, Optional, Sequence, Set, Tuple

from pip._vendor.packaging.utils import NormalizedName, canonicalize_name

from pip._internal.metadata.base import BaseDistribution, BaseEnvironment
from pip._internal.models.wheel import Wheel
from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.filetypes import WHEEL_EXTENSION

from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location
from ._dists import Distribution

logger = logging.getLogger(__name__)


def _looks_like_wheel(location: str) -> bool:
    if not location.endswith(WHEEL_EXTENSION):
        return False
    if not os.path.isfile(location):
        return False
    if not Wheel.wheel_file_re.match(os.path.basename(location)):
        return False
    return zipfile.is_zipfile(location)


class _DistributionFinder:
    """Finder to locate distributions.

    The main purpose of this class is to memoize found distributions' names, so
    only one distribution is returned for each package name. At lot of pip code
    assumes this (because it is setuptools's behavior), and not doing the same
    can potentially cause a distribution in lower precedence path to override a
    higher precedence one if the caller is not careful.

    Eventually we probably want to make it possible to see lower precedence
    installations as well. It's useful feature, after all.
    """

    FoundResult = Tuple[importlib.metadata.Distribution, Optional[BasePath]]

    def __init__(self) -> None:
        self._found_names: Set[NormalizedName] = set()

    def _find_impl(self, location: str) -> Iterator[FoundResult]:
        """Find distributions in a location."""
        # Skip looking inside a wheel. Since a package inside a wheel is not
        # always valid (due to .data directories etc.), its .dist-info entry
        # should not be considered an installed distribution.
        if _looks_like_wheel(location):
            return
        # To know exactly where we find a distribution, we have to feed in the
        # paths one by one, instead of dumping the list to importlib.metadata.
        for dist in importlib.metadata.distributions(path=[location]):
            info_location = get_info_location(dist)
            try:
                raw_name = get_dist_name(dist)
            except BadMetadata as e:
                logger.warning("Skipping %s due to %s", info_location, e.reason)
                continue
            normalized_name = canonicalize_name(raw_name)
            if normalized_name in self._found_names:
                continue
            self._found_names.add(normalized_name)
            yield dist, info_location

    def find(self, location: str) -> Iterator[BaseDistribution]:
        """Find distributions in a location.

        The path can be either a directory, or a ZIP archive.
        """
        for dist, info_location in self._find_impl(location):
            if info_location is None:
                installed_location: Optional[BasePath] = None
            else:
                installed_location = info_location.parent
            yield Distribution(dist, info_location, installed_location)

    def find_linked(self, location: str) -> Iterator[BaseDistribution]:
        """Read location in egg-link files and return distributions in there.

        The path should be a directory; otherwise this returns nothing. This
        follows how setuptools does this for compatibility. The first non-empty
        line in the egg-link is read as a path (resolved against the egg-link's
        containing directory if relative). Distributions found at that linked
        location are returned.
        """
        path = pathlib.Path(location)
        if not path.is_dir():
            return
        for child in path.iterdir():
            if child.suffix != ".egg-link":
                continue
            with child.open() as f:
                lines = (line.strip() for line in f)
                target_rel = next((line for line in lines if line), "")
            if not target_rel:
                continue
            target_location = str(path.joinpath(target_rel))
            for dist, info_location in self._find_impl(target_location):
                yield Distribution(dist, info_location, path)

    def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
        from pip._vendor.pkg_resources import find_distributions

        from pip._internal.metadata import pkg_resources as legacy

        with os.scandir(location) as it:
            for entry in it:
                if not entry.name.endswith(".egg"):
                    continue
                for dist in find_distributions(entry.path):
                    yield legacy.Distribution(dist)

    def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
        from pip._vendor.pkg_resources import find_eggs_in_zip

        from pip._internal.metadata import pkg_resources as legacy

        try:
            importer = zipimport.zipimporter(location)
        except zipimport.ZipImportError:
            return
        for dist in find_eggs_in_zip(importer, location):
            yield legacy.Distribution(dist)

    def find_eggs(self, location: str) -> Iterator[BaseDistribution]:
        """Find eggs in a location.

        This actually uses the old *pkg_resources* backend. We likely want to
        deprecate this so we can eventually remove the *pkg_resources*
        dependency entirely. Before that, this should first emit a deprecation
        warning for some versions when using the fallback since importing
        *pkg_resources* is slow for those who don't need it.
        """
        if os.path.isdir(location):
            yield from self._find_eggs_in_dir(location)
        if zipfile.is_zipfile(location):
            yield from self._find_eggs_in_zip(location)


@functools.lru_cache(maxsize=None)  # Warn a distribution exactly once.
def _emit_egg_deprecation(location: Optional[str]) -> None:
    deprecated(
        reason=f"Loading egg at {location} is deprecated.",
        replacement="to use pip for package installation.",
        gone_in="24.3",
        issue=12330,
    )


class Environment(BaseEnvironment):
    def __init__(self, paths: Sequence[str]) -> None:
        self._paths = paths

    @classmethod
    def default(cls) -> BaseEnvironment:
        return cls(sys.path)

    @classmethod
    def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment:
        if paths is None:
            return cls(sys.path)
        return cls(paths)

    def _iter_distributions(self) -> Iterator[BaseDistribution]:
        finder = _DistributionFinder()
        for location in self._paths:
            yield from finder.find(location)
            for dist in finder.find_eggs(location):
                _emit_egg_deprecation(dist.location)
                yield dist
            # This must go last because that's how pkg_resources tie-breaks.
            yield from finder.find_linked(location)

    def get_distribution(self, name: str) -> Optional[BaseDistribution]:
        matches = (
            distribution
            for distribution in self.iter_all_distributions()
            if distribution.canonical_name == canonicalize_name(name)
        )
        return next(matches, None)