Source code for inatcog.taxon_query

"""Module to query iNat taxa."""
from redbot.core.commands import BadArgument, Context
from dronefly.core.constants import RANK_EQUIVALENTS, RANK_LEVELS
from dronefly.core.formatters.generic import format_taxon_name
from dronefly.core.query.query import Query, TaxonQuery
from pyinaturalist.models import Taxon

from .converters.base import NaturalQueryConverter
from .taxa import get_taxon, match_taxon


[docs]class INatTaxonQuery:
    """Query iNat for one or more taxa."""

    def __init__(self, cog):
        self.cog = cog

[docs]    async def get_taxon_ancestor(self, ctx: Context, taxon, rank):
        """Get Taxon ancestor for specified rank from a Taxon object.

        Parameters
        ----------
        taxon: Taxon
            The taxon for which the ancestor at the specified rank is requested.
        rank: str
            The rank of the ancestor to return.

        Returns
        -------
        Taxon
            A Taxon object for the matching ancestor, if any, else None.
        """

        def taxon_ancestor_ranks(taxon: Taxon):
            return (
                ["stateofmatter"] + [ancestor.rank for ancestor in taxon.ancestors]
                if taxon.ancestors
                else []
            )

        rank = RANK_EQUIVALENTS.get(rank) or rank
        ranks = taxon_ancestor_ranks(taxon)
        if rank in ranks:
            rank_index = ranks.index(rank)
            ancestor = await get_taxon(ctx, taxon.ancestor_ids[rank_index])
            return ancestor
        return None

[docs]    async def maybe_match_taxon(
        self,
        ctx: Context,
        taxon_query: TaxonQuery,
        ancestor_id: int = None,
        preferred_place_id: int = None,
        scientific_name: bool = False,
        locale: str = None,
    ):
        """Get taxon and return a match, if any."""
        kwargs = {}
        taxon = None
        records_read = 0
        total_records = 0

        if locale:
            kwargs["locale"] = locale
        if preferred_place_id:
            kwargs["preferred_place_id"] = int(preferred_place_id)
        if taxon_query.taxon_id:
            taxon = await get_taxon(ctx, taxon_query.taxon_id)
        else:
            if taxon_query.terms:
                kwargs["q"] = " ".join(taxon_query.terms)
            if taxon_query.ranks:
                kwargs["rank"] = ",".join(taxon_query.ranks)
            if ancestor_id:
                kwargs["taxon_id"] = ancestor_id
            for page in range(11):
                if page == 0:
                    per_page = 30
                    endpoint = ctx.inat_client.taxa.autocomplete
                else:
                    # restart numbering, as we are using a different endpoint
                    # now with different page size:
                    if page == 1:
                        records_read = 0
                    kwargs["page"] = page
                    per_page = 200
                    endpoint = ctx.inat_client.taxa.search
                kwargs["per_page"] = per_page
                paginator = endpoint(limit=per_page, **kwargs)
                if paginator:
                    records = await paginator.async_all()
                    total_records = paginator.count()
                if not records:
                    break
                records_read += len(records)
                taxon = match_taxon(
                    taxon_query,
                    records,
                    scientific_name=scientific_name,
                    locale=locale,
                )
                if taxon:
                    break
                if records_read >= total_records:
                    break

        if not taxon:
            if records_read >= total_records:
                raise LookupError("No matching taxon found.")

            raise LookupError(
                f"No {'exact ' if taxon_query.phrases else ''}match "
                f"found in {'scientific name of ' if scientific_name else ''}{records_read}"
                f" of {total_records} total records containing those terms."
            )

        return taxon

[docs]    async def maybe_match_taxon_compound(
        self,
        ctx: Context,
        query: Query,
        preferred_place_id=None,
        scientific_name=False,
        locale=None,
    ):
        """Get one or more taxa and return a match, if any.

        Currently the grammar supports only one ancestor taxon
        and one child taxon.
        """
        if query.ancestor:
            ancestor = None
            try:
                ancestor = await self.maybe_match_taxon(
                    ctx,
                    query.ancestor,
                    preferred_place_id=preferred_place_id,
                    scientific_name=scientific_name,
                    locale=locale,
                )
                if ancestor:
                    if query.main.ranks:
                        max_query_rank_level = max(
                            [RANK_LEVELS[rank] for rank in query.main.ranks]
                        )
                        ancestor_rank_level = RANK_LEVELS[ancestor.rank]
                        if max_query_rank_level >= ancestor_rank_level:
                            raise LookupError(
                                "Child rank%s: `%s` must be below ancestor rank: `%s`"
                                % (
                                    "s" if len(query.main.ranks) > 1 else "",
                                    ",".join(query.main.ranks),
                                    ancestor.rank,
                                )
                            )
                    taxon = await self.maybe_match_taxon(
                        ctx,
                        query.main,
                        ancestor_id=ancestor.id,
                        preferred_place_id=preferred_place_id,
                        scientific_name=scientific_name,
                        locale=locale,
                    )
            except LookupError as err:
                reason = (
                    str(err) + "\nPerhaps instead of `in` (ancestor), you meant\n"
                    "`from` (place) or `in prj` (project)?"
                )
                if ancestor:
                    reason = (
                        f"{reason}\n\n"
                        f"Ancestor taxon: {format_taxon_name(ancestor, with_term=True)}"
                    )
                else:
                    reason = f"{reason}\n\nAncestor taxon not found."
                raise LookupError(reason) from err
        else:
            taxon = await self.maybe_match_taxon(
                ctx,
                query.main,
                preferred_place_id=preferred_place_id,
                scientific_name=scientific_name,
                locale=locale,
            )

        return taxon

[docs]    async def query_taxa(self, ctx, query):
        """Query for one or more taxa and return list of matching taxa, if any."""
        queries = query.split(",")

        # De-duplicate the query via dict:
        taxa = {}
        missing_taxa = []
        for query_str in queries:
            try:
                query = await NaturalQueryConverter.convert(ctx, query_str)
                query_response = await self.cog.query.get(ctx, query)
                if query_response.taxon:
                    taxon = query_response.taxon
                    taxa[str(taxon.id)] = taxon
            except (BadArgument, LookupError):
                missing_taxa.append(query_str)
                pass

        result = taxa.values()
        if not result:
            raise LookupError("No taxon found")

        return (result, missing_taxa)

[docs]    async def query_paginated_taxa(self, ctx, query):
        """Query for one or more taxa and return paginator for matching taxa, if any.

        Notes:

        - In its original conception, this was used only for comma-delimited
          lists of taxon queries for map & related, or a list of taxon ancestor
          IDs. These had a small, definite number of elements (whatever the user
          typed, or all the ancestors of a taxon), were de-duplicated, and
          didn't need to be paginated.

        - We want to go one step further here and return multiple taxa, whether or
          not multiple were given as input:

            - The return is then a paginator for all matching taxa.
            - Which may be filtered in some fashion, e.g.
                - All taxa matching the supplied name(s).
                - For a given rank keyword.
            - And if there are no filters, then just all results matching the
              query.

        - The "rank" filter is baked into maybe_match_taxon_compound (I think)
          and needs to be pulled out of that.

            - In fact, most of that relates to selecting "one best" match,
              so really isn't needed here.
        """
        queries = query.split(",")

        async def _get_taxon(query):
            # TODO: extract from the following whatever logic applies
            # to our taxon search and redo in a more modular way:
            # - components:
            #   - matchers (phrases, AOU codes, etc.)
            #   - scorers (point values for exact / inexact, etc.)
            #   - a filter (the `in` clause for parent taxon)
            # - reassemble those components to implement the logic described
            #   above, and especially the scorer has to be abandoned unless
            #   the whole result set is fully enumerated (can't be done
            #   efficiently with an arbitrary result set! might work for
            #   single "root" taxon for `in` though)
            return await self.cog.taxon_query.maybe_match_taxon_compound(ctx, query)

        # De-duplicate the query via dict:
        taxa = {}
        for query_str in queries:
            try:
                _query = await NaturalQueryConverter.convert(ctx, query_str)
                taxon = await _get_taxon(_query)
                if taxon:
                    taxa[str(taxon.id)] = taxon
            except (BadArgument, LookupError):
                pass

        result = taxa.values()
        if not result:
            raise LookupError("No taxon found")

        return result