"""Module to work with iNat taxa."""
import copy
import re
from typing import NamedTuple, Optional, Union
from dronefly.core.constants import RANK_LEVELS
from dronefly.core.query.query import TaxonQuery
from dronefly.core.utils import obs_url_from_v1
from pyinaturalist.models import Place, Taxon, User
from redbot.core.commands import Context
# Header text for each kind of count listing. Every header is paired with a
# compiled pattern that matches the header literally (the text is escaped),
# immediately followed by a newline.

# Counts from a place.
TAXON_PLACES_HEADER = "__obs# (spp#) from place:__"
TAXON_PLACES_HEADER_PAT = re.compile(f"{re.escape(TAXON_PLACES_HEADER)}\n")

# Counts by user.
TAXON_COUNTS_HEADER = "__obs# (spp#) by user:__"
TAXON_COUNTS_HEADER_PAT = re.compile(f"{re.escape(TAXON_COUNTS_HEADER)}\n")

# Counts identified by user.
TAXON_IDBY_HEADER = "__obs# (spp#) identified by user:__"
TAXON_IDBY_HEADER_PAT = re.compile(f"{re.escape(TAXON_IDBY_HEADER)}\n")

# Counts unobserved by user.
TAXON_NOTBY_HEADER = "__obs# (spp#) unobserved by user:__"
TAXON_NOTBY_HEADER_PAT = re.compile(f"{re.escape(TAXON_NOTBY_HEADER)}\n")

# Separators used between entries of a taxon list.
# NOTE(review): presumably ", " joins siblings and " > " joins a lineage;
# the consumers are not visible here, so confirm against callers.
TAXON_LIST_DELIMITER = [", ", " > "]
async def get_taxon_preferred_establishment_means(ctx, taxon):
    """Get the preferred establishment means for the taxon.

    Parameters
    ----------
    ctx
        Context whose ``client.taxa.populate`` coroutine is awaited to fill
        in the taxon when its ``listed_taxa`` attribute is missing or None.
    taxon
        The taxon to inspect. Its ``establishment_means.place.id`` selects
        which listed-taxa entry is preferred.

    Returns
    -------
    object or None
        The first entry of ``taxon.listed_taxa`` whose place id equals the
        place id of the taxon's own establishment means; otherwise the
        taxon's own establishment means. None if the taxon has no usable
        establishment means / place, or if populating it fails with an
        AttributeError or LookupError.
    """
    try:
        establishment_means = taxon.establishment_means
        place_id = establishment_means.place.id
        if getattr(taxon, "listed_taxa", None) is None:
            taxon = await ctx.client.taxa.populate(taxon)
    except (AttributeError, LookupError):
        return None

    # The listing may still be absent after populating; treat that as empty
    # so the taxon's own establishment means is returned.
    listed_taxa = getattr(taxon, "listed_taxa", None) or ()
    for means in listed_taxa:
        # An entry without a place cannot match any place id; skip it
        # instead of failing the whole lookup.
        place = getattr(means, "place", None)
        if place is not None and place.id == place_id:
            return means
    return establishment_means
class NameMatch(NamedTuple):
    """Match for each name field in Taxon matching a pattern.

    Each field holds the ``re.Match`` produced by searching that name field,
    or None when the pattern was not found there (or the field was not
    searched).
    """

    # Match against the matched term (or a locale-specific name).
    term: Optional[re.Match]
    # Match against the scientific name.
    name: Optional[re.Match]
    # Match against the common name.
    common: Optional[re.Match]


# Shared "nothing matched" value; results are compared to it by equality.
NO_NAME_MATCH = NameMatch(None, None, None)
def match_pat(record, pat, scientific_name=False, locale=None):
    """Match specified pattern.

    Parameters
    ----------
    record: Taxon
        A candidate taxon to match.
    pat: re.Pattern or str
        A pattern to match against each name field in the record.
    scientific_name: bool
        Only search scientific name
    locale: str
        Only search common names matching locale. The value is applied as a
        case-insensitive regular expression anchored at the start of each
        name's locale. Ignored when scientific_name is set.

    Returns
    -------
    NameMatch
        A tuple of search results for the pat for each name in the record.
        With scientific_name, only the ``name`` field can be set. With
        locale, ``term`` and ``common`` both hold the match from the first
        valid name (in ``position`` order) that matches, and ``name`` is
        always None.
    """
    if scientific_name:
        return NameMatch(None, re.search(pat, record.name), None)

    if locale:
        # Valid names for the locale, most preferred (lowest position) first.
        locale_names = sorted(
            (
                name
                for name in record.names
                if name["is_valid"] and re.match(locale, name["locale"], re.I)
            ),
            key=lambda name: name["position"],
        )
        for name in locale_names:
            mat = re.search(pat, name["name"])
            if mat:
                return NameMatch(mat, None, mat)
        return NO_NAME_MATCH

    matched_term = record.matched_term
    common_name = record.preferred_common_name
    return NameMatch(
        # A record may carry no matched term; that is "no match", not an error.
        re.search(pat, matched_term) if matched_term is not None else None,
        re.search(pat, record.name),
        re.search(pat, common_name) if common_name else None,
    )
def match_pat_list(record, pat_list, scientific_name=False, locale=None):
    """Match all of a list of patterns.

    Parameters
    ----------
    record: Taxon
        A candidate taxon to match.
    pat_list: list
        A list of patterns to match. Every pattern must match at least one
        name field of the record.
    scientific_name: bool
        Only search scientific name
    locale: str
        Only search common names matching locale

    Returns
    -------
    NameMatch
        NO_NAME_MATCH if any pattern fails to match every name field (or if
        pat_list is empty). Otherwise, for each name field, the match from
        the first pattern in pat_list that matched that field.
    """
    matched = NO_NAME_MATCH
    for pat in pat_list:
        this_match = match_pat(record, pat, scientific_name, locale)
        if this_match == NO_NAME_MATCH:
            # One pattern matching nothing disqualifies the whole record.
            return NO_NAME_MATCH
        # Keep the earliest match seen so far for each field.
        matched = NameMatch(
            matched.term or this_match.term,
            matched.name or this_match.name,
            matched.common or this_match.common,
        )
    return matched
def score_match(
    taxon_query: TaxonQuery,
    record,
    all_terms,
    pat_list=None,
    scientific_name=False,
    locale=None,
):
    """Score a matched record. A higher score is a better match.

    Parameters
    ----------
    taxon_query: TaxonQuery
        The query for the matched record being scored.
    record: Taxon
        A candidate taxon to match.
    all_terms: re.Pattern
        A pattern matching all terms.
    pat_list: list
        A list of patterns to match.
    scientific_name: bool
        Only score on the scientific name.
    locale: str
        Only score on common names matching locale.

    Returns
    -------
    int
        score < 0 indicates the match is not a valid candidate.
        score >= 0 and score < 200 indicates a non-exact match
        score >= 200 indicates a match on every pattern in pat_list, on the
        query's code, or (1000) a query by taxon id.

        In detail: 1000 for a query by id; with scientific_name or locale,
        200 if pat_list matched the relevant field, else -1; otherwise 300
        if the query code equals the record's matched term, 210 / 200 if
        pat_list matched the name or common name / only the matched term,
        120 / 110 if all_terms matched the name or common name / only the
        matched term, and 100 if nothing matched.
    """
    if taxon_query.taxon_id:
        return 1000  # An id is always the best match

    matched = (
        match_pat_list(record, pat_list, scientific_name, locale)
        if pat_list
        else NO_NAME_MATCH
    )

    # Restricted searches are all-or-nothing on the one field searched.
    if scientific_name:
        return 200 if matched.name else -1
    if locale:
        return 200 if matched.term else -1

    if taxon_query.code and (taxon_query.code == record.matched_term):
        return 300
    if matched.name or matched.common:
        return 210
    if matched.term:
        return 200

    # Fall back to the whole-query pattern. This must be decided by
    # all_terms, not taxon_query.taxon_id: a truthy id has already returned
    # above, so gating on it would leave these two scores unreachable.
    all_matched = (
        match_pat(record, all_terms, scientific_name, locale)
        if all_terms
        else NO_NAME_MATCH
    )
    if all_matched.name or all_matched.common:
        return 120
    if all_matched.term:
        return 110
    return 100
def match_taxon(taxon_query: TaxonQuery, records, scientific_name=False, locale=None):
    """Match a single taxon for the given query among records returned by API.

    Parameters
    ----------
    taxon_query: TaxonQuery
        The query; its ranks, terms and phrases decide how records are
        matched.
    records: list
        Candidate taxa, in the order they were returned.
    scientific_name: bool
        Only match on the scientific name.
    locale: str
        Only match on common names matching locale.

    Returns
    -------
    Taxon or None
        The first record for a rank-only query (ranks but no terms).
        Otherwise the highest scoring record, the earliest one winning ties,
        provided its score is not negative and, when the query has phrases,
        is at least 200. None if there are no records or no record
        qualifies.
    """
    # Nothing to choose from; also keeps max() below from failing on an
    # empty score list.
    if not records:
        return None

    if taxon_query.ranks and not taxon_query.terms:
        return records[0]

    # The whole query, matched as one exact, case-insensitive string.
    all_terms = re.compile(r"^%s$" % re.escape(" ".join(taxon_query.terms)), re.I)
    if taxon_query.phrases:
        # Each phrase must appear as whole words.
        pat_list = [
            re.compile(r"\b%s\b" % re.escape(" ".join(phrase)), re.I)
            for phrase in taxon_query.phrases
        ]
    elif scientific_name or locale:
        # Each term must start a word.
        pat_list = [
            re.compile(r"\b%s" % re.escape(term), re.I)
            for term in taxon_query.terms
        ]
    else:
        pat_list = []

    scores = [
        score_match(
            taxon_query,
            record,
            all_terms=all_terms,
            pat_list=pat_list,
            scientific_name=scientific_name,
            locale=locale,
        )
        for record in records
    ]

    best_score = max(scores)
    best_record = records[scores.index(best_score)]
    min_score_met = (best_score >= 0) and (
        (not taxon_query.phrases) or (best_score >= 200)
    )
    return best_record if min_score_met else None
async def get_taxon(ctx: Context, taxon_id, **kwargs):
    """Get taxon by id.

    Asks ``ctx.inat_client.taxa.from_ids`` for at most one result, passing
    any extra keyword arguments through, and returns the first taxon
    retrieved. Returns None when no paginator is produced or it yields no
    taxa.
    """
    paginator = ctx.inat_client.taxa.from_ids(taxon_id, limit=1, **kwargs)
    if not paginator:
        return None

    found = await paginator.async_all()
    if not found:
        return None
    return found[0]