"""
Retrieve top k candidate standard terms for normalization using oaklib.
"""
from itertools import islice

from oaklib import get_adapter
from oaklib.datamodels.search import SearchConfiguration
# Adapter for the "ols:" selector, built once at import time so every call to
# get_candidates() reuses the same instance instead of constructing a new one.
adapter = get_adapter("ols:")
def get_candidates(term: str, top_k: int = 10) -> list[tuple[str, str]]:
    """
    Get top k candidates for RAG.

    Searches the module-level ``adapter`` for ``term`` and pairs the URI of
    each hit with its label, so both can be shown in "explainable" output.

    Args:
        term: Term to search for.
        top_k: Maximum number of candidates to return. Values below 1 yield
            an empty list without querying the adapter.

    Returns:
        At most ``top_k`` ``(uri, label)`` tuples, in the order the adapter
        produced them. Hits whose CURIE is ``None`` are skipped, so fewer
        than ``top_k`` tuples may come back.
    """
    # Asking for zero (or a negative number of) candidates has only one
    # sensible answer; avoid sending a nonsensical limit to the backend.
    if top_k <= 0:
        return []

    # Set config for search (limit # terms returned)
    cfg = SearchConfiguration(limit=top_k)
    results = adapter.basic_search(term, config=cfg)

    # Keep both URI and standard term for "explainable" output.
    # adapter.labels() yields (CURIE, label) pairs; CURIEs are converted to
    # URIs here. The explicit None check stays because None CURIEs have been
    # observed in practice.
    pairs = (
        (adapter.curie_to_uri(curie), label)
        for curie, label in adapter.labels(results)
        if curie is not None
    )

    # NOTE(review): the cap is enforced locally as well, in case the backend
    # treats `limit` as a hint (e.g. a page size) rather than a hard maximum
    # -- confirm against the adapter in use. Consuming lazily via islice also
    # stops pulling results as soon as top_k pairs have been collected.
    return list(islice(pairs, top_k))