BioMedNorm-MCP-Server / oaklib_utils.py
RohanKarthikeyan's picture
Explicit removal of NoneType CURIE Ids
c8b6532 verified
raw
history blame contribute delete
847 Bytes
"""
Retrieve top k candidate standard terms for normalization using oaklib.
"""
from oaklib import get_adapter
from oaklib.datamodels.search import SearchConfiguration
# Shared adapter, created once at import time from the "ols:" selector and
# reused by every get_candidates() call.
# NOTE(review): "ols" presumably refers to the Ontology Lookup Service - confirm.
adapter = get_adapter("ols:")
def get_candidates(term: str, top_k: int = 10) -> list[tuple[str, str]]:
    """
    Get top k candidates for RAG.

    Searches the module-level ``adapter`` for ``term`` and pairs the URI of
    each hit with its label.

    Args:
        term: Text to search for.
        top_k: Limit passed to the search configuration (default 10).

    Returns:
        A list of ``(uri, label)`` tuples, one per returned entry whose
        CURIE is not ``None``.
    """
    # Set config for search (limit # terms returned)
    cfg = SearchConfiguration(limit=top_k)
    results = adapter.basic_search(term, config=cfg)

    # Keep both URI and standard terms for "explainable" output, converting
    # each CURIE ID to a URI first. CURIEs were sometimes observed to be
    # None, so those entries are skipped explicitly.
    # The loop variable is `label` (not `term`) so that it does not shadow
    # the `term` parameter.
    return [
        (adapter.curie_to_uri(curie), label)
        for curie, label in adapter.labels(results)
        if curie is not None
    ]