""" Retrieve top k candidate standard terms for normalization using oaklib. """ from oaklib import get_adapter from oaklib.datamodels.search import SearchConfiguration adapter = get_adapter("ols:") def get_candidates(term: str, top_k: int = 10) -> list[tuple[str, str]]: """ Get top k candidates for RAG. """ # Set config for search (limit # terms returned) cfg = SearchConfiguration(limit=top_k) results = adapter.basic_search(term, config=cfg) labels = list(adapter.labels(results)) # list of tuples of CURIE ids and labels # Keep both URI and standard terms for "explainable" output # But first convert CURIE IDs to URIs # Add explicit if clause because I saw sometimes the curies were None candidates = list((adapter.curie_to_uri(curie), term) for (curie, term) in labels if curie is not None) return candidates