# Hugging Face Spaces page residue (uploader / commit info), commented out so the file parses:
# RohanKarthikeyan's picture
# Removed comments
# ca3a4f2 verified
"""
BioMed text normalization MCP server.
"""
import asyncio
from typing import Optional
import gradio as gr
from oaklib_utils import get_candidates
from openai_utils import ask_openai
# Prompt template for the NER (extraction) step. Placeholders: {entity} is the
# entity type to look for, {text} is the paragraph to scan. The model is told
# to return the literal string "Not Found" when nothing matches (checked in
# extract_and_normalize).
NER_PROMPT = """
You are an expert annotator of biomedical text.
Annotate/Extract all {entity}(s) in this text: {text}
Instructions:
1. If no such entity or entities are found, then **return exactly**: Not Found
2. Extract only the entity. If only an abbreviation is present, expand it based on the
biomedical context in the given paragraph. For e.g., BA12 full form is Brodmann (1909) area 12.
3. Do not provide any additional information or formatting.
Do not guess or hallucinate if you are uncertain. This has high-stakes, so it's better to be safe
than sorry. This is very important, so you'd better be sure of your answer, OK?
"""

# Prompt template for the RAG (normalization) step. Placeholders: {top_k_preds}
# is the list of candidate standard terms, {entity} is the raw extracted term.
# The model must answer with a term taken verbatim from the candidate list.
RAG_PROMPT = """
You are an expert normalizer of biomedical entities.
Given the following list of candidate standard terms: {top_k_preds},
find the single closest matching term for this unnormalized entity: {entity}.
Instructions:
1. **IMPORTANT:** Do **NOT** guess or hallucinate. Do **NOT** provide any term that
is not explicitly present in the list of standardized terms.
2. Do not overgeneralize unless no match is available.
3. Do not provide any additional information or formatting.
This has high-stakes, so it's better to be safe than sorry. This is very important, so you'd better
be sure of your answer, OK?
"""
async def extract_entities(paragraph: str, target_entity: str) -> Optional[list[str]]:
    """
    Run NER over a paragraph for a single entity type.

    Args:
        paragraph (str): Biomedical text to scan for entities.
        target_entity (str): Entity type to extract (e.g., 'disease', 'tissue').

    Returns:
        Optional[list[str]]: The extracted entities of the requested type, or
        None when the model did not produce a valid response.
    """
    # Fill the NER template and hand it to the LLM in "ner" mode.
    ner_query = NER_PROMPT.format(entity=target_entity, text=paragraph)
    return await ask_openai(ner_query, usage="ner")
async def normalize_entities(
    raw_terms: list[str],
) -> list[dict[Optional[str], Optional[str]]]:
    """
    Normalize a list of raw terms to the most appropriate standard terms from a list
    of candidates.

    This function is designed to process the output from extract_entities().

    Args:
        raw_terms (list[str]): List of unnormalized terms, typically from extract_entities().

    Returns:
        list[dict[Optional[str], Optional[str]]]: A list of single-entry dictionaries,
        each mapping the best matching normalized term (key) to its corresponding URI
        (value). If normalization fails for a term, the entry is {None: None}.
    """

    async def process_single_entity(raw_term: str) -> dict[Optional[str], Optional[str]]:
        # get_candidates is synchronous (oaklib), so run it in a worker thread
        # to avoid blocking the event loop.
        candidates = await asyncio.to_thread(get_candidates, raw_term)
        # candidates is a sequence of (URI, term) pairs. (Previously the loop
        # variable shadowed `candidates` itself; use a distinct name.)
        candidate_std_terms = [term for _, term in candidates]

        # Ask the LLM to pick the closest standard term from this term's candidates.
        prompt = RAG_PROMPT.format(entity=raw_term, top_k_preds=candidate_std_terms)
        result = await ask_openai(prompt, usage="rag")

        # Map the LLM's chosen term back to its URI; None when the model gave no
        # answer or returned a term outside the candidate list.
        result_uri: Optional[str] = None
        if result is not None:
            result_uri = next((uri for uri, term in candidates if term == result), None)
        return {result: result_uri}

    # Normalize all terms concurrently.
    tasks = [process_single_entity(term) for term in raw_terms]
    return await asyncio.gather(*tasks)
async def extract_and_normalize(
    paragraph: str, target_entity: str
) -> list[dict[Optional[str], Optional[str]]]:
    """
    Extract entities from a paragraph and normalize them in one operation.

    Args:
        paragraph: The paragraph from which to extract entities.
        target_entity: The type of entity to extract and normalize.

    Returns:
        list[dict[Optional[str], Optional[str]]]: A list of dictionaries, where each dictionary contains
        the best matching normalized term (key) and its corresponding URI (value). If normalization fails
        for a term, the dictionary will have a None value for both the key and URI.
    """
    # The NER prompt expects a lowercase entity type.
    entities = await extract_entities(paragraph, target_entity.lower())
    # Bail out when the model answered "Not Found" or gave no valid response.
    if entities is None or entities == ["Not Found"]:
        return []
    return await normalize_entities(entities)
def toggle_custom_box(selected: str):
    """Show or hide the custom-entity textbox and its warning based on the dropdown choice."""
    is_custom = selected == "Custom"
    # Both components are visible (and the textbox editable) only for "Custom".
    textbox_update = gr.Textbox(visible=is_custom, interactive=is_custom)
    warning_update = gr.Markdown(
        "**Warning:** This tool is optimized and tested for Disease, Tissue, and Cell Type entities. "
        "While you can input custom entities, results may vary in accuracy and reliability.",
        visible=is_custom,
    )
    return textbox_update, warning_update
# Create the Gradio app: soft teal/green theme for the UI.
app_theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="green",
)
with gr.Blocks(theme=app_theme) as demo:
    # Page header / usage blurb.
    gr.Markdown("""
    # 🧬 BioMedNorm: Entity Extraction & Normalization
    Welcome to the BioMedNorm MCP Server.
    This server is designed to be used by LLMs to extract and standardize biological
    entities (like disease, tissue) from biomedical text.
    Enter the text below, specify the entity type to extract and normalize entities, and voila!
    """)
    # Two-column layout using Row + Column: inputs left, JSON result right.
    with gr.Row():
        with gr.Column():
            paragraph = gr.Textbox(
                label="Text Input",
                placeholder="Enter paragraph here...",
                lines=8,
                info="Enter the biomedical text for entity extraction.",
            )
            target_entity = gr.Dropdown(
                ["Disease", "Tissue", "Cell Type", "Custom"],
                label="Entity Type",
                value="Disease",
                # allow_custom_value=True,
                info="Select the type of entity you want to extract and normalize from the text.",
            )
            # Free-text entity type; revealed by toggle_custom_box when "Custom" is selected.
            custom_entity = gr.Textbox(
                label="Custom Entity",
                placeholder="Enter custom entity type here",
                visible=False,
                interactive=True,
                info="Enter your custom entity type if 'Custom' is selected.",
            )
            # Accuracy caveat for custom entity types; shown together with custom_entity.
            warning = gr.Markdown(
                visible=False  # Initially hidden
            )
            normalize_btn = gr.Button("Normalize", variant="primary")
        with gr.Column():
            output = gr.JSON(label="Normalized Entities")
    # Update custom_entity and warning visibility based on dropdown selection.
    target_entity.change(
        fn=toggle_custom_box, inputs=target_entity, outputs=[custom_entity, warning],
        api_name=False
    )
    # Markdown used as a lightweight loading indicator for the button pipeline below.
    with gr.Row():
        status = gr.Markdown("")
    with gr.Accordion("Example Inputs", open=False):
        gr.Examples(
            examples=[
                ["The patient was diagnosed with diabetes and hypertension.", "Disease"],
                [
                    "Samples of BA12 tissue, weighing approximately 50-100 mg each, were homogenized in nuclei extraction buffer.",
                    "Tissue",
                ],
                [
                    "Coupling scTCR-seq with scRNA-seq can reveal the relationship between clonotype and phenotype in T or B cell populations.",
                    "Cell Type",
                ],
            ],
            inputs=[paragraph, target_entity],
        )
    # Button pipeline: show status, run the async extract+normalize, clear status.
    # NOTE(review): custom_entity's value is never passed to extract_and_normalize —
    # selecting "Custom" sends the literal dropdown string "Custom" as the entity
    # type. Confirm whether custom_entity should be wired into this pipeline.
    normalize_btn.click(
        lambda: "Processing...",  # Show loading immediately
        None,
        status,
        queue=False,
        api_name=False,  # Hide this endpoint
    ).then(
        extract_and_normalize,  # Async processing
        [paragraph, target_entity],
        output,
    ).then(
        lambda: "",  # Clear status
        None,
        status,
        api_name=False,  # Hide this endpoint
    )
if __name__ == "__main__":
    # mcp_server=True exposes the app's functions as MCP tools for LLM clients.
    demo.launch(mcp_server=True)