Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
"""Processing utilities for radiology report structuring prompts. | |
This module provides helper functions for processing and formatting prompts | |
used in the LangExtract system for radiology report structuring. | |
""" | |
import dataclasses | |
import json | |
from typing import Optional | |
from langextract.data import ExampleData | |
from langextract.data_lib import enum_asdict_factory | |
from prompt_instruction import PROMPT_INSTRUCTION | |
def clean_dict(obj):
    """Recursively strip null and empty values from a dict/list structure.

    Used to produce more compact JSON for the prompt examples. The input is
    never mutated; new dicts and lists are built and returned.

    Cleaning rules:
      * In a dict, a key is dropped when its cleaned value is ``None``, an
        empty dict, or an empty list. A nested dict or list that becomes
        empty after cleaning is therefore dropped from its parent dict too.
      * In a list, only entries whose cleaned value is ``None`` are dropped;
        empty dicts and empty lists that appear as list elements are kept.
      * Any other value (including falsy ones such as ``0``, ``""`` and
        ``False``) is returned unchanged.

    Args:
      obj: The object to clean (dict, list, or primitive value).

    Returns:
      The cleaned object with null/empty values removed as described above.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            cleaned_value = clean_dict(value)
            # Only include non-null, non-empty values.
            if (
                cleaned_value is not None
                and cleaned_value != {}
                and cleaned_value != []
            ):
                cleaned[key] = cleaned_value
        return cleaned

    if isinstance(obj, list):
        # Clean every element exactly once. Recursing once for the filter and
        # again for the value would double the work at each nesting level.
        cleaned_items = (clean_dict(item) for item in obj)
        return [item for item in cleaned_items if item is not None]

    return obj
def generate_markdown_prompt(
    examples: list[ExampleData], input_text: Optional[str] = None
) -> str:
    """Assemble the complete markdown prompt from few-shot examples.

    Each example is converted to a dict with ``dataclasses.asdict`` (using
    LangExtract's ``enum_asdict_factory`` as the dict factory), its
    ``extractions`` entry is passed through ``clean_dict``, and the result is
    rendered as an indented JSON block next to the example's input text.
    Example sections are numbered from 1 and separated by a ``---`` rule.

    Args:
      examples: ExampleData objects used for few-shot learning.
      input_text: Optional text to include as a trailing inference section.
        When it is ``None`` or empty, no inference section is emitted.

    Returns:
      ``PROMPT_INSTRUCTION`` formatted with the rendered examples and the
      (possibly empty) inference section.
    """
    rendered_sections = []
    for number, sample in enumerate(examples, start=1):
        sample_as_dict = dataclasses.asdict(
            sample, dict_factory=enum_asdict_factory
        )
        # Strip null values and empty objects before serializing.
        payload = clean_dict({"extractions": sample_as_dict["extractions"]})
        payload_json = json.dumps(payload, indent=2)
        rendered_sections.append(
            f"""## Example {number}
**Input Text:**
```
{sample.text}
```
**Expected Output:**
```json
{payload_json}
```"""
        )

    # The inference section is emitted only when input text was supplied.
    if not input_text:
        inference_block = ""
    else:
        inference_block = f"""
## Inference Example:
**Input Text:**
```
{input_text}
```
**Expected Output:**
"""

    return PROMPT_INSTRUCTION.format(
        examples="\n\n---\n\n".join(rendered_sections),
        inference_section=inference_block,
    )