radextract / prompt_lib.py
goelak's picture
Initial commit for RadExtract
fab8051
raw
history blame
2.82 kB
"""Processing utilities for radiology report structuring prompts.
This module provides helper functions for processing and formatting prompts
used in the LangExtract system for radiology report structuring.
"""
import dataclasses
import json
from typing import Optional
from langextract.data import ExampleData
from langextract.data_lib import enum_asdict_factory
from prompt_instruction import PROMPT_INSTRUCTION
def clean_dict(obj):
    """Recursively strip null and empty entries from a dict/list structure.

    Dictionaries drop every key whose cleaned value is ``None``, an empty
    dict, or an empty list. Lists drop only items whose cleaned value is
    ``None``; an item that cleans down to an empty dict or list is kept.
    Any value that is neither a dict nor a list is returned unchanged.

    Args:
        obj: The object to clean (dict, list, or primitive value).

    Returns:
        A new cleaned dict or list, or ``obj`` itself when it is neither.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            cleaned_value = clean_dict(value)
            # Only include non-null, non-empty values.
            if (
                cleaned_value is not None
                and cleaned_value != {}
                and cleaned_value != []
            ):
                cleaned[key] = cleaned_value
        return cleaned

    if isinstance(obj, list):
        # Clean each item exactly once. Recursing separately for the filter
        # and for the value would double the work at every nesting level.
        cleaned_items = (clean_dict(item) for item in obj)
        return [item for item in cleaned_items if item is not None]

    return obj
def generate_markdown_prompt(
    examples: list[ExampleData], input_text: Optional[str] = None
) -> str:
    """Build the markdown few-shot prompt from examples and optional input.

    Each example is converted to a dict with ``dataclasses.asdict`` using
    ``enum_asdict_factory``, its ``extractions`` entry is passed through
    ``clean_dict``, and the result is rendered as an indented JSON block
    next to the example's text. The example sections are joined with
    horizontal rules and substituted into ``PROMPT_INSTRUCTION``.

    Args:
        examples: List of ExampleData objects for few-shot learning.
        input_text: Optional input text to include in an inference section.
            ``None`` or an empty string leaves the inference section empty.

    Returns:
        Formatted markdown string containing the complete prompt.
    """
    examples_list = []
    for i, example in enumerate(examples, 1):
        example_dict = dataclasses.asdict(example, dict_factory=enum_asdict_factory)
        # Clean up null values and empty objects
        cleaned_extractions = clean_dict({"extractions": example_dict["extractions"]})
        # ensure_ascii=False keeps non-ASCII characters literal, so the
        # expected output shows the same characters as the input text
        # block instead of \uXXXX escapes.
        json_output = json.dumps(cleaned_extractions, indent=2, ensure_ascii=False)
        example_section = f"""## Example {i}
**Input Text:**
```
{example.text}
```
**Expected Output:**
```json
{json_output}
```"""
        examples_list.append(example_section)

    examples_formatted = "\n\n---\n\n".join(examples_list)

    # Format inference section if input text provided
    inference_section = ""
    if input_text:
        inference_section = f"""
## Inference Example:
**Input Text:**
```
{input_text}
```
**Expected Output:**
"""

    return PROMPT_INSTRUCTION.format(
        examples=examples_formatted, inference_section=inference_section
    )