# Copied from a Hugging Face Space file view (file size: 1,302 bytes, revision 32519eb).
from datasets import load_dataset
import json
import os
from pathlib import Path
def test_medical_dataset():
    """Smoke-test loading of the ``medical_questions_pairs`` dataset.

    Requests the first 100 rows of the ``train`` split through
    ``load_dataset``, prints how many rows came back, and prints the first
    record as indented JSON.

    Returns:
        True when loading and printing both succeed; False when any
        exception is raised along the way (its message is printed).
    """
    try:
        # Load a small sample of the medical questions dataset.
        samples = load_dataset("medical_questions_pairs", split="train[:100]")
        sample_count = len(samples)
        print(f"Successfully loaded {sample_count} samples from medical_questions_pairs")

        # Show the first record so its field layout is visible.
        print("\nSample structure:")
        first_record = samples[0]
        print(json.dumps(first_record, indent=2))
    except Exception as exc:
        # Any failure (download, indexing, JSON serialization) is reported
        # rather than raised, so the caller can keep running other checks.
        print(f"Error loading dataset: {exc!s}")
        return False
    return True
def verify_data_directory(data_dir="data/raw"):
    """Ensure the raw-data directory exists and list the JSON files in it.

    Args:
        data_dir: Directory to check, as a string or ``Path``. Defaults to
            ``data/raw``, resolved relative to the current working
            directory.

    The directory (and any missing parents) is created when it does not
    exist. The names of the ``*.json`` files directly inside it are then
    printed in sorted order, or a notice is printed when there are none.

    Returns:
        None. All results are reported on standard output.
    """
    data_dir = Path(data_dir)
    if not data_dir.exists():
        print(f"Creating data directory: {data_dir}")
        data_dir.mkdir(parents=True, exist_ok=True)

    # Glob results come back in filesystem order; sort them so the listing
    # is stable from run to run.
    json_files = sorted(data_dir.glob("*.json"))

    # Forward slashes keep the messages identical to the original wording
    # ("data/raw") for the default path.
    label = data_dir.as_posix()
    if json_files:
        print(f"\nFound {len(json_files)} JSON files in {label}:")
        for file in json_files:
            print(f"- {file.name}")
    else:
        print(f"\nNo JSON files found in {label} directory")
if __name__ == "__main__":
    # Script entry point: announce each check, then run it, in order.
    for banner, check in (
        ("Testing Hugging Face dataset loading...", test_medical_dataset),
        ("\nVerifying data directory structure...", verify_data_directory),
    ):
        print(banner)
        check()