File size: 2,987 Bytes
ce500ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse
import logging
from pathlib import Path
import sys
import os

# Make ../src/license_plate_detector_ocr/data (relative to this script's directory) importable
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
                "src", "license_plate_detector_ocr", "data")))

from dataset_processing import config_loader, downloader, processor, converter

def main(args):
    """Download the configured datasets and merge them into one output folder.

    Steps, in order:
      1. Call ``logging.basicConfig`` targeting ``dataset_conversion.log``.
      2. Load the config at ``args.config`` and read its ``'datasets'`` list.
      3. Create ``args.dataset_base_dir`` and ``args.output_dir`` if missing.
      4. Download every dataset whose ``'type'`` is listed in
         ``args.platforms`` into ``<dataset_base_dir>/dataset_<index>``.
      5. Run the Kaggle, Roboflow and Hugging Face converters, then
         ``processor.process_folders`` on the output directory.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``config``, ``dataset_base_dir``, ``output_dir``, ``platforms``
            and ``roboflow_api_key``.
    """
    logging.basicConfig(
        filename='dataset_conversion.log',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )

    datasets = config_loader.load_config(args.config)['datasets']
    for directory in (args.dataset_base_dir, args.output_dir):
        os.makedirs(directory, exist_ok=True)

    def is_selected(entry, platform):
        # A dataset is handled only when its type matches and the platform was requested.
        return entry['type'] == platform and platform in args.platforms

    def download_dir(position):
        # Each dataset gets its own folder, keyed by its index in the config list.
        return Path(args.dataset_base_dir) / f"dataset_{position}"

    # Download datasets
    for position, entry in enumerate(datasets):
        if is_selected(entry, 'kaggle'):
            downloader.download_kaggle_dataset(entry['id'], download_dir(position))
        elif is_selected(entry, 'roboflow'):
            downloader.download_roboflow_dataset(
                entry['id'],
                entry['format'],
                entry['version'],
                download_dir(position),
                args.roboflow_api_key,
            )
        elif is_selected(entry, 'huggingface'):
            downloader.download_huggingface_dataset(entry['id'], download_dir(position))

    # Convert and combine datasets
    # NOTE(review): the Kaggle conversion runs even when 'kaggle' is not in
    # args.platforms, unlike the two loops below — confirm this is intended.
    converter.coco_kaggle_to_yolo(args.dataset_base_dir, args.output_dir)

    for position, entry in enumerate(datasets):
        if is_selected(entry, 'roboflow'):
            converter.copy_dataset_to_combined_folder(download_dir(position), args.output_dir)

    for position, entry in enumerate(datasets):
        if is_selected(entry, 'huggingface'):
            huggingface_data = (
                Path(args.dataset_base_dir)
                / f"dataset_{position}/license-plate-object-detection/data"
            )
            converter.convert_coco_huggingface_to_yolo(
                dataset_base_path=huggingface_data,
                output_dir=args.output_dir,
            )

    processor.process_folders(args.output_dir)

if __name__ == "__main__":
    # Command-line entry point: declare the options, parse argv, run main().
    parser = argparse.ArgumentParser(description="Download and process license plate datasets.")

    # Options are registered in this order, which is also the order shown by --help.
    option_table = (
        ("--output-dir", {
            "default": "./data/yolo_standard_dataset",
            "help": "Output directory for YOLOv11 dataset",
        }),
        ("--dataset-base-dir", {
            "default": "./data/all_datasets",
            "help": "Base directory for downloaded datasets",
        }),
        # NOTE(review): the key is only required when the literal token
        # 'roboflow' appears in argv; with the default --platforms (which
        # include roboflow) it stays optional — confirm this is intended.
        ("--roboflow-api-key", {
            "required": 'roboflow' in sys.argv,
            "help": "Roboflow API key for downloading datasets",
        }),
        ("--config", {
            "default": "./configs/datasets_config.yaml",
            "help": "Path to dataset config YAML",
        }),
        ("--platforms", {
            "nargs": "*",
            "default": ["kaggle", "roboflow", "huggingface"],
            "choices": ["kaggle", "roboflow", "huggingface"],
            "help": "Platforms to download (default: all)",
        }),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    main(parser.parse_args())