Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -377,19 +377,22 @@ LORA_NAME = "ohami_filter_autorun"
|
|
377 |
|
378 |
# ========== HELPERS ==========
|
379 |
def create_dataset(images, *captions):
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
393 |
|
394 |
def recursive_update(d, u):
|
395 |
for k, v in u.items():
|
|
|
377 |
|
378 |
# ========== HELPERS ==========
|
379 |
def create_dataset(images, *captions):
    """Copy images into a fresh dataset folder and write a caption manifest.

    A new folder ``/tmp/datasets_<uuid4>`` is created, every image is copied
    into it, and one JSON object per image is written to ``metadata.jsonl``
    in that folder with the keys ``file_name`` and ``prompt``.

    Args:
        images: Sequence of image file paths (anything ``str()`` turns into
            a path).
        *captions: One caption per image, in the same order as ``images``.

    Returns:
        The path of the created dataset folder, as a string.

    Raises:
        ValueError: If the number of images and captions differ.
    """
    if len(images) != len(captions):
        raise ValueError("Number of images and captions must be the same.")

    destination_folder = Path(f"/tmp/datasets_{uuid.uuid4()}")
    destination_folder.mkdir(parents=True, exist_ok=True)

    jsonl_file_path = destination_folder / "metadata.jsonl"

    # All images land in one flat folder, so two sources sharing a basename
    # would overwrite each other and leave two captions pointing at the same
    # file. Track the names already taken (including the manifest itself)
    # and disambiguate collisions with a numeric suffix.
    used_names = {jsonl_file_path.name}

    with jsonl_file_path.open("a", encoding="utf-8") as jsonl_file:
        for image_path, caption in zip(images, captions):
            source = Path(str(image_path))
            file_name = source.name
            counter = 1
            while file_name in used_names:
                file_name = f"{source.stem}_{counter}{source.suffix}"
                counter += 1
            used_names.add(file_name)

            shutil.copy(str(image_path), destination_folder / file_name)
            entry = {"file_name": file_name, "prompt": caption}
            jsonl_file.write(json.dumps(entry, ensure_ascii=False) + "\n")

    return str(destination_folder)
|
396 |
|
397 |
def recursive_update(d, u):
|
398 |
for k, v in u.items():
|