Spaces:
Runtime error
Runtime error
import os | |
import re | |
from urllib.parse import urlparse | |
import openai | |
import base64 | |
import requests | |
import json | |
from dotenv import load_dotenv | |
# Prompt sent together with each garment image. It asks the model to describe
# the product and to answer with a single JSON document whose "product",
# "description_tags" and "category_tags" keys are consumed by the code below.
# The closing brace of the JSON template is emitted BEFORE the final
# "JSON only" instruction so that the example structure is well delimited.
products = (
    "YOUR ROLE:\n"
    "You are an image tagging expert that works for an online shopping company, "
    "your job is to meticulously tag every image given in a lot of detail with "
    "the purpose of using those tags for improved user search and a recommender algorithm\n\n"
    "UNDERSTANDING:\n"
    "First, explain your understanding of what the product is, what it might be used for "
    "and who it might appeal to. Use that understanding to put the rest of the analysis and tagging into context\n\n"
    "TAGGING:\n"
    "You need to generate 2 types of tags and output them in a JSON format in a single file. "
    "So to start with, think about what kind of tags would be useful for someone searching "
    "for an item to enhance the recommender algorithm.\n\n"
    "1) Image descriptions: generate tags between 20 and 50 tags that describe the item in a "
    "useful way. Include descriptive tags about the image, then think about the tags that would "
    "be useful for someone to find this image or for a recommender algorithm to perform well.\n\n"
    "2) Category tags: there are high level categories for products, you need to first put them into "
    "the appropriate categories they are, and then for each category, you need to list sub-categories "
    "(description given, but please come up with relevant items yourself) - there could be more than "
    "one sub-category:\n"
    "- Who is it for [target audience]\n"
    "- Types [product type]\n"
    "- Style [styles]\n"
    "- Purpose [how and where the item would be used]\n\n"
    "Please respond in a JSON format, in the following structure:\n\n"
    "{\n"
    ' "product": {\n'
    ' "title": "{{ Product Title }}",\n'
    ' "description": "{{ Description of the Product\'s Features and Usage }}",\n'
    ' "appeal": "{{ Who the product might appeal to based on demographics like age, gender, interests, etc }}"\n'
    " },\n"
    ' "description_tags": [\n'
    ' "{{ Description Tag 1 }}",\n'
    ' "{{ Description Tag 2 }}",\n'
    " // ... additional description tags as needed, between 20 to 50 tags\n"
    " ],\n"
    ' "category_tags": {\n'
    ' "target_audience": ["{{ Target Audience Option 1 }}", "{{ Target Audience Option 2 }}", "..."],\n'
    ' "product_type": ["{{ Product Type Option 1 }}", "{{ Product Type Option 2 }}", "..."],\n'
    ' "style": ["{{ Style Option 1 }}", "{{ Style Option 2 }}", "..."],\n'
    ' "purpose": ["{{ Purpose Option 1 }}", "{{ Purpose Option 2 }}", "..."]\n'
    " }\n"
    "}\n\n"
    "You MUST ONLY output JSON, no other text is permitted."
)
def encode_image(image_path):
    """Return the content of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded as UTF-8 so the value can be embedded in a
    string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def save_response_content(garments_json, image_name, content):
    """Parse a model reply and append it to the garments JSON file.

    Args:
        garments_json: Path of the JSON file holding a list of garment
            entries. It is created when missing or when its current content
            is not valid JSON.
        image_name: Path of the image the reply belongs to. Only the part
            after the last ``/`` is used.
        content: Raw text returned by the model. Markdown code-fence markers
            (```` ```json ```` / ```` ``` ````) are stripped before parsing.

    Raises:
        json.JSONDecodeError: If ``content`` is not valid JSON after the
            fence markers are removed.
        KeyError: If the parsed reply has no ``"product"`` entry.
        IndexError: If the image file name contains no ``.``.
    """
    json_string = content.replace("```json", "").replace("```", "").strip()
    data = json.loads(json_string)

    file_name = image_name.split("/")[-1]
    data["product"]["image"] = f"http://gen-ai.local:8000/static/garments/{file_name}"
    # The id is the dot-separated segment right before the extension.
    data["product"]["id"] = file_name.split(".")[-2]

    try:
        with open(garments_json, "r+") as file:
            file_data = json.load(file)
            file_data.append(data)
            # Rewrite the file from the start ...
            file.seek(0)
            json.dump(file_data, file, indent=4)
            # ... and cut off whatever is left of the previous content. The
            # new dump can be shorter than the old text (e.g. the file was
            # written with a different indentation), which would otherwise
            # leave trailing garbage and corrupt the JSON.
            file.truncate()
    except (FileNotFoundError, json.JSONDecodeError):
        # No usable file yet: start a fresh list with this entry.
        with open(garments_json, "w") as file:
            json.dump([data], file, indent=4)
def _attempt_tagging(headers, payload, image_path, garments_json):
    """Run one chat-completion request and store its result.

    Returns True when the reply was received, parsed and saved; False when
    the attempt failed in a way that is worth retrying.
    """
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            # (connect, read) seconds: without a timeout a stalled connection
            # would block the whole run forever.
            timeout=(10, 300),
        )
    except requests.exceptions.RequestException as error:
        print(f"Request failed before a response was received: {error}")
        return False

    if response.status_code != 200:
        print(
            f"Failed to get a successful response. Status code: {response.status_code}"
        )
        print("Full response:")
        print(response.text)
        return False

    try:
        response_json = response.json()
    except ValueError:
        print("The response body is not valid JSON. Full response:")
        print(response.text)
        return False

    try:
        content = response_json["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        print("The 'choices' key is missing in the response. Full response:")
        print(response_json)
        return False

    try:
        # Save the 'content' part of the response along with the image name
        save_response_content(garments_json, image_path, content)
    except (json.JSONDecodeError, KeyError, IndexError) as error:
        # The model answered, but not with the JSON structure we asked for.
        print(f"The model reply could not be parsed or stored: {error!r}")
        print(content)
        return False

    print(f"Response content saved to '{garments_json}'.")
    return True


def process_image_file(image_path, garments_json):
    """Tag one image through the chat-completions API and store the result.

    The image is base64-encoded and sent together with the ``products``
    prompt. The request is attempted up to five times; an attempt counts as
    failed on a network error, a non-200 status, a response without the
    expected ``choices[0].message.content`` field, or a reply that cannot be
    parsed and saved. Failures are reported on stdout and do not raise.

    Args:
        image_path: Path of the image file to send.
        garments_json: Path of the JSON file the parsed reply is appended to.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
        OSError: If the image cannot be read or the JSON file cannot be
            written.
    """
    # Set the OpenAI API key from environment variables
    openai.api_key = os.environ["OPENAI_API_KEY"]
    base64_image = encode_image(image_path)

    image_content = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
    }
    text_content = {"type": "text", "text": products}

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai.api_key}",
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [{"role": "user", "content": [text_content, image_content]}],
        "max_tokens": 4000,
    }

    retry = 5
    while retry > 0:
        retry -= 1
        if _attempt_tagging(headers, payload, image_path, garments_json):
            break
        if retry > 0:
            print("Retrying ...")
def _garment_sort_key(garment):
    """Order garments by numeric id first, then non-numeric ids by text."""
    garment_id = garment["product"]["id"]
    try:
        return (0, int(garment_id), "")
    except (TypeError, ValueError):
        # Ids such as "shirt" cannot be converted; keep them after the
        # numeric ones instead of aborting the whole sort.
        return (1, 0, str(garment_id))


def merge_array(garments, products):
    """Enrich garment entries with manifest data and sort them by id.

    For every garment the id is taken from its image URL (the segment right
    before the file extension), ``name`` is copied from ``title``, and the
    fields of the first manifest product with an equal ``id`` are merged
    into the garment's ``"product"`` dict. When the ``OOTD_SERVER_URL``
    environment variable is set, the image URL is rebuilt from that value
    plus the path of the existing URL; when it is unset the image URL is
    left unchanged.

    Args:
        garments: List of dicts, each with a ``"product"`` dict containing
            at least ``"image"`` and ``"title"``. Modified in place.
        products: Iterable of manifest dicts, each with an ``"id"`` key.

    Returns:
        The same ``garments`` list, sorted in place: entries with numeric
        ids in ascending numeric order, followed by any others in text order.
    """
    ootd_server_url = os.environ.get("OOTD_SERVER_URL")

    # Index the manifest once; setdefault keeps the FIRST product per id,
    # which is what a linear search with an early break would find.
    products_by_id = {}
    for product in products:
        products_by_id.setdefault(product["id"], product)

    for garment in garments:
        details = garment["product"]
        image = details["image"]
        if ootd_server_url is not None:
            # Without this guard an unset variable would yield "None/<path>".
            details["image"] = f"{ootd_server_url}{urlparse(image).path}"
        garment_id = re.split("[/.]", image)[-2]
        details["id"] = garment_id
        details["name"] = details["title"]
        product = products_by_id.get(garment_id)
        if product is not None:
            details.update(product)

    garments.sort(key=_garment_sort_key)
    return garments
def merge(garments_json_path, manifest_json_path):
    """Merge manifest products into the garments file and rewrite it.

    Both files are read as JSON, the garment list is passed through
    ``merge_array`` together with the manifest content, and the result is
    written back to ``garments_json_path`` with four-space indentation.

    Args:
        garments_json_path: Path of the garments JSON file; must exist.
        manifest_json_path: Path of the manifest JSON file; must exist.

    Raises:
        FileNotFoundError: If either file does not exist.
        json.JSONDecodeError: If either file does not contain valid JSON.
    """
    with open(manifest_json_path, "r") as manifest_file:
        products = json.load(manifest_file)

    with open(garments_json_path, "r+") as file:
        file_data = json.load(file)
        file_data = merge_array(file_data, products)
        # Overwrite from the beginning of the file ...
        file.seek(0)
        json.dump(file_data, file, indent=4)
        # ... and discard the tail of the old content. The merged data can be
        # shorter than what was stored (image URLs are rewritten), and any
        # leftover bytes would make the file invalid JSON.
        file.truncate()

    print(
        f"Garments JSON file updated with products from '{manifest_json_path}'."
    )
def garmant_id_processed(garments_json_path, id):
    """Tell whether a garment with the given id is already in the JSON file.

    The id of each stored garment is derived from its image URL: the segment
    right before the file extension.

    Args:
        garments_json_path: Path of the garments JSON file.
        id: Garment id to look for, as a string.

    Returns:
        True if a stored garment has that id. False if none has, if the file
        does not exist, or if the file does not contain valid JSON (such a
        file is replaced on the next save, so nothing in it counts as
        processed).
    """
    try:
        with open(garments_json_path, "r") as file:
            garments = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        return False

    return any(
        re.split("[/.]", garment["product"]["image"])[-2] == id
        for garment in garments
    )
if __name__ == "__main__":
    # Script entry point: tag every not-yet-processed ``.jpg`` found under the
    # garments image directory, then merge the manifest into the result file.
    load_dotenv(override=False)

    # Each location can be overridden through the environment; an unset or
    # empty variable falls back to the default on the right.
    garments_image_path = os.environ.get("GARMENT_IMAGES_PATH") or "../Assets/garments"
    manifest_json_path = (
        os.environ.get("GARMENT_MANIFEST_FILE_PATH")
        or "../Assets/garments/manifest.json"
    )
    garments_json_path = os.environ.get("GARMENTS_FILE_PATH") or "./data/garments.json"

    for root, _dirs, files in os.walk(garments_image_path):
        for file_name in files:
            name_parts = re.split("[/.]", file_name)
            if name_parts[-1] != "jpg":
                continue

            image_path = f"{root}/{file_name}"
            print(f"Processing image file {image_path}...")

            # The garment id is the name segment right before the extension.
            garment_id = name_parts[-2]
            if garmant_id_processed(garments_json_path, garment_id):
                print(f"Garment with id '{garment_id}' already processed.")
                continue

            process_image_file(image_path, garments_json_path)

    merge(garments_json_path, manifest_json_path)