Spaces:
Sleeping
Sleeping
import json | |
import os | |
import time | |
import logging | |
# Helper function to ensure directory exists
def ensure_directory_exists(filepath):
    """Create the parent directory of ``filepath`` if it is missing.

    Args:
        filepath (str): Path to a file whose containing directory is needed.
            Only the directory component is created; the file itself is
            never touched.

    Raises:
        FileExistsError: If the directory component already exists but is
            not a directory.
    """
    directory = os.path.dirname(filepath)
    # A bare filename ("out.json") has an empty directory component, which
    # means "current directory"; os.makedirs("") would raise, so skip it.
    if directory:
        # exist_ok=True avoids the check-then-create race when another
        # process creates the directory between the test and the call.
        os.makedirs(directory, exist_ok=True)
# Helper function for adaptive delay
def adaptive_delay(attempt, max_delay=60):
    """Sleep for a period that grows linearly with the retry attempt.

    The wait is ``5 * attempt`` seconds, capped at ``max_delay`` seconds.
    The chosen wait is logged at INFO level before sleeping.

    Args:
        attempt: Retry counter used to scale the wait.
        max_delay: Upper bound on the wait, in seconds.
    """
    wait_seconds = 5 * attempt
    if wait_seconds > max_delay:
        # Never wait longer than the configured ceiling.
        wait_seconds = max_delay
    logging.info(f"Retrying after {wait_seconds} seconds...")
    time.sleep(wait_seconds)
def update_config(config, model_name=None, noise_rate=None, num_queries=None):
    """
    Apply user-supplied overrides to a configuration dictionary in place.

    Args:
        config (dict): Configuration dictionary; it is mutated and returned.
        model_name (str, optional): Stored under "model_name" when truthy
            (``None`` and the empty string are ignored).
        noise_rate (float, optional): Stored under "noise_rate" as a float
            whenever it is not ``None`` (so 0.0 is honoured).
        num_queries (int, optional): Stored under "num_queries" as an int
            whenever it is not ``None`` (so 0 is honoured).

    Returns:
        dict: The same ``config`` object that was passed in.
    """
    if model_name:
        config["model_name"] = model_name

    # Numeric overrides: (config key, supplied value, type to coerce to).
    # Applied in order so a failed conversion leaves earlier keys updated.
    numeric_overrides = (
        ("noise_rate", noise_rate, float),
        ("num_queries", num_queries, int),
    )
    for key, supplied, coerce in numeric_overrides:
        if supplied is None:
            continue
        config[key] = coerce(supplied)

    return config
def load_dataset(file_name):
    """Load a JSON Lines file from the ``data/`` directory.

    Each non-blank line of the file is parsed as one JSON value. Blank or
    whitespace-only lines (for example a trailing empty line) are skipped
    instead of being passed to the JSON parser.

    Args:
        file_name (str): Name of the file, relative to ``data/``.

    Returns:
        list: The parsed JSON values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    dataset = []
    with open('data/' + file_name, "r", encoding="utf-8") as f:
        for line in f:
            record_text = line.strip()
            if not record_text:
                # json.loads("") raises; an empty line carries no record.
                continue
            dataset.append(json.loads(record_text))  # One JSON object per line
    logging.info(f"Loaded {len(dataset)} entries from file {file_name}")  # Check how many records were loaded
    return dataset