"""Gradio entry point for the BoAmps energy-consumption report form.

Builds the tabbed data-collection form, turns a submission into a JSON
report through ``generate_json`` and lets the user publish the result with
``update_dataset``.
"""
from os import path

import gradio as gr

from src.services.dataset_upload import init_huggingface, update_dataset
from src.services.json_generator import generate_json
from src.services.form_parser import form_parser
from src.ui.form_components import (
    create_header_tab,
    create_task_tab,
    create_measures_tab,
    create_system_tab,
    create_software_tab,
    create_infrastructure_tab,
    create_environment_tab,
    create_quality_tab
)

css_path = path.join(path.dirname(__file__), "./assets/styles/app.css")

# Initialize Hugging Face
init_huggingface()

_PUBLISH_BUTTON_LABEL = "Share your data to the public repository"

# Prefix of the message returned by generate_json when validation fails.
_VALIDATION_FAILURE_PREFIX = "The json file does not correspond"

# Sections of the parsed form (after "header") and, for each one, the keys
# whose values are handed to generate_json. Both the section order and the
# key order define the positional argument order, so they must not be
# reordered independently of generate_json's signature.
_GENERATE_JSON_FIELDS = (
    ("task_simple", (
        "taskFamily", "taskStage", "nbRequest",
    )),
    ("algorithms", (
        "trainingType", "algorithmType", "algorithmName", "algorithmUri",
        "foundationModelName", "foundationModelUri", "parametersNumber",
        "framework", "frameworkVersion", "classPath", "layersNumber",
        "epochsNumber", "optimizer", "quantization",
    )),
    ("dataset", (
        "dataUsage", "dataType", "dataFormat", "dataSize", "dataQuantity",
        "shape", "source", "sourceUri", "owner",
    )),
    ("task_final", (
        "measuredAccuracy", "estimatedAccuracy", "taskDescription",
    )),
    ("measures", (
        "measurementMethod", "manufacturer", "version", "cpuTrackingMode",
        "gpuTrackingMode", "averageUtilizationCpu", "averageUtilizationGpu",
        "powerCalibrationMeasurement", "durationCalibrationMeasurement",
        "powerConsumption", "measurementDuration", "measurementDateTime",
    )),
    ("system", (
        "osystem", "distribution", "distributionVersion",
    )),
    ("software", (
        "language", "version_software",
    )),
    ("infrastructure_simple", (
        "infraType", "cloudProvider", "cloudInstance", "cloudService",
    )),
    ("infrastructure_components", (
        "componentName", "componentType", "nbComponent", "memorySize",
        "manufacturer_infra", "family", "series", "share",
    )),
    ("environment", (
        "country", "latitude", "longitude", "location", "powerSupplierType",
        "powerSource", "powerSourceCarbonIntensity",
    )),
    ("quality", (
        "quality",
    )),
)


def _publish_button(*, interactive):
    """Return the "publish" button in the requested interactive state."""
    return gr.Button(
        _PUBLISH_BUTTON_LABEL, interactive=interactive, elem_classes="pubbutton")


def _build_generate_json_args(parsed_data):
    """Return the positional arguments for generate_json.

    The list starts with every value of ``parsed_data["header"]`` (in the
    mapping's own order) followed by the values selected by
    ``_GENERATE_JSON_FIELDS``.

    Raises:
        KeyError: if a section or one of its keys is missing from
            ``parsed_data``.
    """
    args = list(parsed_data["header"].values())
    for section_name, keys in _GENERATE_JSON_FIELDS:
        section = parsed_data[section_name]
        args.extend(section[key] for key in keys)
    return args


def handle_submit(*inputs):
    """Handle form submission and build the JSON report.

    Args:
        *inputs: the values of all form components, in the order they were
            registered as inputs of the submit button.

    Returns:
        A 4-tuple matching the submit button outputs: status message, path
        of the generated file (``None`` on error), JSON text (``""`` on
        error) and the publish button, which is interactive only when the
        report was generated and did not fail validation.
    """
    try:
        # Parse inputs using the structured parser
        parsed_data = form_parser.parse_inputs(inputs)
        message, file_path, json_output = generate_json(
            *_build_generate_json_args(parsed_data))
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception escape the event handler.
        return f"Error: {e}", None, "", _publish_button(interactive=False)

    # Check if the message indicates validation failure
    if message.startswith(_VALIDATION_FAILURE_PREFIX):
        publish_button = _publish_button(interactive=False)
        return message, file_path, json_output, publish_button

    publish_button = _publish_button(interactive=True)
    return "Report successfully created", file_path, json_output, publish_button


def handle_publi(file_path, json_output):
    """Publish a generated report through ``update_dataset``.

    Args:
        file_path: value of the downloadable JSON file component.
        json_output: value of the hidden JSON textbox.

    Returns:
        The value returned by ``update_dataset``, or an error message when
        either argument is empty or the publication raises.
    """
    try:
        if not file_path or not json_output:
            return "Error: No file or data to publish."

        # If validation passed, proceed to update_dataset
        update_output = update_dataset(file_path, json_output)
        return update_output
    except Exception as e:
        return f"Error during publication: {e}"


def flatten_inputs(components):
    """Flatten arbitrarily nested lists of components into one flat list.

    Uses an explicit stack instead of recursion. Only ``list`` instances are
    expanded; every other item is kept as-is. The original left-to-right
    order is preserved.
    """
    result = []
    stack = list(reversed(components))  # Use stack to avoid recursion

    while stack:
        item = stack.pop()
        if isinstance(item, list):
            # Add items in reverse order to maintain original sequence
            stack.extend(reversed(item))
        else:
            result.append(item)

    return result


# Create Gradio interface
with gr.Blocks(css_paths=css_path) as app:
    gr.Markdown("## Data Collection Form")
    gr.Markdown(
        "Welcome to this Huggingface space, where you can create a report on the energy consumption of an AI task in BoAmps format, by filling in a form.\n"
        "Parts/fields in bold red are mandatory.")

    # Create form tabs
    header_components = create_header_tab()
    task_components = create_task_tab()
    measures_components = create_measures_tab()
    system_components = create_system_tab()
    software_components = create_software_tab()
    infrastructure_components = create_infrastructure_tab()
    environment_components = create_environment_tab()
    quality_components = create_quality_tab()

    # Submit and Download Buttons
    submit_button = gr.Button("Submit", elem_classes="subbutton")
    output = gr.Textbox(label="Output", lines=1)
    json_output = gr.Textbox(visible=False)
    json_file = gr.File(label="Downloadable JSON")
    publish_button = _publish_button(interactive=False)

    # Event Handlers
    all_inputs = flatten_inputs(
        header_components
        + task_components
        + measures_components
        + system_components
        + software_components
        + infrastructure_components
        + environment_components
        + quality_components
    )

    # Validate input count matches expected structure
    expected_count = form_parser.get_total_input_count()
    if len(all_inputs) != expected_count:
        print(
            f"Warning: Input count mismatch. Expected {expected_count}, got {len(all_inputs)}")

    submit_button.click(
        handle_submit,
        inputs=all_inputs,
        outputs=[output, json_file, json_output, publish_button]
    )

    publish_button.click(
        handle_publi,
        inputs=[
            json_file, json_output
        ],
        outputs=[output]
    )

if __name__ == "__main__":
    app.launch()