code refactor and cleaning
- README.md +92 -9
- app.py +129 -124
- src/services/{huggingface.py → dataset_upload.py} +0 -0
- src/services/form_parser.py +147 -0
- src/services/json_generator.py +152 -204
- src/services/report_builder.py +273 -0
README.md
CHANGED
@@ -11,20 +11,103 @@ license: apache-2.0

Removed (previous quick-start notes in the old README):

- Activate it: `.\.venv\Scripts\activate`
- Install dependencies: `pipenv install -d`
- Launch the application: `pipenv run python main.py`
New README content:

short_description: Create a report in BoAmps format
---

# BoAmps Report Creation Tool 🌿

This tool is part of the initiative [BoAmps](https://github.com/Boavizta/BoAmps).
The purpose of the BoAmps project is to build a large, open database of the energy consumption of IT/AI tasks, depending on data nature, algorithms, hardware, etc., in order to improve energy-efficiency approaches based on empirical knowledge.

This space was initiated by a group of students from Sud Telecom Paris; many thanks to [Hicham FILALI](https://huggingface.co/FILALIHicham) for his work.

## 🚀 Quick Start

### Prerequisites

- **Python** >= 3.12

### Installation Steps

1. **Clone the repository**

2. **Create and activate a virtual environment (optional)**

   ```bash
   # Windows
   python -m venv .venv
   .\.venv\Scripts\activate

   # Linux/macOS
   python -m venv .venv
   source .venv/bin/activate
   ```

3. **Install dependencies**

   ```bash
   pip install pipenv
   pipenv install --dev
   ```

4. **Launch the application**

   ```bash
   python ./app.py
   ```

5. **Access the application**
   - Open your browser and go to `http://localhost:7860`
   - The Gradio interface will be available for creating BoAmps reports

## 🏗️ Architecture Overview

### Core Components

1. **`app.py`** - Main application file
   - Initializes the Gradio interface
   - Orchestrates all UI components
   - Handles application routing and main logic

2. **Services Layer (`src/services/`)**
   - **`json_generator.py`**: Generates BoAmps-compliant JSON reports
   - **`report_builder.py`**: Constructs structured report data
   - **`form_parser.py`**: Processes and validates form inputs
   - **`dataset_upload.py`**: Manages Hugging Face dataset integration
   - **`util.py`**: Common utility functions

3. **UI Layer (`src/ui/`)**
   - **`form_components.py`**: Gradio interface components for the different report sections

4. **Assets & Validation (`assets/`)**
   - **`validation.py`**: BoAmps schema validation logic
   - **`app.css`**: Application styling

### Data Flow

```
User Input (Gradio Form)
        ↓
Form Parser & Validation
        ↓
JSON Generator
        ↓
Report Builder
        ↓
BoAmps Schema Validation
        ↓
JSON Report Output
```
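
A report produced by this flow can also be re-checked against the schema outside the UI. Below is a minimal sketch; the file name is only an illustration of the `report_<taskStage>_<taskFamily>_<infraType>_<reportId>.json` pattern used by the generator, and the script assumes it is run from the repository root so the `assets` package is importable:

```python
import json

# Same validator the app imports internally (assets.utils.validation).
from assets.utils.validation import validate_boamps_schema

# Hypothetical file name following the report_<taskStage>_<taskFamily>_<infraType>_<reportId>.json pattern.
with open("report_inference_imageClassification_cloud_42.json", encoding="utf-8") as f:
    report = json.load(f)

is_valid, message = validate_boamps_schema(report)
print(is_valid, message)
```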

## 🤝 Contributing

Contributions are welcome! Please:

1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Submit a pull request

## 📄 License

This project is licensed under the Apache 2.0 License; see the license information in the repository header.

## 🙏 Acknowledgments

This space was initiated by a group of students from Sud Telecom Paris; many thanks to [Hicham FILALI](https://huggingface.co/FILALIHicham) for his work.

For more information about the BoAmps initiative, visit the [official repository](https://github.com/Boavizta/BoAmps).
app.py
CHANGED
Old version (removed lines and surrounding context):

@@ -1,7 +1,8 @@

```python
import gradio as gr
from os import path
from src.services.  # removed import line, truncated in the rendered diff
from src.services.json_generator import generate_json
from src.ui.form_components import (
    create_header_tab,
    create_task_tab,
```

@@ -19,120 +20,102 @@ init_huggingface()

```python
def handle_submit(*inputs):
    # We need to group the flattened dynamic components back into lists
    idx = 0

    # Header (11 components)
    header_params = inputs[idx:idx+11]
    idx += 11

    # Task simple (3 components)
    taskFamily, taskStage, nbRequest = inputs[idx:idx+3]
    idx += 3

    # Task algorithms (14 fields × 5 rows = 70 components)
    algorithm_flat = inputs[idx:idx+70]
    idx += 70

    # Reconstruct algorithm lists (14 fields)
    # The components are organized by field first, then row
    trainingType = algorithm_flat[0:5]           # positions 0-4
    algorithmType = algorithm_flat[5:10]         # positions 5-9
    algorithmName = algorithm_flat[10:15]        # positions 10-14
    algorithmUri = algorithm_flat[15:20]         # positions 15-19
    foundationModelName = algorithm_flat[20:25]  # positions 20-24
    foundationModelUri = algorithm_flat[25:30]   # positions 25-29
    parametersNumber = algorithm_flat[30:35]     # positions 30-34
    framework = algorithm_flat[35:40]            # positions 35-39
    frameworkVersion = algorithm_flat[40:45]     # positions 40-44
    classPath = algorithm_flat[45:50]            # positions 45-49
    layersNumber = algorithm_flat[50:55]         # positions 50-54
    epochsNumber = algorithm_flat[55:60]         # positions 55-59
    optimizer = algorithm_flat[60:65]            # positions 60-64
    quantization = algorithm_flat[65:70]         # positions 65-69

    # Task dataset (9 fields × 5 rows = 45 components)
    dataset_flat = inputs[idx:idx+45]
    idx += 45

    # Reconstruct dataset lists (9 fields)
    # The components are organized by field first, then row:
    # dataUsage[0-4], dataType[0-4], dataFormat[0-4], etc.
    dataUsage = dataset_flat[0:5]        # positions 0-4
    dataType = dataset_flat[5:10]        # positions 5-9
    dataFormat = dataset_flat[10:15]     # positions 10-14
    dataSize = dataset_flat[15:20]       # positions 15-19
    dataQuantity = dataset_flat[20:25]   # positions 20-24
    shape = dataset_flat[25:30]          # positions 25-29
    source = dataset_flat[30:35]         # positions 30-34
    sourceUri = dataset_flat[35:40]      # positions 35-39
    owner = dataset_flat[40:45]          # positions 40-44

    # Task final (3 components)
    measuredAccuracy, estimatedAccuracy, taskDescription = inputs[idx:idx+3]
    idx += 3

    # Measures dynamic section (12 fields × 5 rows = 60 components)
    measures_flat = inputs[idx:idx+60]
    idx += 60

    # Reconstruct measures lists (12 fields)
    # The components are organized by field first, then row
    measurementMethod = measures_flat[0:5]                  # positions 0-4
    manufacturer = measures_flat[5:10]                      # positions 5-9
    version = measures_flat[10:15]                          # positions 10-14
    cpuTrackingMode = measures_flat[15:20]                  # positions 15-19
    gpuTrackingMode = measures_flat[20:25]                  # positions 20-24
    averageUtilizationCpu = measures_flat[25:30]            # positions 25-29
    averageUtilizationGpu = measures_flat[30:35]            # positions 30-34
    powerCalibrationMeasurement = measures_flat[35:40]      # positions 35-39
    durationCalibrationMeasurement = measures_flat[40:45]   # positions 40-44
    powerConsumption = measures_flat[45:50]                 # positions 45-49
    measurementDuration = measures_flat[50:55]              # positions 50-54
    measurementDateTime = measures_flat[55:60]              # positions 55-59

    # System (3 components)
    osystem, distribution, distributionVersion = inputs[idx:idx+3]
    idx += 3

    # Software (2 components)
    language, version_software = inputs[idx:idx+2]
    idx += 2

    # Infrastructure simple (4 components)
    infraType, cloudProvider, cloudInstance, cloudService = inputs[idx:idx+4]
    idx += 4

    # Infrastructure components dynamic section (8 fields × 5 rows = 40 components)
    infra_flat = inputs[idx:idx+40]
    idx += 40

    # Reconstruct infrastructure component lists (8 fields)
    # The components are organized by field first, then row
    componentName = infra_flat[0:5]         # positions 0-4
    componentType = infra_flat[5:10]        # positions 5-9
    nbComponent = infra_flat[10:15]         # positions 10-14
    memorySize = infra_flat[15:20]          # positions 15-19
    manufacturer_infra = infra_flat[20:25]  # positions 20-24
    family = infra_flat[25:30]              # positions 25-29
    series = infra_flat[30:35]              # positions 30-34
    share = infra_flat[35:40]               # positions 35-39

    # Environment (7 components)
    country, latitude, longitude, location, powerSupplierType, powerSource, powerSourceCarbonIntensity = inputs[
        idx:idx+7]
    idx += 7

    # Quality (1 component)
    quality = inputs[idx]
    idx += 1

    # Continue with other sections - for now, take the remaining as they were
    remaining_params = inputs[idx:]

    # Call generate_json with reconstructed parameters
    try:
        message, file_path, json_output = generate_json(
            *header_params,
            taskFamily, taskStage, nbRequest,
```

@@ -152,6 +135,7 @@ def handle_submit(*inputs):

```python
            powerSupplierType, powerSource, powerSourceCarbonIntensity,
            quality
        )
    except Exception as e:
        return f"Error: {e}", None, "", gr.Button("Share your data to the public repository", interactive=False, elem_classes="pubbutton")
```

@@ -168,9 +152,16 @@ def handle_submit(*inputs):

```python
def handle_publi(file_path, json_output):
    ...  # old body removed; its lines are not recoverable from the rendered diff


# Create Gradio interface
```

@@ -197,21 +188,35 @@ with gr.Blocks(css_paths=css_path) as app:

```python
    publish_button = gr.Button(
        "Share your data to the public repository", interactive=False, elem_classes="pubbutton")

    # Event Handlers -
    def flatten_inputs(components):
        """..."""                  # old docstring, truncated in the rendered diff
        for ...                    # old recursive loop; its body is not recoverable
            if isinstance(item, list):
                ...
            else:
                ...

    all_inputs = flatten_inputs(header_components + task_components + measures_components +
                                system_components + software_components + infrastructure_components +
                                environment_components + quality_components)

    submit_button.click(
        handle_submit,
        inputs=all_inputs,
```
New version:

```python
import gradio as gr
from os import path
from src.services.dataset_upload import init_huggingface, update_dataset
from src.services.json_generator import generate_json
from src.services.form_parser import form_parser
from src.ui.form_components import (
    create_header_tab,
    create_task_tab,
```

```python
def handle_submit(*inputs):
    """Handle form submission with optimized parsing."""
    try:
        # Parse inputs using the structured parser
        parsed_data = form_parser.parse_inputs(inputs)

        # Extract data for the generate_json function
        header_params = list(parsed_data["header"].values())

        # Task data
        task_simple = parsed_data["task_simple"]
        taskFamily, taskStage, nbRequest = (
            task_simple["taskFamily"], task_simple["taskStage"], task_simple["nbRequest"])

        # Dynamic sections - algorithm data
        algorithms = parsed_data["algorithms"]
        trainingType = algorithms["trainingType"]
        algorithmType = algorithms["algorithmType"]
        algorithmName = algorithms["algorithmName"]
        algorithmUri = algorithms["algorithmUri"]
        foundationModelName = algorithms["foundationModelName"]
        foundationModelUri = algorithms["foundationModelUri"]
        parametersNumber = algorithms["parametersNumber"]
        framework = algorithms["framework"]
        frameworkVersion = algorithms["frameworkVersion"]
        classPath = algorithms["classPath"]
        layersNumber = algorithms["layersNumber"]
        epochsNumber = algorithms["epochsNumber"]
        optimizer = algorithms["optimizer"]
        quantization = algorithms["quantization"]

        # Dynamic sections - dataset data
        dataset = parsed_data["dataset"]
        dataUsage = dataset["dataUsage"]
        dataType = dataset["dataType"]
        dataFormat = dataset["dataFormat"]
        dataSize = dataset["dataSize"]
        dataQuantity = dataset["dataQuantity"]
        shape = dataset["shape"]
        source = dataset["source"]
        sourceUri = dataset["sourceUri"]
        owner = dataset["owner"]

        # Task final data
        task_final = parsed_data["task_final"]
        measuredAccuracy, estimatedAccuracy, taskDescription = (
            task_final["measuredAccuracy"], task_final["estimatedAccuracy"], task_final["taskDescription"])

        # Measures data
        measures = parsed_data["measures"]
        measurementMethod = measures["measurementMethod"]
        manufacturer = measures["manufacturer"]
        version = measures["version"]
        cpuTrackingMode = measures["cpuTrackingMode"]
        gpuTrackingMode = measures["gpuTrackingMode"]
        averageUtilizationCpu = measures["averageUtilizationCpu"]
        averageUtilizationGpu = measures["averageUtilizationGpu"]
        powerCalibrationMeasurement = measures["powerCalibrationMeasurement"]
        durationCalibrationMeasurement = measures["durationCalibrationMeasurement"]
        powerConsumption = measures["powerConsumption"]
        measurementDuration = measures["measurementDuration"]
        measurementDateTime = measures["measurementDateTime"]

        # System data
        system = parsed_data["system"]
        osystem, distribution, distributionVersion = (
            system["osystem"], system["distribution"], system["distributionVersion"])

        # Software data
        software = parsed_data["software"]
        language, version_software = software["language"], software["version_software"]

        # Infrastructure data
        infra_simple = parsed_data["infrastructure_simple"]
        infraType, cloudProvider, cloudInstance, cloudService = (
            infra_simple["infraType"], infra_simple["cloudProvider"],
            infra_simple["cloudInstance"], infra_simple["cloudService"])

        # Infrastructure components
        infra_components = parsed_data["infrastructure_components"]
        componentName = infra_components["componentName"]
        componentType = infra_components["componentType"]
        nbComponent = infra_components["nbComponent"]
        memorySize = infra_components["memorySize"]
        manufacturer_infra = infra_components["manufacturer_infra"]
        family = infra_components["family"]
        series = infra_components["series"]
        share = infra_components["share"]

        # Environment data
        environment = parsed_data["environment"]
        country, latitude, longitude, location, powerSupplierType, powerSource, powerSourceCarbonIntensity = (
            environment["country"], environment["latitude"], environment["longitude"],
            environment["location"], environment["powerSupplierType"],
            environment["powerSource"], environment["powerSourceCarbonIntensity"])

        # Quality data
        quality = parsed_data["quality"]["quality"]

        # Call generate_json with structured parameters
        message, file_path, json_output = generate_json(
            *header_params,
            taskFamily, taskStage, nbRequest,
```

(the unchanged argument lines of the `generate_json` call are not shown in the diff)

```python
            powerSupplierType, powerSource, powerSourceCarbonIntensity,
            quality
        )

    except Exception as e:
        return f"Error: {e}", None, "", gr.Button("Share your data to the public repository", interactive=False, elem_classes="pubbutton")
```

```python
def handle_publi(file_path, json_output):
    """Handle publication to the Hugging Face dataset with improved error handling."""
    try:
        if not file_path or not json_output:
            return "Error: No file or data to publish."

        # If validation passed, proceed to update_dataset
        update_output = update_dataset(file_path, json_output)
        return update_output
    except Exception as e:
        return f"Error during publication: {str(e)}"


# Create Gradio interface
```

```python
    publish_button = gr.Button(
        "Share your data to the public repository", interactive=False, elem_classes="pubbutton")

    # Event Handlers - optimized input flattening
    def flatten_inputs(components):
        """
        Iteratively flatten nested lists of components.
        A stack replaces recursion, which keeps memory use predictable on deep nesting.
        """
        result = []
        stack = list(reversed(components))  # Use a stack to avoid recursion

        while stack:
            item = stack.pop()
            if isinstance(item, list):
                # Add items in reverse order to maintain the original sequence
                stack.extend(reversed(item))
            else:
                result.append(item)

        return result

    all_inputs = flatten_inputs(header_components + task_components + measures_components +
                                system_components + software_components + infrastructure_components +
                                environment_components + quality_components)

    # Validate that the input count matches the expected structure
    expected_count = form_parser.get_total_input_count()
    if len(all_inputs) != expected_count:
        print(
            f"Warning: Input count mismatch. Expected {expected_count}, got {len(all_inputs)}")

    submit_button.click(
        handle_submit,
        inputs=all_inputs,
```
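For reference, the iterative flatten can be exercised on its own. The snippet below is a standalone copy of the helper, with plain placeholder values in place of Gradio components, just to show the intended behaviour:

```python
def flatten_inputs(components):
    """Iteratively flatten arbitrarily nested lists while preserving order."""
    result = []
    stack = list(reversed(components))
    while stack:
        item = stack.pop()
        if isinstance(item, list):
            stack.extend(reversed(item))
        else:
            result.append(item)
    return result


# Placeholder values standing in for Gradio components:
print(flatten_inputs(["header", ["task", ["algo1", "algo2"]], "quality"]))
# -> ['header', 'task', 'algo1', 'algo2', 'quality']
```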
src/services/{huggingface.py → dataset_upload.py}
RENAMED
File without changes
src/services/form_parser.py
ADDED
@@ -0,0 +1,147 @@

```python
"""
Form parser configuration and utilities for handling Gradio form inputs.
This module provides a centralized way to manage form structure and parsing.
"""

from dataclasses import dataclass
from typing import List, Any, Tuple


@dataclass
class FormSection:
    """Represents a section of the form with its field count."""
    name: str
    field_count: int
    fields: List[str] = None


@dataclass
class DynamicSection:
    """Represents a dynamic section with multiple rows and fields."""
    name: str
    fields: List[str]
    max_rows: int = 5

    @property
    def total_components(self) -> int:
        return len(self.fields) * self.max_rows


# Form structure configuration
FORM_STRUCTURE = [
    FormSection("header", 11, [
        "licensing", "formatVersion", "formatVersionSpecificationUri", "reportId",
        "reportDatetime", "reportStatus", "publisher_name", "publisher_division",
        "publisher_projectName", "publisher_confidentialityLevel", "publisher_publicKey"
    ]),

    FormSection("task_simple", 3, [
        "taskFamily", "taskStage", "nbRequest"
    ]),

    DynamicSection("algorithms", [
        "trainingType", "algorithmType", "algorithmName", "algorithmUri",
        "foundationModelName", "foundationModelUri", "parametersNumber", "framework",
        "frameworkVersion", "classPath", "layersNumber", "epochsNumber", "optimizer", "quantization"
    ]),

    DynamicSection("dataset", [
        "dataUsage", "dataType", "dataFormat", "dataSize", "dataQuantity",
        "shape", "source", "sourceUri", "owner"
    ]),

    FormSection("task_final", 3, [
        "measuredAccuracy", "estimatedAccuracy", "taskDescription"
    ]),

    DynamicSection("measures", [
        "measurementMethod", "manufacturer", "version", "cpuTrackingMode", "gpuTrackingMode",
        "averageUtilizationCpu", "averageUtilizationGpu", "powerCalibrationMeasurement",
        "durationCalibrationMeasurement", "powerConsumption", "measurementDuration", "measurementDateTime"
    ]),

    FormSection("system", 3, [
        "osystem", "distribution", "distributionVersion"
    ]),

    FormSection("software", 2, [
        "language", "version_software"
    ]),

    FormSection("infrastructure_simple", 4, [
        "infraType", "cloudProvider", "cloudInstance", "cloudService"
    ]),

    DynamicSection("infrastructure_components", [
        "componentName", "componentType", "nbComponent", "memorySize",
        "manufacturer_infra", "family", "series", "share"
    ]),

    FormSection("environment", 7, [
        "country", "latitude", "longitude", "location",
        "powerSupplierType", "powerSource", "powerSourceCarbonIntensity"
    ]),

    FormSection("quality", 1, ["quality"])
]


class FormParser:
    """Utility class for parsing form inputs based on the form structure."""

    def __init__(self):
        self.structure = FORM_STRUCTURE

    def parse_inputs(self, inputs: Tuple[Any, ...]) -> dict:
        """
        Parse form inputs into a structured dictionary.

        Args:
            inputs: Tuple of all form input values

        Returns:
            dict: Parsed form data organized by sections
        """
        parsed_data = {}
        idx = 0

        for section in self.structure:
            if isinstance(section, FormSection):
                # Simple section - extract values directly
                section_data = inputs[idx:idx + section.field_count]
                if section.fields:
                    parsed_data[section.name] = dict(
                        zip(section.fields, section_data))
                else:
                    parsed_data[section.name] = section_data
                idx += section.field_count

            elif isinstance(section, DynamicSection):
                # Dynamic section - extract and reshape data
                flat_data = inputs[idx:idx + section.total_components]
                idx += section.total_components

                # Reshape flat data into field-organized lists
                section_data = {}
                for field_idx, field_name in enumerate(section.fields):
                    start_pos = field_idx * section.max_rows
                    end_pos = start_pos + section.max_rows
                    section_data[field_name] = flat_data[start_pos:end_pos]

                parsed_data[section.name] = section_data

        return parsed_data

    def get_total_input_count(self) -> int:
        """Get the total number of expected inputs."""
        total = 0
        for section in self.structure:
            if isinstance(section, FormSection):
                total += section.field_count
            elif isinstance(section, DynamicSection):
                total += section.total_components
        return total


# Global parser instance
form_parser = FormParser()
```
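A small usage sketch of the parser, assuming the module is importable from the project root; the input values are placeholders standing in for the flattened Gradio components:

```python
from src.services.form_parser import form_parser

# Placeholder values for the flattened form inputs, in the order defined by FORM_STRUCTURE.
inputs = tuple(f"value_{i}" for i in range(form_parser.get_total_input_count()))

parsed = form_parser.parse_inputs(inputs)
print(parsed["task_simple"])              # {'taskFamily': ..., 'taskStage': ..., 'nbRequest': ...}
print(parsed["algorithms"]["framework"])  # the 5 row values of the 'framework' column
print(parsed["quality"]["quality"])       # the single quality value
```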
src/services/json_generator.py
CHANGED
Old version (removed lines and surrounding context):

@@ -1,9 +1,8 @@

```python
import json
import tempfile
from datetime import datetime
import uuid
from assets.utils.validation import validate_boamps_schema
import   # removed import line, truncated in the rendered diff
import os
```

@@ -94,205 +93,154 @@ def generate_json(

```python
        # Quality
        quality
):
    """Generate JSON data from form inputs."""
    # ... (most of the old body is not recoverable from the rendered diff) ...

    if cloudInstance:
        infrastructure["cloudInstance"] = cloudInstance
    if cloudService:
        infrastructure["cloudService"] = cloudService
    if components_list:
        infrastructure["components"] = components_list
    report["infrastructure"] = infrastructure

    # proceed environment
    environment = {}
    if country:
        environment["country"] = country
    if latitude:
        environment["latitude"] = latitude
    if longitude:
        environment["longitude"] = longitude
    if location:
        environment["location"] = location
    if powerSupplierType:
        environment["powerSupplierType"] = powerSupplierType
    if powerSource:
        environment["powerSource"] = powerSource
    if powerSourceCarbonIntensity:
        environment["powerSourceCarbonIntensity"] = powerSourceCarbonIntensity
    if environment:
        report["environment"] = environment

    # proceed quality
    if quality:
        report["quality"] = quality

    # Validate that the schema follows the BoAmps format and so that the required fields have been completed
    is_valid, message = validate_boamps_schema(report)
    if not is_valid:
        return message, None, ""

    # Create and save the JSON file
    filename = f"report_{taskStage}_{taskFamily}_{infraType}_{reportId}.json"
    filename = filename.replace(" ", "-")

    # Create the JSON string
    json_str = json.dumps(report, indent=4, ensure_ascii=False)

    # Write JSON to a temporary file with the desired filename (not permanent)
    temp_dir = tempfile.gettempdir()
    temp_path = os.path.join(temp_dir, filename)
    with open(temp_path, "w", encoding="utf-8") as tmp:
        tmp.write(json_str)

    # Return logical filename, JSON string, and temp file path for upload
    return message, temp_path, json_str
```
New version:

```python
import json
import tempfile
from datetime import datetime
from assets.utils.validation import validate_boamps_schema
from src.services.report_builder import ReportBuilder
import os
```

```python
        # Quality
        quality
):
    """Generate JSON data from form inputs using optimized ReportBuilder."""

    try:
        # Use ReportBuilder for cleaner, more maintainable code
        builder = ReportBuilder()

        # Build header section
        header_data = {
            "licensing": licensing,
            "formatVersion": formatVersion,
            "formatVersionSpecificationUri": formatVersionSpecificationUri,
            "reportId": reportId,
            "reportDatetime": reportDatetime or datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "reportStatus": reportStatus,
            "publisher_name": publisher_name,
            "publisher_division": publisher_division,
            "publisher_projectName": publisher_projectName,
            "publisher_confidentialityLevel": publisher_confidentialityLevel,
            "publisher_publicKey": publisher_publicKey
        }
        builder.add_header(header_data)

        # Build task section
        task_data = {
            "taskStage": taskStage,
            "taskFamily": taskFamily,
            "nbRequest": nbRequest,
            "measuredAccuracy": measuredAccuracy,
            "estimatedAccuracy": estimatedAccuracy,
            "taskDescription": taskDescription,
            "algorithms": {
                "trainingType": trainingType,
                "algorithmType": algorithmType,
                "algorithmName": algorithmName,
                "algorithmUri": algorithmUri,
                "foundationModelName": foundationModelName,
                "foundationModelUri": foundationModelUri,
                "parametersNumber": parametersNumber,
                "framework": framework,
                "frameworkVersion": frameworkVersion,
                "classPath": classPath,
                "layersNumber": layersNumber,
                "epochsNumber": epochsNumber,
                "optimizer": optimizer,
                "quantization": quantization
            },
            "dataset": {
                "dataUsage": dataUsage,
                "dataType": dataType,
                "dataFormat": dataFormat,
                "dataSize": dataSize,
                "dataQuantity": dataQuantity,
                "shape": shape,
                "source": source,
                "sourceUri": sourceUri,
                "owner": owner
            }
        }
        builder.add_task(task_data)

        # Build measures section
        measures_data = {
            "measurementMethod": measurementMethod,
            "manufacturer": manufacturer,
            "version": version,
            "cpuTrackingMode": cpuTrackingMode,
            "gpuTrackingMode": gpuTrackingMode,
            "averageUtilizationCpu": averageUtilizationCpu,
            "averageUtilizationGpu": averageUtilizationGpu,
            "powerCalibrationMeasurement": powerCalibrationMeasurement,
            "durationCalibrationMeasurement": durationCalibrationMeasurement,
            "powerConsumption": powerConsumption,
            "measurementDuration": measurementDuration,
            "measurementDateTime": measurementDateTime
        }
        builder.add_measures(measures_data)

        # Build system section
        system_data = {
            "osystem": osystem,
            "distribution": distribution,
            "distributionVersion": distributionVersion
        }
        builder.add_system(system_data)

        # Build software section
        software_data = {
            "language": language,
            "version_software": version_software
        }
        builder.add_software(software_data)

        # Build infrastructure section
        infrastructure_data = {
            "infraType": infraType,
            "cloudProvider": cloudProvider,
            "cloudInstance": cloudInstance,
            "cloudService": cloudService,
            "components": {
                "componentName": componentName,
                "componentType": componentType,
                "nbComponent": nbComponent,
                "memorySize": memorySize,
                "manufacturer": manufacturer_infra,
                "family": family,
                "series": series,
                "share": share
            }
        }
        builder.add_infrastructure(infrastructure_data)

        # Build environment section
        environment_data = {
            "country": country,
            "latitude": latitude,
            "longitude": longitude,
            "location": location,
            "powerSupplierType": powerSupplierType,
            "powerSource": powerSource,
            "powerSourceCarbonIntensity": powerSourceCarbonIntensity
        }
        builder.add_environment(environment_data)

        # Add quality
        builder.add_quality(quality)

        # Build the final report
        report = builder.build()

        # Validate that the schema follows the BoAmps format
        is_valid, message = validate_boamps_schema(report)
        if not is_valid:
            return message, None, ""

        # Create and save the JSON file
        filename = f"report_{taskStage}_{taskFamily}_{infraType}_{reportId}.json"
        filename = filename.replace(" ", "-")

        # Create the JSON string
        json_str = json.dumps(report, indent=4, ensure_ascii=False)

        # Write JSON to a temporary file with the desired filename
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, filename)
        with open(temp_path, "w", encoding="utf-8") as tmp:
            tmp.write(json_str)

        return message, temp_path, json_str

    except Exception as e:
        return f"Error generating JSON: {str(e)}", None, ""
```
src/services/report_builder.py
ADDED
@@ -0,0 +1,273 @@

```python
"""
JSON processing utilities for BoAmps report generation.
Provides optimized functions for data transformation and organization.
"""

from typing import Dict, List, Any, Optional


def create_section_dict(data: Dict[str, Any], required_fields: List[str] = None) -> Dict[str, Any]:
    """
    Create a section dictionary, including only non-empty values.

    Args:
        data: Dictionary of field values
        required_fields: List of fields that should always be included if provided

    Returns:
        Dictionary with non-empty values only, or empty dict if no meaningful values
    """
    section = {}
    required_fields = required_fields or []

    for key, value in data.items():
        # Include only if it's a required field with meaningful value, or if it's meaningful
        if key in required_fields and is_meaningful_value(value):
            section[key] = value
        elif key not in required_fields and is_meaningful_value(value):
            section[key] = value

    return section


def is_meaningful_value(value: Any) -> bool:
    """
    Check if a value is meaningful (not empty, not just whitespace).

    Args:
        value: Value to check

    Returns:
        True if the value is meaningful, False otherwise
    """
    if value is None:
        return False
    if isinstance(value, str):
        return value.strip() != ""
    if isinstance(value, (int, float)):
        return True
    if isinstance(value, (list, dict)):
        return len(value) > 0
    return bool(value)


def process_dynamic_component_list(field_data: Dict[str, List[Any]], max_rows: int = 5) -> List[Dict[str, Any]]:
    """
    Process dynamic component data into a list of component dictionaries.
    Optimized version of the original process_component_list function.

    Args:
        field_data: Dictionary where keys are field names and values are lists of row values
        max_rows: Maximum number of rows to process

    Returns:
        List of component dictionaries
    """
    components = []

    # Find the actual number of rows with data
    actual_rows = 0
    for field_values in field_data.values():
        if field_values:
            # Count non-empty values from the end
            for i in range(len(field_values) - 1, -1, -1):
                if is_meaningful_value(field_values[i]):
                    actual_rows = max(actual_rows, i + 1)
                    break

    # Create components for rows that have data
    for row_idx in range(min(actual_rows, max_rows)):
        component = {}

        # Add fields that have meaningful values for this row
        for field_name, field_values in field_data.items():
            if row_idx < len(field_values) and is_meaningful_value(field_values[row_idx]):
                component[field_name] = field_values[row_idx]

        # Only add component if it has at least one field
        if component:
            components.append(component)

    return components


def create_publisher_section(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Create publisher section with proper validation.

    Args:
        data: Dictionary containing all header data

    Returns:
        Publisher dictionary or None if no publisher data
    """
    publisher_fields = {
        "name": data.get("publisher_name"),
        "division": data.get("publisher_division"),
        "projectName": data.get("publisher_projectName"),
        "confidentialityLevel": data.get("publisher_confidentialityLevel"),
        "publicKey": data.get("publisher_publicKey")
    }

    publisher = create_section_dict(
        publisher_fields, required_fields=["confidentialityLevel"])
    return publisher if publisher else None


class ReportBuilder:
    """
    Builder class for creating BoAmps reports with optimized data processing.
    """

    def __init__(self):
        self.report = {}

    def add_header(self, header_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add header section to the report."""
        header_fields = {
            "licensing": header_data.get("licensing"),
            "formatVersion": header_data.get("formatVersion"),
            "formatVersionSpecificationUri": header_data.get("formatVersionSpecificationUri"),
            "reportId": header_data.get("reportId"),
            "reportDatetime": header_data.get("reportDatetime"),
            "reportStatus": header_data.get("reportStatus")
        }

        header = create_section_dict(header_fields, required_fields=[
            "reportId", "reportDatetime"])

        # Add publisher if available
        publisher = create_publisher_section(header_data)
        if publisher:
            header["publisher"] = publisher

        if header:
            self.report["header"] = header

        return self

    def add_task(self, task_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add task section to the report."""
        task = {}

        # Simple task fields
        simple_fields = {
            "taskStage": task_data.get("taskStage"),
            "taskFamily": task_data.get("taskFamily"),
            "nbRequest": task_data.get("nbRequest"),
            "measuredAccuracy": task_data.get("measuredAccuracy"),
            "estimatedAccuracy": task_data.get("estimatedAccuracy"),
            "taskDescription": task_data.get("taskDescription")
        }

        task.update(create_section_dict(simple_fields,
                                        required_fields=["taskStage", "taskFamily"]))

        # Process algorithms
        if "algorithms" in task_data:
            algorithms = process_dynamic_component_list(
                task_data["algorithms"])
            if algorithms:
                task["algorithms"] = algorithms

        # Process dataset
        if "dataset" in task_data:
            dataset = process_dynamic_component_list(task_data["dataset"])
            if dataset:
                task["dataset"] = dataset

        self.report["task"] = task
        return self

    def add_measures(self, measures_data: Dict[str, List[Any]]) -> 'ReportBuilder':
        """Add measures section to the report."""
        measures = process_dynamic_component_list(measures_data)
        if measures:
            self.report["measures"] = measures
        return self

    def add_system(self, system_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add system section to the report."""
        system_fields = {
            "os": system_data.get("osystem"),
            "distribution": system_data.get("distribution"),
            "distributionVersion": system_data.get("distributionVersion")
        }

        system = create_section_dict(system_fields, required_fields=["os"])
        # Only add system section if it has meaningful values
        if system:
            self.report["system"] = system
        return self

    def add_software(self, software_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add software section to the report."""
        software_fields = {
            "language": software_data.get("language"),
            "version": software_data.get("version_software")
        }

        software = create_section_dict(
            software_fields, required_fields=["language"])
        # Only add software section if it has meaningful values
        if software:
            self.report["software"] = software
        return self

    def add_infrastructure(self, infra_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add infrastructure section to the report."""
        infrastructure = {}

        # Simple infrastructure fields
        simple_fields = {
            "infraType": infra_data.get("infraType"),
            "cloudProvider": infra_data.get("cloudProvider"),
            "cloudInstance": infra_data.get("cloudInstance"),
            "cloudService": infra_data.get("cloudService")
        }

        # Add simple fields only if they have meaningful values
        simple_infra = create_section_dict(
            simple_fields, required_fields=["infraType"])
        infrastructure.update(simple_infra)

        # Process components
        if "components" in infra_data:
            components = process_dynamic_component_list(
                infra_data["components"])
            if components:
                infrastructure["components"] = components

        # Only add infrastructure section if it has meaningful content
        if infrastructure:
            self.report["infrastructure"] = infrastructure
        return self

    def add_environment(self, env_data: Dict[str, Any]) -> 'ReportBuilder':
        """Add environment section to the report."""
        env_fields = {
            "country": env_data.get("country"),
            "latitude": env_data.get("latitude"),
            "longitude": env_data.get("longitude"),
            "location": env_data.get("location"),
            "powerSupplierType": env_data.get("powerSupplierType"),
            "powerSource": env_data.get("powerSource"),
            "powerSourceCarbonIntensity": env_data.get("powerSourceCarbonIntensity")
        }

        environment = create_section_dict(
            env_fields, required_fields=["country"])
        # Only add environment section if it has meaningful values
        if environment:
            self.report["environment"] = environment
        return self

    def add_quality(self, quality_value: Any) -> 'ReportBuilder':
        """Add quality field to the report."""
        if is_meaningful_value(quality_value):
            self.report["quality"] = quality_value
        return self

    def build(self) -> Dict[str, Any]:
        """Build and return the final report."""
        return self.report
```
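A small usage sketch of the builder, assuming it is run from the project root; the field values are placeholders, and empty strings are dropped because `is_meaningful_value` filters them out:

```python
from src.services.report_builder import ReportBuilder

# Placeholder values; distributionVersion is empty and is therefore omitted from the report.
report = (
    ReportBuilder()
    .add_system({"osystem": "Linux", "distribution": "Ubuntu", "distributionVersion": ""})
    .add_software({"language": "python", "version_software": "3.12"})
    .add_quality("high")
    .build()
)
print(report)
# {'system': {'os': 'Linux', 'distribution': 'Ubuntu'},
#  'software': {'language': 'python', 'version': '3.12'},
#  'quality': 'high'}
```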