Spaces:
Running
Running
Vu Minh Chien
committed on
Commit
·
3197697
1
Parent(s):
e9a14f9
update rule unit
Browse files
- routes/predict.py +6 -23
routes/predict.py
CHANGED
|
@@ -4,6 +4,7 @@ import shutil
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import sys
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from fastapi import APIRouter, UploadFile, File, HTTPException, Depends, Body
|
| 9 |
from fastapi.responses import FileResponse
|
|
@@ -319,7 +320,11 @@ async def predict(
|
|
| 319 |
|
| 320 |
# 出力_集計用単位 mapping - use unit mapper result
|
| 321 |
if "出力_集計用単位" in df_output_data.columns:
|
| 322 |
-
df_output_data["出力_集計用単位"] =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
# 出力_確率度 mapping - use the name similarity as main probability
|
| 325 |
if "出力_名称類似度" in df_output_data.columns:
|
|
@@ -339,28 +344,6 @@ async def predict(
|
|
| 339 |
# Debug: Print available columns to see what we have
|
| 340 |
print(f"Available columns after processing: {list(df_output_data.columns)}")
|
| 341 |
|
| 342 |
-
# Final check and fallback for missing output columns
|
| 343 |
-
# if (
|
| 344 |
-
# "出力_中科目" not in df_output_data.columns
|
| 345 |
-
# or df_output_data["出力_中科目"].eq("").all()
|
| 346 |
-
# ):
|
| 347 |
-
# df_output_data["出力_中科目"] = df_output_data.get("中科目", "")
|
| 348 |
-
|
| 349 |
-
# if (
|
| 350 |
-
# "出力_項目名" not in df_output_data.columns
|
| 351 |
-
# or df_output_data["出力_項目名"].eq("").all()
|
| 352 |
-
# ):
|
| 353 |
-
# df_output_data["出力_項目名"] = df_output_data.get("名称", "")
|
| 354 |
-
|
| 355 |
-
# if (
|
| 356 |
-
# "出力_単位" not in df_output_data.columns
|
| 357 |
-
# or df_output_data["出力_単位"].eq("").all()
|
| 358 |
-
# ):
|
| 359 |
-
# df_output_data["出力_単位"] = df_output_data.get("単位", "")
|
| 360 |
-
|
| 361 |
-
# if "出力_確率度" not in df_output_data.columns:
|
| 362 |
-
# df_output_data["出力_確率度"] = 0 # Default confidence score
|
| 363 |
-
|
| 364 |
# Define output columns in exact order as shown in Excel
|
| 365 |
output_columns = [
|
| 366 |
"ID",
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import sys
|
| 7 |
+
import numpy as np
|
| 8 |
from pathlib import Path
|
| 9 |
from fastapi import APIRouter, UploadFile, File, HTTPException, Depends, Body
|
| 10 |
from fastapi.responses import FileResponse
|
|
|
|
| 320 |
|
| 321 |
# 出力_集計用単位 mapping - use unit mapper result
|
| 322 |
if "出力_集計用単位" in df_output_data.columns:
|
| 323 |
+
df_output_data["出力_集計用単位"] = np.where(
|
| 324 |
+
df_output_data["集計単位"] != "",
|
| 325 |
+
df_output_data["集計単位"],
|
| 326 |
+
df_output_data["出力_集計用単位"]
|
| 327 |
+
)
|
| 328 |
|
| 329 |
# 出力_確率度 mapping - use the name similarity as main probability
|
| 330 |
if "出力_名称類似度" in df_output_data.columns:
|
|
|
|
| 344 |
# Debug: Print available columns to see what we have
|
| 345 |
print(f"Available columns after processing: {list(df_output_data.columns)}")
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
# Define output columns in exact order as shown in Excel
|
| 348 |
output_columns = [
|
| 349 |
"ID",
|