Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
-
"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel,
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
from __future__ import annotations
|
|
@@ -24,58 +29,46 @@ import gradio_client.utils
|
|
| 24 |
import openai
|
| 25 |
import pandas as pd
|
| 26 |
|
| 27 |
-
#
|
| 28 |
_original = gradio_client.utils._json_schema_to_python_type
|
| 29 |
-
|
| 30 |
-
|
| 31 |
def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
    """Map a JSON schema to a type name, tolerating bare-boolean schemas.

    A boolean *schema* is reported as ``"any"``; every other schema is passed,
    together with *defs*, to the saved gradio_client implementation
    (``_original``) and its result is returned unchanged.
    """
    return "any" if isinstance(schema, bool) else _original(schema, defs)
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
# -------- Tiny disk cache ----------------------------------------------------
# Cached completions live in ./ai_response_cache (relative to the working
# directory); the folder is created at import time if it is not there yet.
CACHE_DIR = Path() / "ai_response_cache"
CACHE_DIR.mkdir(exist_ok=True)
|
| 44 |
-
|
| 45 |
|
| 46 |
def _cache_path(p: str) -> Path:
    """Return the cache file for prompt *p*: ``<md5 hex of the encoded prompt>.json`` in CACHE_DIR."""
    digest = hashlib.md5(p.encode()).hexdigest()
    return CACHE_DIR / (digest + ".json")
|
| 48 |
|
| 49 |
-
|
| 50 |
def _get_cached(p: str) -> str | None:
    """Return the cached completion for prompt *p*, or ``None`` on a cache miss.

    A missing, unreadable or malformed cache file counts as a miss. So does an
    entry whose ``"response"`` is not a string (for example a JSON ``NaN`` or
    ``null``), so callers never receive a non-text "completion".
    """
    try:
        entry = json.loads(_cache_path(p).read_text("utf-8"))
        response = entry["response"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file absent or unreadable; ValueError: invalid JSON or UTF-8;
        # KeyError/TypeError: valid JSON that is not a {"response": ...} object.
        return None
    return response if isinstance(response, str) else None
|
| 55 |
|
| 56 |
-
|
| 57 |
def _set_cache(p: str, r: str) -> None:
    """Persist completion *r* for prompt *p* on a best-effort basis.

    Nothing is raised on failure: the cache is an optimisation, so a write
    error must never abort the caller. Non-string prompts or responses are
    ignored rather than stored. The entry is written to a sibling ``.tmp`` file
    and then renamed over the target, so an interrupted write cannot leave a
    truncated JSON file under the final name.
    """
    if not isinstance(p, str) or not isinstance(r, str):
        return
    try:
        target = _cache_path(p)
        scratch = target.with_suffix(".tmp")
        scratch.write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
        scratch.replace(target)
    except (OSError, ValueError):
        # OSError: disk/permission problems; ValueError: text that cannot be
        # encoded (e.g. lone surrogates). Either way the entry is just skipped.
        pass
|
| 62 |
|
| 63 |
-
|
| 64 |
-
#
|
| 65 |
-
# Caps the number of OpenAI calls in flight at any moment (≤ 100).
# NOTE(review): this semaphore is created at import time while `convert` starts
# a fresh event loop with asyncio.run() on each call — confirm the semaphore is
# safe to reuse across loops on the Python version in use.
_SEM = asyncio.Semaphore(value=100)
# prompt → Future of the request currently being made for that prompt
_inflight: dict[str, asyncio.Future] = dict()
|
| 67 |
|
| 68 |
-
|
| 69 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
| 70 |
-
"""Single LLM call with cache, concurrency cap, and de-duplication."""
|
| 71 |
cached = _get_cached(prompt)
|
| 72 |
if cached is not None:
|
| 73 |
return cached
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
return await existing
|
| 79 |
|
| 80 |
loop = asyncio.get_running_loop()
|
| 81 |
|
|
@@ -100,12 +93,8 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
|
| 100 |
finally:
|
| 101 |
_inflight.pop(prompt, None)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
lst: list[str], instruction: str, client: openai.AsyncOpenAI
|
| 106 |
-
) -> list[str]:
|
| 107 |
-
"""Vectorised helper β returns an output list matching *lst* length."""
|
| 108 |
-
out: list[str] = ["" for _ in lst]
|
| 109 |
idx, prompts = [], []
|
| 110 |
for i, txt in enumerate(lst):
|
| 111 |
if isinstance(txt, str) and txt.strip():
|
|
@@ -119,109 +108,122 @@ async def _batch_async(
|
|
| 119 |
out[idx[j]] = val
|
| 120 |
return out
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
# -------- Core converter -----------------------------------------------------
|
| 124 |
# Text used for the prerequisites field when a course supplies none.
DEFAULT_PREREQ = " ".join(
    (
        "No specific prerequisites are required for this course.",
        "Basic computer literacy and familiarity with fundamental concepts in the subject area",
        "are recommended for the best learning experience.",
    )
)
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
def _read(path: str) -> pd.DataFrame:
    """Load the uploaded schedule at *path* into a DataFrame.

    ``.xlsx``/``.xls`` files (case-insensitive) are read as Excel; anything
    else is read as CSV. CSVs are decoded as UTF-8 first (``utf-8-sig`` also
    strips a byte-order mark, which would otherwise end up glued to the first
    column name) and only fall back to latin1 when the bytes are not valid
    UTF-8, so legacy exports keep loading as before.
    """
    if path.lower().endswith((".xlsx", ".xls")):
        return pd.read_excel(path)
    try:
        return pd.read_csv(path, encoding="utf-8-sig")
    except UnicodeDecodeError:
        # latin1 maps every byte to a character, so this fallback cannot fail
        # on decoding.
        return pd.read_csv(path, encoding="latin1")
|
| 135 |
|
| 136 |
-
|
| 137 |
-
async def _enrich_dataframe(
|
| 138 |
-
df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
|
| 139 |
-
) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
|
| 140 |
-
"""Run all LLM batches concurrently and return the five enrichment columns."""
|
| 141 |
async with openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) as client:
|
| 142 |
sdesc, ldesc, fobj, fout = await asyncio.gather(
|
| 143 |
-
_batch_async(
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
),
|
| 148 |
-
_batch_async(
|
| 149 |
-
df.get(dcol, "").fillna("").tolist(),
|
| 150 |
-
"Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:",
|
| 151 |
-
client,
|
| 152 |
-
),
|
| 153 |
-
_batch_async(
|
| 154 |
-
df.get(ocol, "").fillna("").tolist(),
|
| 155 |
-
"Format these objectives into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
| 156 |
-
client,
|
| 157 |
-
),
|
| 158 |
-
_batch_async(
|
| 159 |
-
df.get(acol, "").fillna("").tolist(),
|
| 160 |
-
"Format this agenda into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
| 161 |
-
client,
|
| 162 |
-
),
|
| 163 |
)
|
| 164 |
|
| 165 |
-
# prerequisites
|
| 166 |
prereq_raw = df.get(pcol, "").fillna("").tolist()
|
| 167 |
-
fpre
|
| 168 |
for req in prereq_raw:
|
| 169 |
if not str(req).strip():
|
| 170 |
fpre.append(DEFAULT_PREREQ)
|
| 171 |
else:
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
"Format these prerequisites into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
| 175 |
-
client,
|
| 176 |
-
)
|
| 177 |
-
fpre.append(formatted[0])
|
| 178 |
|
| 179 |
return sdesc, ldesc, fobj, fout, fpre
|
| 180 |
|
| 181 |
-
|
| 182 |
-
def convert(
|
| 183 |
-
|
| 184 |
-
"Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
|
| 185 |
-
"AWS": "/wp-content/uploads/2025/04/aws.png",
|
| 186 |
-
"Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
|
| 187 |
-
"Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
|
| 188 |
-
"Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
|
| 189 |
-
"EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
|
| 190 |
-
"ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
|
| 191 |
-
"PMI": "/wp-content/uploads/2025/04/PMI.png",
|
| 192 |
-
"Comptia": "/wp-content/uploads/2025/04/Comptia.png",
|
| 193 |
-
"Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
|
| 194 |
-
"ISC2": "/wp-content/uploads/2025/04/ISC2.png",
|
| 195 |
-
"AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
|
| 196 |
-
}
|
| 197 |
-
|
| 198 |
-
df = _read(path)
|
| 199 |
df.columns = df.columns.str.strip()
|
| 200 |
|
| 201 |
-
first_col = lambda *
|
| 202 |
-
|
| 203 |
dcol = first_col("Description", "Decription")
|
| 204 |
ocol = first_col("Objectives", "objectives")
|
| 205 |
pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
|
| 206 |
acol = first_col("Outline")
|
| 207 |
-
dur
|
| 208 |
-
sid
|
| 209 |
|
| 210 |
if dur not in df.columns:
|
| 211 |
df[dur] = ""
|
| 212 |
|
| 213 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
| 215 |
_enrich_dataframe(df, dcol, ocol, pcol, acol)
|
| 216 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
|
| 219 |
-
df["Condensed_Description"] = ldesc
|
| 220 |
-
df["Formatted_Objectives"] = fobj
|
| 221 |
-
df["Formatted_Agenda"] = fout
|
| 222 |
-
df["Formatted_Prerequisites"] = fpre
|
| 223 |
-
|
| 224 |
-
# ---------- Schedule aggregation --------------------------------------
|
| 225 |
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
|
| 226 |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
|
| 227 |
|
|
@@ -231,7 +233,6 @@ def convert(path: str) -> BytesIO:
|
|
| 231 |
.apply(lambda s: ",".join(s.dropna().unique()))
|
| 232 |
.reset_index(name="Dates")
|
| 233 |
)
|
| 234 |
-
|
| 235 |
t_agg = (
|
| 236 |
dsorted.groupby("Course ID", group_keys=False)
|
| 237 |
.apply(
|
|
@@ -245,10 +246,8 @@ def convert(path: str) -> BytesIO:
|
|
| 245 |
)
|
| 246 |
.reset_index(name="Times")
|
| 247 |
)
|
| 248 |
-
|
| 249 |
parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
|
| 250 |
|
| 251 |
-
# ---------- Parent / child product rows --------------------------------
|
| 252 |
parent = pd.DataFrame(
|
| 253 |
{
|
| 254 |
"Type": "variable",
|
|
@@ -334,66 +333,39 @@ def convert(path: str) -> BytesIO:
|
|
| 334 |
|
| 335 |
all_rows = pd.concat([parent, child], ignore_index=True)
|
| 336 |
order = [
|
| 337 |
-
"Type",
|
| 338 |
-
"
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
-
"
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
"Tax status",
|
| 345 |
-
"In stock?",
|
| 346 |
-
"Stock",
|
| 347 |
-
"Sold individually?",
|
| 348 |
-
"Regular price",
|
| 349 |
-
"Categories",
|
| 350 |
-
"Images",
|
| 351 |
-
"Parent",
|
| 352 |
-
"Brands",
|
| 353 |
-
"Attribute 1 name",
|
| 354 |
-
"Attribute 1 value(s)",
|
| 355 |
-
"Attribute 1 visible",
|
| 356 |
-
"Attribute 1 global",
|
| 357 |
-
"Attribute 2 name",
|
| 358 |
-
"Attribute 2 value(s)",
|
| 359 |
-
"Attribute 2 visible",
|
| 360 |
-
"Attribute 2 global",
|
| 361 |
-
"Attribute 3 name",
|
| 362 |
-
"Attribute 3 value(s)",
|
| 363 |
-
"Attribute 3 visible",
|
| 364 |
-
"Attribute 3 global",
|
| 365 |
-
"Meta: outline",
|
| 366 |
-
"Meta: days",
|
| 367 |
-
"Meta: location",
|
| 368 |
-
"Meta: overview",
|
| 369 |
-
"Meta: objectives",
|
| 370 |
-
"Meta: prerequisites",
|
| 371 |
-
"Meta: agenda",
|
| 372 |
]
|
| 373 |
-
|
| 374 |
out = BytesIO()
|
| 375 |
all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
|
| 376 |
out.seek(0)
|
| 377 |
return out
|
| 378 |
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
csv_bytes = convert(upload.name)
|
| 383 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|
| 384 |
tmp.write(csv_bytes.getvalue())
|
| 385 |
-
|
| 386 |
-
return path
|
| 387 |
-
|
| 388 |
|
| 389 |
ui = gr.Interface(
|
| 390 |
-
fn=
|
| 391 |
-
inputs=
|
| 392 |
-
label="Upload NetCom
|
| 393 |
-
|
|
|
|
| 394 |
outputs=gr.File(label="Download WooCommerce CSV"),
|
| 395 |
title="NetCom β WooCommerce CSV Processor (Try 2)",
|
| 396 |
-
description=
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
analytics_enabled=False,
|
| 398 |
)
|
| 399 |
|
|
|
|
| 1 |
+
"""NetCom → WooCommerce transformer (Try 2 schema — persistent cache, 100-parallel,
|
| 2 |
+
duplicate-safe, relative-logo paths, cache-preload)
|
| 3 |
+
==================================================================================
|
| 4 |
+
|
| 5 |
+
*Accept a NetCom schedule (CSV/XLSX) and **optionally** a *previous* WooCommerce
|
| 6 |
+
CSV; output the fresh WooCommerce CSV.*
|
| 7 |
+
|
| 8 |
+
New in this revision
|
| 9 |
+
--------------------
|
| 10 |
+
* **Relative** image paths kept (WooCommerce resolves them to your own domain).
|
| 11 |
+
* Second optional file-input lets you *pre-load* the on-disk cache from a prior
|
| 12 |
+
run, so already-processed courses skip OpenAI completely.
|
| 13 |
+
* Everything else (persistent cache in `/data`, 100-parallel semaphore,
|
| 14 |
+
in-flight de-duplication, pandas compatibility fix) remains unchanged.
|
| 15 |
"""
|
| 16 |
|
| 17 |
from __future__ import annotations
|
|
|
|
| 29 |
import openai
|
| 30 |
import pandas as pd
|
| 31 |
|
| 32 |
+
# ── Gradio bool-schema hot-patch ─────────────────────────────────────────────
|
| 33 |
_original = gradio_client.utils._json_schema_to_python_type
|
|
|
|
|
|
|
| 34 |
def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
    """Map a JSON schema to a type name, tolerating bare-boolean schemas.

    A boolean *schema* is reported as ``"any"``; every other schema is passed,
    together with *defs*, to the saved gradio_client implementation
    (``_original``) and its result is returned unchanged.
    """
    return "any" if isinstance(schema, bool) else _original(schema, defs)
|
| 38 |
+
gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type # type: ignore
|
| 39 |
|
| 40 |
+
# ── Persistent disk cache (HF Spaces uses /data) ─────────────────────────────
|
| 41 |
+
# Use /data as the cache root when that directory exists; otherwise fall back
# to the current working directory. The cache folder is created at import time.
_PERSISTENT_ROOT = Path("/data")
if _PERSISTENT_ROOT.exists():
    CACHE_DIR = _PERSISTENT_ROOT / "ai_response_cache"
else:
    CACHE_DIR = Path(".") / "ai_response_cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
def _cache_path(p: str) -> Path:
    """Return the cache file for prompt *p*: ``<md5 hex of the encoded prompt>.json`` in CACHE_DIR."""
    digest = hashlib.md5(p.encode()).hexdigest()
    return CACHE_DIR / (digest + ".json")
|
| 47 |
|
|
|
|
| 48 |
def _get_cached(p: str) -> str | None:
    """Return the cached completion for prompt *p*, or ``None`` on a cache miss.

    A missing, unreadable or malformed cache file counts as a miss. So does an
    entry whose ``"response"`` is not a string (for example a JSON ``NaN`` or
    ``null``), so callers never receive a non-text "completion".
    """
    try:
        entry = json.loads(_cache_path(p).read_text("utf-8"))
        response = entry["response"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file absent or unreadable; ValueError: invalid JSON or UTF-8;
        # KeyError/TypeError: valid JSON that is not a {"response": ...} object.
        return None
    return response if isinstance(response, str) else None
|
| 53 |
|
|
|
|
| 54 |
def _set_cache(p: str, r: str) -> None:
    """Persist completion *r* for prompt *p* on a best-effort basis.

    Nothing is raised on failure: the cache is an optimisation, so a write
    error must never abort the caller. Non-string prompts or responses are
    ignored rather than stored. The entry is written to a sibling ``.tmp`` file
    and then renamed over the target, so an interrupted write cannot leave a
    truncated JSON file under the final name.
    """
    if not isinstance(p, str) or not isinstance(r, str):
        return
    try:
        target = _cache_path(p)
        scratch = target.with_suffix(".tmp")
        scratch.write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
        scratch.replace(target)
    except (OSError, ValueError):
        # OSError: disk/permission problems; ValueError: text that cannot be
        # encoded (e.g. lone surrogates). Either way the entry is just skipped.
        pass
|
| 59 |
|
| 60 |
+
# ── OpenAI helpers: 100-parallel + de-dup ────────────────────────────────────
|
| 61 |
+
# Caps the number of OpenAI calls in flight at any moment (≤ 100).
# NOTE(review): this semaphore is created at import time while `convert` starts
# a fresh event loop with asyncio.run() on each call — confirm the semaphore is
# safe to reuse across loops on the Python version in use.
_SEM = asyncio.Semaphore(value=100)
# prompt → Future of the request currently being made for that prompt
_inflight: dict[str, asyncio.Future] = dict()
|
| 63 |
|
|
|
|
| 64 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
|
|
|
| 65 |
cached = _get_cached(prompt)
|
| 66 |
if cached is not None:
|
| 67 |
return cached
|
| 68 |
|
| 69 |
+
running = _inflight.get(prompt)
|
| 70 |
+
if running is not None:
|
| 71 |
+
return await running
|
|
|
|
| 72 |
|
| 73 |
loop = asyncio.get_running_loop()
|
| 74 |
|
|
|
|
| 93 |
finally:
|
| 94 |
_inflight.pop(prompt, None)
|
| 95 |
|
| 96 |
+
async def _batch_async(lst, instruction: str, client):
|
| 97 |
+
out = ["" for _ in lst]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
idx, prompts = [], []
|
| 99 |
for i, txt in enumerate(lst):
|
| 100 |
if isinstance(txt, str) and txt.strip():
|
|
|
|
| 108 |
out[idx[j]] = val
|
| 109 |
return out
|
| 110 |
|
| 111 |
+
# ── Instructions (reuse across preload & gen) ────────────────────────────────
|
| 112 |
+
# Prompt prefixes sent to the model. They are also used to build cache keys
# when pre-loading, so the same constant must be used in both places.
# The bullet is spelled as an escape so that a mis-decoded source file can
# never turn it into mojibake inside the prompt again.
_BULLET = "\u2022"

DESC_SHORT = "Create a concise 250-character summary of this course description:"
DESC_LONG = (
    "Condense this description to a maximum of 750 characters in paragraph format, "
    "with clean formatting:"
)
OBJECTIVES = (
    "Format these objectives into a bullet list with clean formatting. "
    f"Start each bullet with '{_BULLET} ':"
)
AGENDA = (
    "Format this agenda into a bullet list with clean formatting. "
    f"Start each bullet with '{_BULLET} ':"
)
PREREQ = (
    "Format these prerequisites into a bullet list with clean formatting. "
    f"Start each bullet with '{_BULLET} ':"
)
|
| 117 |
+
|
| 118 |
+
# ── Logo map (relative paths, with common aliases) ───────────────────────────
|
| 119 |
+
# Brand name -> site-relative logo path. Every logo lives in the same upload
# folder, so only the file name varies per brand.
_LOGO_DIR = "/wp-content/uploads/2025/04/"
logos = {
    brand: _LOGO_DIR + filename
    for brand, filename in (
        ("Amazon Web Services", "aws.png"),
        ("AWS", "aws.png"),
        ("Cisco", "cisco-e1738593292198-1.webp"),
        ("Microsoft", "Microsoft-e1737494120985-1.png"),
        ("Google Cloud", "Google_Cloud.png"),
        ("EC Council", "Ec_Council.png"),
        ("ITIL", "ITIL.webp"),
        ("PMI", "PMI.png"),
        ("Comptia", "Comptia.png"),
        ("Autodesk", "autodesk.png"),
        ("ISC2", "ISC2.png"),
        ("AICerts", "aicerts-logo-1.png"),
    )
}
|
| 133 |
|
|
|
|
| 134 |
# Text used for the prerequisites field when a course supplies none.
DEFAULT_PREREQ = " ".join(
    (
        "No specific prerequisites are required for this course.",
        "Basic computer literacy and familiarity with fundamental concepts in the subject area",
        "are recommended for the best learning experience.",
    )
)
|
| 139 |
|
| 140 |
+
# ── Cache-preload from previous WooCommerce CSV ──────────────────────────────
|
| 141 |
+
def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
    """Seed the on-disk cache with completions from an earlier WooCommerce CSV.

    For every row of *df_new* whose ``Course ID`` equals the ``SKU`` of a
    ``variable`` (parent) row in *prev_csv*, the previously generated texts
    are stored under the prompts the current run would build from that row.

    Args:
        prev_csv: Path of the WooCommerce CSV produced by a previous run.
        df_new: The freshly loaded schedule.
        dcol, ocol, pcol, acol: Names of the description, objectives,
            prerequisite and outline columns in *df_new*; any may be ``None``
            or absent, in which case that field is skipped.

    Preloading is purely an optimisation, so every problem (unreadable file,
    missing columns, empty cells) results in fewer cache entries, never in an
    exception. Blank/NaN source texts and blank/NaN previous completions are
    skipped: caching them would store a non-text or empty "completion" that
    later runs would treat as a valid hit.
    """
    try:
        prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
    except Exception:
        # Any read/parse failure just means there is nothing to preload.
        return
    if not {"Type", "SKU"} <= set(prev.columns) or "Course ID" not in df_new.columns:
        return

    # astype(str) keeps the filter working when the column is not text
    # (e.g. entirely empty and therefore parsed as float).
    is_parent = prev["Type"].astype(str).str.startswith("variable")
    prev_map = {row["SKU"]: row for _, row in prev[is_parent].iterrows()}  # SKU == Course ID

    # (instruction, source column in the schedule, column in the old CSV)
    fields = (
        (DESC_SHORT, dcol, "Short description"),
        (DESC_LONG, dcol, "Description"),
        (OBJECTIVES, ocol, "Meta: objectives"),
        (AGENDA, acol, "Meta: agenda"),
        (PREREQ, pcol, "Meta: prerequisites"),
    )

    seen: set[str] = set()  # a schedule repeats each course once per session
    for _, row in df_new.iterrows():
        cid = row["Course ID"]
        if cid not in prev_map:
            continue
        old = prev_map[cid]

        for instruction, src_col, prev_col in fields:
            if src_col is None or src_col not in row.index:
                continue
            source = row[src_col]
            completion = old.get(prev_col)
            if not (isinstance(source, str) and source.strip()):
                continue
            if not (isinstance(completion, str) and completion.strip()):
                continue
            # NOTE(review): this must match the prompt text the LLM helper
            # builds, otherwise the entry is never hit — verify against it.
            prompt = f"{instruction}\n\nText: {source}"
            if prompt in seen:
                continue
            seen.add(prompt)
            _set_cache(prompt, completion)
|
| 168 |
+
|
| 169 |
+
# ── Helper: read user file (CSV or Excel) ────────────────────────────────────
|
| 170 |
def _read(path: str) -> pd.DataFrame:
    """Load the uploaded schedule at *path* into a DataFrame.

    ``.xlsx``/``.xls`` files (case-insensitive) are read as Excel; anything
    else is read as CSV. CSVs are decoded as UTF-8 first (``utf-8-sig`` also
    strips a byte-order mark, which would otherwise end up glued to the first
    column name) and only fall back to latin1 when the bytes are not valid
    UTF-8, so legacy exports keep loading as before.
    """
    if path.lower().endswith((".xlsx", ".xls")):
        return pd.read_excel(path)
    try:
        return pd.read_csv(path, encoding="utf-8-sig")
    except UnicodeDecodeError:
        # latin1 maps every byte to a character, so this fallback cannot fail
        # on decoding.
        return pd.read_csv(path, encoding="latin1")
|
| 174 |
|
| 175 |
+
# ── Enrichment step (async batched LLM) ──────────────────────────────────────
|
| 176 |
+
async def _enrich_dataframe(df, dcol, ocol, pcol, acol):
    """Run all LLM batches concurrently and return the five enrichment columns.

    Args:
        df: The schedule DataFrame.
        dcol, ocol, pcol, acol: Names of the description, objectives,
            prerequisite and outline columns. A name that is ``None`` or not
            present in *df* is treated as a column of blanks.

    Returns:
        ``(short_desc, long_desc, objectives, agenda, prerequisites)`` — five
        lists with one entry per row of *df*. Rows whose prerequisite cell is
        blank get ``DEFAULT_PREREQ``.
    """

    def _texts(col):
        # `df.get(col, "")` would return the plain string "" for an unknown
        # column and then fail on `.fillna`; hand back blanks instead.
        if col is None or col not in df.columns:
            return [""] * len(df)
        return df[col].fillna("").tolist()

    prereq_raw = _texts(pcol)

    async with openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) as client:
        # Prerequisites go through the same gather as the other fields, so
        # they are requested concurrently rather than one row at a time.
        sdesc, ldesc, fobj, fout, formatted_pre = await asyncio.gather(
            _batch_async(_texts(dcol), DESC_SHORT, client),
            _batch_async(_texts(dcol), DESC_LONG, client),
            _batch_async(_texts(ocol), OBJECTIVES, client),
            _batch_async(_texts(acol), AGENDA, client),
            _batch_async(prereq_raw, PREREQ, client),
        )

    # Blank source cells get the default text; everything else keeps whatever
    # the batch helper produced for that row.
    fpre = [
        DEFAULT_PREREQ if not str(raw).strip() else done
        for raw, done in zip(prereq_raw, formatted_pre)
    ]

    return sdesc, ldesc, fobj, fout, fpre
|
| 195 |
|
| 196 |
+
# ── Main converter ───────────────────────────────────────────────────────────
|
| 197 |
+
def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
|
| 198 |
+
df = _read(schedule_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
df.columns = df.columns.str.strip()
|
| 200 |
|
| 201 |
+
first_col = lambda *cand: next((c for c in cand if c in df.columns), None)
|
|
|
|
| 202 |
dcol = first_col("Description", "Decription")
|
| 203 |
ocol = first_col("Objectives", "objectives")
|
| 204 |
pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
|
| 205 |
acol = first_col("Outline")
|
| 206 |
+
dur = first_col("Duration") or "Duration"
|
| 207 |
+
sid = first_col("Course SID", "Course SID")
|
| 208 |
|
| 209 |
if dur not in df.columns:
|
| 210 |
df[dur] = ""
|
| 211 |
|
| 212 |
+
# optional cache preload
|
| 213 |
+
if prev_csv_path:
|
| 214 |
+
_preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)
|
| 215 |
+
|
| 216 |
+
# async-enrich via LLM
|
| 217 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
| 218 |
_enrich_dataframe(df, dcol, ocol, pcol, acol)
|
| 219 |
)
|
| 220 |
+
df["Short_Description"] = sdesc
|
| 221 |
+
df["Condensed_Description"] = ldesc
|
| 222 |
+
df["Formatted_Objectives"] = fobj
|
| 223 |
+
df["Formatted_Agenda"] = fout
|
| 224 |
+
df["Formatted_Prerequisites"]= fpre
|
| 225 |
|
| 226 |
+
# schedule aggregation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
|
| 228 |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
|
| 229 |
|
|
|
|
| 233 |
.apply(lambda s: ",".join(s.dropna().unique()))
|
| 234 |
.reset_index(name="Dates")
|
| 235 |
)
|
|
|
|
| 236 |
t_agg = (
|
| 237 |
dsorted.groupby("Course ID", group_keys=False)
|
| 238 |
.apply(
|
|
|
|
| 246 |
)
|
| 247 |
.reset_index(name="Times")
|
| 248 |
)
|
|
|
|
| 249 |
parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
|
| 250 |
|
|
|
|
| 251 |
parent = pd.DataFrame(
|
| 252 |
{
|
| 253 |
"Type": "variable",
|
|
|
|
| 333 |
|
| 334 |
all_rows = pd.concat([parent, child], ignore_index=True)
|
| 335 |
order = [
|
| 336 |
+
"Type","SKU","Name","Published","Visibility in catalog","Short description","Description",
|
| 337 |
+
"Tax status","In stock?","Stock","Sold individually?","Regular price","Categories","Images",
|
| 338 |
+
"Parent","Brands","Attribute 1 name","Attribute 1 value(s)","Attribute 1 visible","Attribute 1 global",
|
| 339 |
+
"Attribute 2 name","Attribute 2 value(s)","Attribute 2 visible","Attribute 2 global",
|
| 340 |
+
"Attribute 3 name","Attribute 3 value(s)","Attribute 3 visible","Attribute 3 global",
|
| 341 |
+
"Meta: outline","Meta: days","Meta: location","Meta: overview","Meta: objectives",
|
| 342 |
+
"Meta: prerequisites","Meta: agenda",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
]
|
|
|
|
| 344 |
out = BytesIO()
|
| 345 |
all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
|
| 346 |
out.seek(0)
|
| 347 |
return out
|
| 348 |
|
| 349 |
+
# ── Gradio interface ─────────────────────────────────────────────────────────
|
| 350 |
+
def process_files(schedule: gr.File, previous: gr.File | None) -> str:
    """Gradio callback: convert the uploaded schedule and return the CSV's path.

    Args:
        schedule: The uploaded NetCom schedule. Accepted either as a filepath
            string or as an object exposing the path as ``.name``.
        previous: Optional WooCommerce CSV from an earlier run, in the same
            two forms, or ``None`` when nothing was uploaded.

    Returns:
        Path of a temporary ``.csv`` file holding the generated WooCommerce
        CSV. The file is created with ``delete=False`` so it is still there
        when Gradio serves it.

    Raises:
        ValueError: If no schedule file was uploaded.
    """

    def _upload_path(upload):
        # Tolerate both shapes a File input may arrive in: a plain filepath
        # string, or a tempfile-like wrapper carrying the path in `.name`.
        return upload if isinstance(upload, str) else upload.name

    if schedule is None:
        raise ValueError("Please upload a NetCom schedule file.")

    csv_bytes = convert(
        _upload_path(schedule),
        _upload_path(previous) if previous else None,
    )
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        tmp.write(csv_bytes.getvalue())
    return tmp.name
|
|
|
|
|
|
|
| 355 |
|
| 356 |
# Two file inputs (schedule + optional previous export) -> one downloadable CSV.
ui = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(
            label="Upload NetCom schedule (.csv/.xlsx/.xls)",
            file_types=[".csv", ".xlsx", ".xls"],
        ),
        # No `optional=` keyword: it is not a parameter of gr.File in current
        # Gradio releases and makes the constructor raise. An input left
        # empty already reaches the callback as None.
        gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
    ],
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom \u2192 WooCommerce CSV Processor (Try 2)",
    description=(
        "1. Upload the **latest NetCom schedule** file.\n"
        "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "
        "pre-load the cache and skip already-processed courses."
    ),
    analytics_enabled=False,
)
|
| 371 |
|