Update app.py
app.py CHANGED
@@ -1,17 +1,19 @@
  1 | - """NetCom → WooCommerce transformer (Try
  2 | - duplicate
  3 | -
  4 |
  5 | -
  6 | - CSV; output the fresh WooCommerce CSV
  7 |
  8 | - New
  9 |   --------------------
 10 | - * **
 11 | -
 12 | -
 13 | - *
 14 | -
 15 |   """
 16 |
 17 |   from __future__ import annotations
@@ -23,44 +25,64 @@ import os
 23 |   import tempfile
 24 |   from io import BytesIO
 25 |   from pathlib import Path
 26 |
 27 |   import gradio as gr
 28 |   import gradio_client.utils
 29 |   import openai
 30 |   import pandas as pd
 31 |
 32 | - # ── Gradio bool
 33 |   _original = gradio_client.utils._json_schema_to_python_type
 34 |   def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
 35 |       if isinstance(schema, bool):
 36 |           return "any"
 37 |       return _original(schema, defs)
 38 |   gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
 39 |
 40 | - # ── Persistent disk cache (HF Spaces uses /data)
 41 |   _PERSISTENT_ROOT = Path("/data")
 42 |   CACHE_DIR = (_PERSISTENT_ROOT if _PERSISTENT_ROOT.exists() else Path(".")) / "ai_response_cache"
 43 |   CACHE_DIR.mkdir(parents=True, exist_ok=True)
 44 |
 45 |   def _cache_path(p: str) -> Path:
 46 |       return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
 47 |
 48 |   def _get_cached(p: str) -> str | None:
 49 |       try:
 50 |           return json.loads(_cache_path(p).read_text("utf-8"))["response"]
 51 |       except Exception:
 52 |           return None
 53 |
 54 |   def _set_cache(p: str, r: str) -> None:
 55 |       try:
 56 |           _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
 57 |       except Exception:
 58 |           pass
 59 |
 60 | - # ──
 61 |   _SEM = asyncio.Semaphore(100)               # ≤100 concurrent OpenAI calls
 62 |   _inflight: dict[str, asyncio.Future] = {}   # prompt → Future
 63 |
 64 |   async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
 65 |       cached = _get_cached(prompt)
 66 |       if cached is not None:
@@ -93,6 +115,7 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
 93 |       finally:
 94 |           _inflight.pop(prompt, None)
 95 |
 96 |   async def _batch_async(lst, instruction: str, client):
 97 |       out = ["" for _ in lst]
 98 |       idx, prompts = [], []
@@ -105,30 +128,30 @@
105 |
106 |       responses = await asyncio.gather(*[_gpt_async(client, p) for p in prompts])
107 |       for j, val in enumerate(responses):
108 | -         out[idx[j]] = val
109 |       return out
110 |
111 | - # ── Instructions (reuse across preload & gen)
112 |   DESC_SHORT = "Create a concise 250-character summary of this course description:"
113 |   DESC_LONG = "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"
114 |   OBJECTIVES = "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"
115 |   AGENDA = "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"
116 |   PREREQ = "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"
117 |
118 | - # ── Logo map (relative paths, with common aliases)
119 |   logos = {
120 | -     "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
121 | -     "AWS": "/wp-content/uploads/2025/04/aws.png",
122 | -     "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
123 | -     "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
124 | -     "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
125 | -     "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
126 | -     "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
127 | -     "PMI": "/wp-content/uploads/2025/04/PMI.png",
128 | -     "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
129 | -     "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
130 | -     "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
131 | -     "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
132 |   }
133 |
134 |   DEFAULT_PREREQ = (
@@ -137,9 +160,10 @@ DEFAULT_PREREQ = (
137 |       "learning experience."
138 |   )
139 |
140 | - # ── Cache
141 |   def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
142 | -     """Seed the on
143 |       try:
144 |           prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
145 |       except Exception:
@@ -159,14 +183,15 @@ def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
159 |           ag = str(row[acol])
160 |           pre = str(row[pcol])
161 |
162 | -         _set_cache(f"{DESC_SHORT}\n\nText: {desc}", old.get("Short description", ""))
163 | -         _set_cache(f"{DESC_LONG}\n\nText: {desc}",
164 | -         _set_cache(f"{OBJECTIVES}\n\nText: {obj}", old.get("Meta: objectives", ""))
165 | -         _set_cache(f"{AGENDA}\n\nText: {ag}", old.get("Meta: agenda", ""))
166 |           if pre.strip():
167 | -             _set_cache(f"{PREREQ}\n\nText: {pre}", old.get("Meta: prerequisites", ""))
168 |
169 | - # ── Helper: read user file (CSV or Excel) ────────────────────────────────────
170 |   def _read(path: str) -> pd.DataFrame:
171 |       if path.lower().endswith((".xlsx", ".xls")):
172 |           return pd.read_excel(path)
@@ -191,9 +216,17 @@ async def _enrich_dataframe(df, dcol, ocol, pcol, acol):
191 |           out = await _batch_async([req], PREREQ, client)
192 |           fpre.append(out[0])
193 |
194 |       return sdesc, ldesc, fobj, fout, fpre
195 |
196 | - # ── Main converter
197 |   def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
198 |       df = _read(schedule_path)
199 |       df.columns = df.columns.str.strip()
@@ -203,36 +236,43 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
203 |       ocol = first_col("Objectives", "objectives")
204 |       pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
205 |       acol = first_col("Outline")
206 | -     dur = first_col("Duration") or "Duration"
207 |       sid = first_col("Course SID", "Course SID")
208 |
209 | -     if dur not in df.columns:
210 | -         df[dur] = ""
211 | -
212 |       # optional cache preload
213 |       if prev_csv_path:
214 |           _preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)
215 |
216 | -     # async
217 |       sdesc, ldesc, fobj, fout, fpre = asyncio.run(
218 |           _enrich_dataframe(df, dcol, ocol, pcol, acol)
219 |       )
220 | -     df["Short_Description"] = sdesc
221 | -     df["Condensed_Description"] = ldesc
222 | -     df["Formatted_Objectives"] = fobj
223 | -     df["Formatted_Agenda"] = fout
224 | -     df["Formatted_Prerequisites"]= fpre
225 |
226 | -
227 |       df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
228 |       df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
229 |
230 |       dsorted = df.sort_values(["Course ID", "Course Start Date"])
231 |       d_agg = (
232 |           dsorted.groupby("Course ID")["Date_fmt"]
233 | -         .apply(lambda s: ",".join(s.dropna().unique()))
234 |           .reset_index(name="Dates")
235 |       )
236 |       t_agg = (
237 |           dsorted.groupby("Course ID", group_keys=False)
238 |           .apply(
@@ -246,7 +286,16 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
246 |           )
247 |           .reset_index(name="Times")
248 |       )
249 | -
250 |
251 |       parent = pd.DataFrame(
252 |           {
@@ -279,7 +328,7 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
279 |               "Attribute 3 visible": "visible",
280 |               "Attribute 3 global": 1,
281 |               "Meta: outline": parents["Formatted_Agenda"],
282 | -             "Meta: days": parents[
283 |               "Meta: location": "Virtual",
284 |               "Meta: overview": parents["Target Audience"],
285 |               "Meta: objectives": parents["Formatted_Objectives"],
@@ -322,7 +371,7 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
322 |               "Attribute 3 visible": "visible",
323 |               "Attribute 3 global": 1,
324 |               "Meta: outline": dsorted["Formatted_Agenda"],
325 | -             "Meta: days": dsorted[
326 |               "Meta: location": "Virtual",
327 |               "Meta: overview": dsorted["Target Audience"],
328 |               "Meta: objectives": dsorted["Formatted_Objectives"],
@@ -346,7 +395,8 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
346 |       out.seek(0)
347 |       return out
348 |
349 | - # ── Gradio interface
350 |   def process_files(schedule: gr.File, previous: gr.File | None) -> str:
351 |       csv_bytes = convert(schedule.name, previous.name if previous else None)
352 |       with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
@@ -360,7 +410,7 @@ ui = gr.Interface(
360 |           gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
361 |       ],
362 |       outputs=gr.File(label="Download WooCommerce CSV"),
363 | -     title="NetCom → WooCommerce CSV Processor (Try
364 |       description=(
365 |           "1. Upload the **latest NetCom schedule** file.\n"
366 |           "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "
  1 | + """NetCom → WooCommerce transformer (Try 3 schema → meta-days calc, sorted attributes, deduped AI sections, persistent cache, 100-parallel,
  2 | + duplicate-safe, relative-logo paths, cache-preload)
  3 | + ==============================================================================
  4 |
  5 | + Accept a NetCom schedule (CSV/XLSX) and **optionally** a *previous* WooCommerce
  6 | + CSV; output the fresh WooCommerce CSV.
  7 |
  8 | + New in this revision
  9 |   --------------------
 10 | + * **Meta days** automatically calculated as the inclusive span (in days) between
 11 | +   the earliest and latest course dates for each Course ID.
 12 | + * **Attribute 1 (Date)** lists are now guaranteed to be sorted chronologically.
 13 | + * All AI-generated sections (descriptions, objectives, agenda, prerequisites)
 14 | +   are post-processed to **deduplicate any repeated lines** inside each section.
 15 | + * Everything else (persistent cache in `/data`, 100-parallel semaphore,
 16 | +   in-flight de-duplication, pandas compatibility fix) remains unchanged.
 17 |   """
 18 |
 19 |   from __future__ import annotations
 25 |   import tempfile
 26 |   from io import BytesIO
 27 |   from pathlib import Path
 28 | + from typing import List
 29 |
 30 |   import gradio as gr
 31 |   import gradio_client.utils
 32 |   import openai
 33 |   import pandas as pd
 34 |
 35 | + # ── Gradio bool-schema hot-patch ─────────────────────────────────────────────
 36 |   _original = gradio_client.utils._json_schema_to_python_type
 37 | +
 38 |   def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
 39 |       if isinstance(schema, bool):
 40 |           return "any"
 41 |       return _original(schema, defs)
 42 | +
 43 |   gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
 44 |
 45 | + # ── Persistent disk cache (HF Spaces uses /data) ─────────────────────────────
 46 |   _PERSISTENT_ROOT = Path("/data")
 47 |   CACHE_DIR = (_PERSISTENT_ROOT if _PERSISTENT_ROOT.exists() else Path(".")) / "ai_response_cache"
 48 |   CACHE_DIR.mkdir(parents=True, exist_ok=True)
 49 |
 50 | +
 51 |   def _cache_path(p: str) -> Path:
 52 |       return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
 53 |
 54 | +
 55 |   def _get_cached(p: str) -> str | None:
 56 |       try:
 57 |           return json.loads(_cache_path(p).read_text("utf-8"))["response"]
 58 |       except Exception:
 59 |           return None
 60 |
 61 | +
 62 |   def _set_cache(p: str, r: str) -> None:
 63 |       try:
 64 |           _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
 65 |       except Exception:
 66 |           pass
 67 |
 68 | + # ── Helpers ──────────────────────────────────────────────────────────────────
 69 | +
 70 | + def _dedup_lines(txt: str) -> str:
 71 | +     """Remove duplicated lines while preserving order inside a block of text."""
 72 | +     seen = set()
 73 | +     out: List[str] = []
 74 | +     for raw in txt.splitlines():
 75 | +         line = raw.rstrip()
 76 | +         if line and line not in seen:
 77 | +             out.append(line)
 78 | +             seen.add(line)
 79 | +     return "\n".join(out)
 80 | +
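(Illustration of the new helper, not a line of app.py: it keeps only the first occurrence of each non-empty line, preserving order.)

    >>> _dedup_lines("• Overview\n• Overview\n• Labs")
    '• Overview\n• Labs'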
+
# ββ OpenAI helpers: 100βparallel + deβdup βββββββββββββββββββββββββββββββββββ
|
| 82 |
_SEM = asyncio.Semaphore(100) # β€100 concurrent OpenAI calls
|
| 83 |
_inflight: dict[str, asyncio.Future] = {} # prompt β Future
|
| 84 |
|
| 85 |
+
|
| 86 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
| 87 |
cached = _get_cached(prompt)
|
| 88 |
if cached is not None:
|
|
|
|
115 |       finally:
116 |           _inflight.pop(prompt, None)
117 |
118 | +
119 |   async def _batch_async(lst, instruction: str, client):
120 |       out = ["" for _ in lst]
121 |       idx, prompts = [], []
128 |
129 |       responses = await asyncio.gather(*[_gpt_async(client, p) for p in prompts])
130 |       for j, val in enumerate(responses):
131 | +         out[idx[j]] = _dedup_lines(val)
132 |       return out
133 |
134 | + # ── Instructions (reuse across preload & gen) ────────────────────────────────
135 |   DESC_SHORT = "Create a concise 250-character summary of this course description:"
136 |   DESC_LONG = "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"
137 |   OBJECTIVES = "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"
138 |   AGENDA = "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"
139 |   PREREQ = "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"
140 |
141 | + # ── Logo map (relative paths, with common aliases) ───────────────────────────
142 |   logos = {
143 | +     "Amazon Web Services": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aws.png",
144 | +     "AWS": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aws.png",
145 | +     "Cisco": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
146 | +     "Microsoft": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
147 | +     "Google Cloud": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Google_Cloud.png",
148 | +     "EC Council": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Ec_Council.png",
149 | +     "ITIL": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/ITIL.webp",
150 | +     "PMI": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/PMI.png",
151 | +     "Comptia": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Comptia.png",
152 | +     "Autodesk": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/autodesk.png",
153 | +     "ISC2": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/ISC2.png",
154 | +     "AICerts": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aicerts-logo-1.png",
155 |   }
156 |
157 |   DEFAULT_PREREQ = (
160 |       "learning experience."
161 |   )
162 |
163 | + # ── Cache-preload from previous WooCommerce CSV ──────────────────────────────
164 | +
165 |   def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
166 | +     """Seed the on-disk cache with completions from an earlier WooCommerce CSV."""
167 |       try:
168 |           prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
169 |       except Exception:
183 |           ag = str(row[acol])
184 |           pre = str(row[pcol])
185 |
186 | +         _set_cache(f"{DESC_SHORT}\n\nText: {desc}", _dedup_lines(old.get("Short description", "")))
187 | +         _set_cache(f"{DESC_LONG}\n\nText: {desc}", _dedup_lines(old.get("Description", "")))
188 | +         _set_cache(f"{OBJECTIVES}\n\nText: {obj}", _dedup_lines(old.get("Meta: objectives", "")))
189 | +         _set_cache(f"{AGENDA}\n\nText: {ag}", _dedup_lines(old.get("Meta: agenda", "")))
190 |           if pre.strip():
191 | +             _set_cache(f"{PREREQ}\n\nText: {pre}", _dedup_lines(old.get("Meta: prerequisites", "")))
192 | +
193 | + # ── Helper: read user file (CSV or Excel) ────────────────────────────────────
194 |
195 |   def _read(path: str) -> pd.DataFrame:
196 |       if path.lower().endswith((".xlsx", ".xls")):
197 |           return pd.read_excel(path)
216 |           out = await _batch_async([req], PREREQ, client)
217 |           fpre.append(out[0])
218 |
219 | +     # Ensure everything is deduped (safety).
220 | +     sdesc = [_dedup_lines(t) for t in sdesc]
221 | +     ldesc = [_dedup_lines(t) for t in ldesc]
222 | +     fobj = [_dedup_lines(t) for t in fobj]
223 | +     fout = [_dedup_lines(t) for t in fout]
224 | +     fpre = [_dedup_lines(t) for t in fpre]
225 | +
226 |       return sdesc, ldesc, fobj, fout, fpre
227 |
228 | + # ── Main converter ───────────────────────────────────────────────────────────
229 | +
230 |   def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
231 |       df = _read(schedule_path)
232 |       df.columns = df.columns.str.strip()
236 |       ocol = first_col("Objectives", "objectives")
237 |       pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
238 |       acol = first_col("Outline")
239 | +     dur = first_col("Duration") or "Duration"   # kept for backward-compat (unused)
240 |       sid = first_col("Course SID", "Course SID")
241 |
242 |       # optional cache preload
243 |       if prev_csv_path:
244 |           _preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)
245 |
246 | +     # async-enrich via LLM
247 |       sdesc, ldesc, fobj, fout, fpre = asyncio.run(
248 |           _enrich_dataframe(df, dcol, ocol, pcol, acol)
249 |       )
250 |
251 | +     df["Short_Description"] = sdesc
252 | +     df["Condensed_Description"] = ldesc
253 | +     df["Formatted_Objectives"] = fobj
254 | +     df["Formatted_Agenda"] = fout
255 | +     df["Formatted_Prerequisites"] = fpre
256 | +
257 | +     # schedule aggregation & meta-days calculation
258 |       df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
259 |       df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
260 |
261 |       dsorted = df.sort_values(["Course ID", "Course Start Date"])
262 | +
263 | +     # "MetaDays" = inclusive span between earliest & latest dates per Course ID
264 | +     meta_days = (
265 | +         dsorted.groupby("Course ID")["Course Start Date"].agg(lambda s: (s.max() - s.min()).days + 1)
266 | +         .reset_index(name="MetaDays")
267 | +     )
268 | +
269 | +     # Attribute 1 list → ensure chronological order
270 |       d_agg = (
271 |           dsorted.groupby("Course ID")["Date_fmt"]
272 | +         .apply(lambda s: ",".join(sorted(s.dropna().unique(), key=lambda x: pd.to_datetime(x))))
273 |           .reset_index(name="Dates")
274 |       )
275 | +
276 |       t_agg = (
277 |           dsorted.groupby("Course ID", group_keys=False)
278 |           .apply(
286 |           )
287 |           .reset_index(name="Times")
288 |       )
289 | +
290 | +     parents = (
291 | +         dsorted.drop_duplicates("Course ID")
292 | +         .merge(d_agg)
293 | +         .merge(t_agg)
294 | +         .merge(meta_days)
295 | +     )
296 | +
297 | +     # propagate MetaDays to each schedule row
298 | +     dsorted = dsorted.merge(meta_days, on="Course ID", how="left")
299 |
300 |       parent = pd.DataFrame(
301 |           {
328 |               "Attribute 3 visible": "visible",
329 |               "Attribute 3 global": 1,
330 |               "Meta: outline": parents["Formatted_Agenda"],
331 | +             "Meta: days": parents["MetaDays"],
332 |               "Meta: location": "Virtual",
333 |               "Meta: overview": parents["Target Audience"],
334 |               "Meta: objectives": parents["Formatted_Objectives"],
371 |               "Attribute 3 visible": "visible",
372 |               "Attribute 3 global": 1,
373 |               "Meta: outline": dsorted["Formatted_Agenda"],
374 | +             "Meta: days": dsorted["MetaDays"],
375 |               "Meta: location": "Virtual",
376 |               "Meta: overview": dsorted["Target Audience"],
377 |               "Meta: objectives": dsorted["Formatted_Objectives"],
395 |       out.seek(0)
396 |       return out
397 |
398 | + # ── Gradio interface ─────────────────────────────────────────────────────────
399 | +
400 |   def process_files(schedule: gr.File, previous: gr.File | None) -> str:
401 |       csv_bytes = convert(schedule.name, previous.name if previous else None)
402 |       with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
410 |           gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
411 |       ],
412 |       outputs=gr.File(label="Download WooCommerce CSV"),
413 | +     title="NetCom → WooCommerce CSV Processor (Try 3)",
414 |       description=(
415 |           "1. Upload the **latest NetCom schedule** file.\n"
416 |           "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "
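For reference, a minimal standalone sketch (not from app.py) of the two date calculations this commit adds, using toy rows but the same pandas calls and column names as the convert() hunks above; the %-m/%-d strftime assumes a Linux strftime, as on Spaces:

    import pandas as pd

    rows = pd.DataFrame({
        "Course ID": ["AWS-101"] * 3,
        "Course Start Date": ["2025-05-05", "2025-05-01", "2025-05-03"],
    })
    rows["Course Start Date"] = pd.to_datetime(rows["Course Start Date"], errors="coerce")
    rows["Date_fmt"] = rows["Course Start Date"].dt.strftime("%-m/%-d/%Y")
    dsorted = rows.sort_values(["Course ID", "Course Start Date"])

    # Inclusive span per Course ID: (latest - earliest) + 1  ->  5/1 to 5/5 is 5 days
    meta_days = (
        dsorted.groupby("Course ID")["Course Start Date"]
        .agg(lambda s: (s.max() - s.min()).days + 1)
        .reset_index(name="MetaDays")
    )

    # Attribute 1 (Date) values, deduplicated and chronologically sorted
    d_agg = (
        dsorted.groupby("Course ID")["Date_fmt"]
        .apply(lambda s: ",".join(sorted(s.dropna().unique(), key=lambda x: pd.to_datetime(x))))
        .reset_index(name="Dates")
    )

    print(meta_days["MetaDays"].iloc[0])   # 5
    print(d_agg["Dates"].iloc[0])          # 5/1/2025,5/3/2025,5/5/2025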