Deploy app with advanced slogan generator
app.py
CHANGED
@@ -1,14 +1,19 @@
-
 import gradio as gr
 import pandas as pd
 import numpy as np
 from sentence_transformers import SentenceTransformer
-import faiss
+import faiss, re, torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+# === Load models ===
+GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-base")
+GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+GEN_MODEL = GEN_MODEL.to(DEVICE)

-# === Load embedding model ===
 embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

-# Dummy dataset (
+# Dummy dataset (demo only)
 data = pd.DataFrame({
     "name": ["HowDidIDo", "Museotainment", "Movitr"],
     "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
@@ -19,7 +24,7 @@ data = pd.DataFrame({
     ]
 })

-# Build FAISS index
+# === Build FAISS index ===
 data_vecs = embed_model.encode(data["description"].tolist())
 faiss.normalize_L2(data_vecs)
 index = faiss.IndexFlatIP(data_vecs.shape[1])
@@ -33,80 +38,80 @@ def recommend(query, top_k=3):
     results["score"] = scores[0]
     return results[["name", "tagline", "description", "score"]]

-
-
+# ==========================================================
+# Advanced Slogan Generator (with cleaning & filtering)
+# ==========================================================
+BLOCK_PATTERNS = [
+    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
+    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
+    r"^[A-Z][a-z]+$"
+]
+
+FORBIDDEN = {"app","assistant","platform","solution","system","marketplace",
+             "ai","machine learning","augmented reality","virtual reality","empower"}
+
+def _is_blocked_slogan(s: str) -> bool:
+    if not s: return True
+    s_low = s.lower()
+    for pat in BLOCK_PATTERNS:
+        if re.match(pat, s.strip()):
+            return True
+    for w in FORBIDDEN:
+        if w in s_low:
+            return True
+    return False
+
+def _clean_slogan(text: str, max_words: int = 8) -> str:
+    text = text.strip().split("\n")[0]
+    text = re.sub(r"[\"“”‘’]", "", text)
+    text = re.sub(r"\s+", " ", text).strip()
+    words = text.split()
+    if len(words) > max_words:
+        text = " ".join(words[:max_words])
+    return text
+
+def generate_slogan(query_text: str, n_samples: int = 16) -> str:
     prompt = (
-        "You are a creative
-        "
-        "
-        "
-        "
-
-        "
-        "
-        "
-        "
-        "
-        "Slogan: Less Guessing. More Healing.
-
-        "
-        "Description: Payments for small online stores
-        "
-        "Slogan: Built to Grow with Your Cart.
-
-        "
-        "Description: Neurotech headset to boost focus
-        "
-        "Slogan: Train Your Brain to Win.
-
-        "
+        "You are a creative branding expert. Write ONE short, original, catchy startup slogan (max 8 words).\n"
+        "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, empower.\n"
+        "Do NOT copy the description. Focus on benefits and vivid verbs. Return ONLY the slogan.\n\n"
+        "Examples:\n"
+        "Description: AI assistant for doctors to prioritize patient cases\n"
+        "Slogan: Less Guessing. More Healing.\n\n"
+        "Description: Payments for small online stores\n"
+        "Slogan: Built to Grow with Your Cart.\n\n"
+        "Description: Neurotech headset to boost focus\n"
+        "Slogan: Train Your Brain to Win.\n\n"
+        f"Description: {query_text}\nSlogan:"
     )
-    if ctx:
-        prompt += f"Similar taglines (style only):
-{ctx}
-
-"
-    prompt += f"Description: {query_text}
-Slogans:"

     input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
     outputs = GEN_MODEL.generate(
         input_ids,
-        max_new_tokens=
+        max_new_tokens=16,
         do_sample=True,
         top_k=60,
         top_p=0.92,
-        temperature=1.
-        num_return_sequences=n_samples
-        repetition_penalty=1.08
+        temperature=1.1,
+        num_return_sequences=n_samples
     )

-
-
-
-
-
-"):
-        s = _clean_slogan(line)
-        if not s:
-            continue
-        if len(s.split()) < 2 or len(s.split()) > 8:
-            continue
-        if _is_blocked_slogan(s):
-            continue
-        cand_set.add(_titlecase_soft(s))
-
-    if not cand_set:
-        return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))
+    candidates = []
+    for out in outputs:
+        s = _clean_slogan(GEN_TOK.decode(out, skip_special_tokens=True))
+        if 2 <= len(s.split()) <= 8 and not _is_blocked_slogan(s):
+            candidates.append(s)

-
-    if not scored:
+    if not candidates:
         return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))

-
-
-
+    # Prefer candidates closest to ~5 words
+    candidates.sort(key=lambda x: abs(len(x.split()) - 5))
+    return candidates[0]

+# ==========================================================
+# Pipeline
+# ==========================================================
 def pipeline(user_input):
     recs = recommend(user_input, top_k=3)
     slogan = generate_slogan(user_input)
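
A quick way to sanity-check the new filtering helpers is to call _is_blocked_slogan directly in the same process as app.py. The sample strings below are made up for illustration; the expected outcomes follow from the BLOCK_PATTERNS regexes and the FORBIDDEN set exactly as added in this commit.

# Illustrative check of the filters added above (run alongside app.py's definitions).
samples = [
    "Smart Tutoring Platform",       # blocked: matches the "<Word> <Word> Platform" pattern (and contains "platform")
    "Empower Your Team Daily",       # blocked: contains the forbidden word "empower"
    "Less Guessing. More Healing.",  # kept: no blocked pattern, no forbidden term
]
for s in samples:
    print(f"{s!r:35} -> {'blocked' if _is_blocked_slogan(s) else 'kept'}")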
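
The final hunk ends inside pipeline(), so the function's return value and the Gradio wiring are not part of the diff shown here. Below is a minimal sketch of how the app could be exposed, assuming pipeline returns the recommendations DataFrame together with the slogan string; the return shape, component choices, and title are assumptions, not code from this commit.

# Hypothetical completion of pipeline() and UI wiring -- not shown in the diff above.
import gradio as gr

def pipeline(user_input):
    recs = recommend(user_input, top_k=3)
    slogan = generate_slogan(user_input)
    return recs, slogan  # assumed return shape: (pandas.DataFrame, str)

demo = gr.Interface(
    fn=pipeline,
    inputs=gr.Textbox(label="Describe your startup"),
    outputs=[gr.Dataframe(label="Similar startups"), gr.Textbox(label="Suggested slogan")],
    title="Startup Recommender + Slogan Generator",
)

if __name__ == "__main__":
    demo.launch()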