3v324v23 committed on
Commit
18681b5
·
1 Parent(s): da49eac

Deploy app with advanced slogan generator

Browse files
Files changed (1) hide show
  1. app.py +69 -64
app.py CHANGED
@@ -1,14 +1,19 @@
1
-
2
  import gradio as gr
3
  import pandas as pd
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
- import faiss
 
 
 
 
 
 
 
7
 
8
- # === Load embedding model ===
9
  embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
10
 
11
- # Dummy dataset (for demo) – replace with your full startup dataset
12
  data = pd.DataFrame({
13
  "name": ["HowDidIDo", "Museotainment", "Movitr"],
14
  "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
@@ -19,7 +24,7 @@ data = pd.DataFrame({
19
  ]
20
  })
21
 
22
- # Build FAISS index
23
  data_vecs = embed_model.encode(data["description"].tolist())
24
  faiss.normalize_L2(data_vecs)
25
  index = faiss.IndexFlatIP(data_vecs.shape[1])
@@ -33,80 +38,80 @@ def recommend(query, top_k=3):
33
  results["score"] = scores[0]
34
  return results[["name", "tagline", "description", "score"]]
35
 
36
- def generate_slogan(query_text, neighbors_df=None, n_samples=16):
37
- ctx = _neighbor_context(neighbors_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  prompt = (
39
- "You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).
40
- "
41
- "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.
42
- "
43
- "Focus on clear benefits and vivid verbs. Do not copy the description. Return ONLY a list, one slogan per line.
44
-
45
- "
46
- "Good Examples:
47
- "
48
- "Description: AI assistant for doctors to prioritize patient cases
49
- "
50
- "Slogan: Less Guessing. More Healing.
51
-
52
- "
53
- "Description: Payments for small online stores
54
- "
55
- "Slogan: Built to Grow with Your Cart.
56
-
57
- "
58
- "Description: Neurotech headset to boost focus
59
- "
60
- "Slogan: Train Your Brain to Win.
61
-
62
- "
63
  )
64
- if ctx:
65
- prompt += f"Similar taglines (style only):
66
- {ctx}
67
-
68
- "
69
- prompt += f"Description: {query_text}
70
- Slogans:"
71
 
72
  input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
73
  outputs = GEN_MODEL.generate(
74
  input_ids,
75
- max_new_tokens=24,
76
  do_sample=True,
77
  top_k=60,
78
  top_p=0.92,
79
- temperature=1.2,
80
- num_return_sequences=n_samples,
81
- repetition_penalty=1.08
82
  )
83
 
84
- raw_cands = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
85
-
86
- cand_set = set()
87
- for txt in raw_cands:
88
- for line in txt.split("
89
- "):
90
- s = _clean_slogan(line)
91
- if not s:
92
- continue
93
- if len(s.split()) < 2 or len(s.split()) > 8:
94
- continue
95
- if _is_blocked_slogan(s):
96
- continue
97
- cand_set.add(_titlecase_soft(s))
98
-
99
- if not cand_set:
100
- return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))
101
 
102
- scored = _score_candidates(query_text, sorted(cand_set), neighbors_df)
103
- if not scored:
104
  return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))
105
 
106
- scored.sort(key=lambda x: x[1], reverse=True)
107
- return scored[0][0]
108
-
109
 
 
 
 
110
  def pipeline(user_input):
111
  recs = recommend(user_input, top_k=3)
112
  slogan = generate_slogan(user_input)
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  from sentence_transformers import SentenceTransformer
5
+ import faiss, re, torch
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
+
8
+ # === Load models ===
9
+ GEN_TOK = AutoTokenizer.from_pretrained("google/flan-t5-base")
10
+ GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
11
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
+ GEN_MODEL = GEN_MODEL.to(DEVICE)
13
 
 
14
  embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
15
 
16
+ # Dummy dataset (demo only)
17
  data = pd.DataFrame({
18
  "name": ["HowDidIDo", "Museotainment", "Movitr"],
19
  "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
 
24
  ]
25
  })
26
 
27
+ # === Build FAISS index ===
28
  data_vecs = embed_model.encode(data["description"].tolist())
29
  faiss.normalize_L2(data_vecs)
30
  index = faiss.IndexFlatIP(data_vecs.shape[1])
 
38
  results["score"] = scores[0]
39
  return results[["name", "tagline", "description", "score"]]
40
 
41
+ # ==========================================================
42
+ # Advanced Slogan Generator (with cleaning & filtering)
43
+ # ==========================================================
44
+ BLOCK_PATTERNS = [
45
+ r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
46
+ r"^[A-Z][a-z]+ [A-Z][a-z]+$",
47
+ r"^[A-Z][a-z]+$"
48
+ ]
49
+
50
+ FORBIDDEN = {"app","assistant","platform","solution","system","marketplace",
51
+ "ai","machine learning","augmented reality","virtual reality","empower"}
52
+
53
+ def _is_blocked_slogan(s: str) -> bool:
54
+ if not s: return True
55
+ s_low = s.lower()
56
+ for pat in BLOCK_PATTERNS:
57
+ if re.match(pat, s.strip()):
58
+ return True
59
+ for w in FORBIDDEN:
60
+ if w in s_low:
61
+ return True
62
+ return False
63
+
64
+ def _clean_slogan(text: str, max_words: int = 8) -> str:
65
+ text = text.strip().split("\n")[0]
66
+ text = re.sub(r"[\"โ€œโ€โ€˜โ€™]", "", text)
67
+ text = re.sub(r"\s+", " ", text).strip()
68
+ words = text.split()
69
+ if len(words) > max_words:
70
+ text = " ".join(words[:max_words])
71
+ return text
72
+
73
def generate_slogan(query_text: str, n_samples: int = 16) -> str:
    """Sample several slogans for *query_text* and return the best-shaped one.

    Builds a few-shot prompt, samples ``n_samples`` sequences from
    ``GEN_MODEL``, cleans each with ``_clean_slogan``, and keeps those that
    are 2-8 words long and not rejected by ``_is_blocked_slogan``.

    Args:
        query_text: Startup description to write a slogan for.
        n_samples: Number of sequences to sample from the model.

    Returns:
        The surviving candidate whose word count is closest to five (the
        earliest sample wins ties). If nothing survives filtering, the
        cleaned first sample is returned instead.
    """
    instructions = (
        "You are a creative branding expert. Write ONE short, original, catchy startup slogan (max 8 words).\n"
        "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, empower.\n"
        "Do NOT copy the description. Focus on benefits and vivid verbs. Return ONLY the slogan.\n\n"
    )
    few_shot = (
        "Examples:\n"
        "Description: AI assistant for doctors to prioritize patient cases\n"
        "Slogan: Less Guessing. More Healing.\n\n"
        "Description: Payments for small online stores\n"
        "Slogan: Built to Grow with Your Cart.\n\n"
        "Description: Neurotech headset to boost focus\n"
        "Slogan: Train Your Brain to Win.\n\n"
    )
    prompt = instructions + few_shot + f"Description: {query_text}\nSlogan:"

    encoded = GEN_TOK(prompt, return_tensors="pt")
    sampled = GEN_MODEL.generate(
        encoded.input_ids.to(DEVICE),
        max_new_tokens=16,
        do_sample=True,
        top_k=60,
        top_p=0.92,
        temperature=1.1,
        num_return_sequences=n_samples,
    )

    cleaned = [
        _clean_slogan(GEN_TOK.decode(seq, skip_special_tokens=True))
        for seq in sampled
    ]
    keepers = [
        text
        for text in cleaned
        if 2 <= len(text.split()) <= 8 and not _is_blocked_slogan(text)
    ]

    # Nothing passed the filters: fall back to the first sample as-is.
    if not keepers:
        return cleaned[0]

    # Prefer a length of about five words.
    return min(keepers, key=lambda text: abs(len(text.split()) - 5))
111
 
112
+ # ==========================================================
113
+ # Pipeline
114
+ # ==========================================================
115
  def pipeline(user_input):
116
  recs = recommend(user_input, top_k=3)
117
  slogan = generate_slogan(user_input)