Spaces:

asaf1602
/

sloganAI

Sleeping

App Files Files Community

3v324v23 committed on Aug 23

Commit

18681b5

1 Parent(s): da49eac

Deploy app with advanced slogan generator

Browse files

Files changed (1) hide show

app.py +69 -64

app.py CHANGED Viewed

@@ -1,14 +1,19 @@
 import gradio as gr
 import pandas as pd
 import numpy as np
 from sentence_transformers import SentenceTransformer
-import faiss
-# === Load embedding model ===
 embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
-# Dummy dataset (for demo) – replace with your full startup dataset
 data = pd.DataFrame({
     "name": ["HowDidIDo", "Museotainment", "Movitr"],
     "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
@@ -19,7 +24,7 @@ data = pd.DataFrame({
     ]
 })
-# Build FAISS index
 data_vecs = embed_model.encode(data["description"].tolist())
 faiss.normalize_L2(data_vecs)
 index = faiss.IndexFlatIP(data_vecs.shape[1])
@@ -33,80 +38,80 @@ def recommend(query, top_k=3):
     results["score"] = scores[0]
     return results[["name", "tagline", "description", "score"]]
-def generate_slogan(query_text, neighbors_df=None, n_samples=16):
-    ctx = _neighbor_context(neighbors_df)
     prompt = (
-        "You are a creative brand copywriter. Write short, original, memorable startup slogans (max 8 words).
-"
-        "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, decentralized, empower.
-"
-        "Focus on clear benefits and vivid verbs. Do not copy the description. Return ONLY a list, one slogan per line.
-"
-        "Good Examples:
-"
-        "Description: AI assistant for doctors to prioritize patient cases
-"
-        "Slogan: Less Guessing. More Healing.
-"
-        "Description: Payments for small online stores
-"
-        "Slogan: Built to Grow with Your Cart.
-"
-        "Description: Neurotech headset to boost focus
-"
-        "Slogan: Train Your Brain to Win.
-"
     )
-    if ctx:
-        prompt += f"Similar taglines (style only):
-{ctx}
-"
-    prompt += f"Description: {query_text}
-Slogans:"
     input_ids = GEN_TOK(prompt, return_tensors="pt").input_ids.to(DEVICE)
     outputs = GEN_MODEL.generate(
         input_ids,
-        max_new_tokens=24,
         do_sample=True,
         top_k=60,
         top_p=0.92,
-        temperature=1.2,
-        num_return_sequences=n_samples,
-        repetition_penalty=1.08
     )
-    raw_cands = [GEN_TOK.decode(o, skip_special_tokens=True) for o in outputs]
-    cand_set = set()
-    for txt in raw_cands:
-        for line in txt.split("
-"):
-            s = _clean_slogan(line)
-            if not s:
-                continue
-            if len(s.split()) < 2 or len(s.split()) > 8:
-                continue
-            if _is_blocked_slogan(s):
-                continue
-            cand_set.add(_titlecase_soft(s))
-    if not cand_set:
-        return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))
-    scored = _score_candidates(query_text, sorted(cand_set), neighbors_df)
-    if not scored:
         return _clean_slogan(GEN_TOK.decode(outputs[0], skip_special_tokens=True))
-    scored.sort(key=lambda x: x[1], reverse=True)
-    return scored[0][0]
 def pipeline(user_input):
     recs = recommend(user_input, top_k=3)
     slogan = generate_slogan(user_input)

 import gradio as gr
 import pandas as pd
 import numpy as np
 from sentence_transformers import SentenceTransformer
+import faiss, re, torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# === Load models ===
+GEN_TOK   = AutoTokenizer.from_pretrained("google/flan-t5-base")
+GEN_MODEL = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+GEN_MODEL = GEN_MODEL.to(DEVICE)
 embed_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+# Dummy dataset (demo only)
 data = pd.DataFrame({
     "name": ["HowDidIDo", "Museotainment", "Movitr"],
     "tagline": ["Online evaluation platform", "PacMan & Louvre meet", "Crowdsourced video translation"],
     ]
 })
+# === Build FAISS index ===
 data_vecs = embed_model.encode(data["description"].tolist())
 faiss.normalize_L2(data_vecs)
 index = faiss.IndexFlatIP(data_vecs.shape[1])
     results["score"] = scores[0]
     return results[["name", "tagline", "description", "score"]]
+# ==========================================================
+# Advanced Slogan Generator (with cleaning & filtering)
+# ==========================================================
+BLOCK_PATTERNS = [
+    r"^[A-Z][a-z]+ [A-Z][a-z]+ (Platform|Solution|System|Application|Marketplace)$",
+    r"^[A-Z][a-z]+ [A-Z][a-z]+$",
+    r"^[A-Z][a-z]+$"
+]
+FORBIDDEN = {"app","assistant","platform","solution","system","marketplace",
+             "ai","machine learning","augmented reality","virtual reality","empower"}
+def _is_blocked_slogan(s: str) -> bool:
+    if not s: return True
+    s_low = s.lower()
+    for pat in BLOCK_PATTERNS:
+        if re.match(pat, s.strip()):
+            return True
+    for w in FORBIDDEN:
+        if w in s_low:
+            return True
+    return False
+def _clean_slogan(text: str, max_words: int = 8) -> str:
+    text = text.strip().split("\n")[0]
+    text = re.sub(r"[\"“”‘’]", "", text)
+    text = re.sub(r"\s+", " ", text).strip()
+    words = text.split()
+    if len(words) > max_words:
+        text = " ".join(words[:max_words])
+    return text
def generate_slogan(query_text: str, n_samples: int = 16) -> str:
    """Sample slogans for *query_text* and return the preferred one.

    Builds a few-shot prompt, samples ``n_samples`` sequences from
    ``GEN_MODEL``, cleans each with ``_clean_slogan`` and keeps those that
    are 2-8 words long and not rejected by ``_is_blocked_slogan``. Among the
    survivors the one whose word count is closest to 5 wins (earliest sample
    on ties). If nothing survives, the cleaned first sample is returned.
    """
    instructions = (
        "You are a creative branding expert. Write ONE short, original, catchy startup slogan (max 8 words).\n"
        "Forbidden words: app, assistant, platform, solution, system, marketplace, AI, machine learning, augmented reality, virtual reality, empower.\n"
        "Do NOT copy the description. Focus on benefits and vivid verbs. Return ONLY the slogan.\n\n"
    )
    examples = (
        "Examples:\n"
        "Description: AI assistant for doctors to prioritize patient cases\n"
        "Slogan: Less Guessing. More Healing.\n\n"
        "Description: Payments for small online stores\n"
        "Slogan: Built to Grow with Your Cart.\n\n"
        "Description: Neurotech headset to boost focus\n"
        "Slogan: Train Your Brain to Win.\n\n"
    )
    prompt = instructions + examples + f"Description: {query_text}\nSlogan:"

    encoded = GEN_TOK(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(DEVICE)

    sampling = {
        "max_new_tokens": 16,
        "do_sample": True,
        "top_k": 60,
        "top_p": 0.92,
        "temperature": 1.1,
        "num_return_sequences": n_samples,
    }
    outputs = GEN_MODEL.generate(input_ids, **sampling)

    cleaned = [
        _clean_slogan(GEN_TOK.decode(seq, skip_special_tokens=True))
        for seq in outputs
    ]
    usable = [
        slogan
        for slogan in cleaned
        if 2 <= len(slogan.split()) <= 8 and not _is_blocked_slogan(slogan)
    ]
    if usable:
        # Prefer a length of about 5 words; min() keeps the earliest on ties.
        return min(usable, key=lambda slogan: abs(len(slogan.split()) - 5))
    return cleaned[0]
+# ==========================================================
+# Pipeline
+# ==========================================================
 def pipeline(user_input):
     recs = recommend(user_input, top_k=3)
     slogan = generate_slogan(user_input)