# app.py ────────────────────────────────────────────────────────────────
# requirements.txt should pin a Gradio release that supports
# gr.Chatbot(type="messages") (4.44+); the Chatbot below sets
# sanitize_html=False so <a target="_blank"> links are kept.
# Place architecture.png beside this file.
import os, re, faiss, zipfile, warnings, gradio as gr
from pathlib import Path
from typing import List
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from docx import Document
from docx.opc.exceptions import PackageNotFoundError
from openai import OpenAI
# ───────── 0. résumé → plain-text ──────────────────────────────────────
FILE = Path("my_resume.pdf")
def read_pdf(p: Path) -> str:
return " ".join(pg.extract_text() or "" for pg in PdfReader(p).pages)
def read_docx(p: Path) -> str:
return " ".join(par.text for par in Document(p).paragraphs if par.text.strip())
try:
raw = read_docx(FILE)
except (PackageNotFoundError, KeyError, zipfile.BadZipFile):
    warnings.warn("Reading résumé as PDF")
raw = read_pdf(FILE)
text = re.sub(r"\s+", " ", raw).strip()
# ───────── 0-bis. extra searchable metadata ───────────────────────────
LINK_MD = '<a href="https://www.linkedin.com/in/sriharideep/" target="_blank">' \
'LinkedIn Profile</a>'
BLOG_MD = '<a href="https://sfdcbrewery.github.io/" target="_blank">' \
'Technical Blog</a>'
ARCH_MD = (
"ARCHITECTURE NOTE β The bot follows a Retrieval-Augmented Generation "
"(RAG) design: PDF β 180-token chunks β MiniLM-L6 embeddings β FAISS "
"similarity search β GPT-3.5-turbo answer constrained to context."
)
# make them retrievable by the RAG index (even though we'll short-circuit)
text += f" LinkedIn: {LINK_MD} Blog: {BLOG_MD} {ARCH_MD}"
# ───────── 1. text → embeddings → FAISS ───────────────────────────────
def chunkify(t: str, max_tok: int = 180) -> List[str]:
    """Greedily pack sentences into chunks of at most ~max_tok words."""
    out, buf, n = [], [], 0
    for s in re.split(r"(?<=[.!?])\s+", t):
        w = len(s.split())
        if buf and n + w > max_tok:      # flush before the budget overflows
            out.append(" ".join(buf)); buf, n = [], 0
        buf.append(s); n += w
    if buf:
        out.append(" ".join(buf))
    return out
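# Greedy packing in miniature (hypothetical input; each sentence is 2 words):
#     chunkify("A one. B two. C three.", max_tok=4)
#     -> ["A one. B two.", "C three."]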
CHUNKS = chunkify(text)
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
vecs = embedder.encode(CHUNKS, convert_to_numpy=True)
faiss.normalize_L2(vecs)
index = faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)
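# Inner product over L2-normalised vectors equals cosine similarity, so
# IndexFlatIP scores lie in [-1, 1] with higher meaning more similar.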
def retrieve(q: str, k: int = 4):
qv = embedder.encode([q], convert_to_numpy=True); faiss.normalize_L2(qv)
sims, idx = index.search(qv, k)
return sims[0], [CHUNKS[i] for i in idx[0]]
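# Usage sketch (query text is illustrative; results depend on the résumé):
#     sims, ctxs = retrieve("cloud certifications", k=2)
#     sims -> top-2 cosine scores, ctxs -> the matching résumé chunks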
# ───────── 2. OpenAI client ────────────────────────────────────────────
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
MODEL = "gpt-3.5-turbo-0125"
SYSTEM = ("You are a helpful assistant. Answer ONLY with facts in the context. "
"If missing, reply exactly: \"I don't know based on the resume.\"")
def overlap(a: str, b: str) -> bool:
return bool(set(re.findall(r"\w+", a.lower())) &
set(re.findall(r"\w+", b.lower())))
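# e.g. overlap("Worked at Acme Corp", "acme engineer") -> True (shares "acme")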
# ───────── 2-bis. guard words & static answers ─────────────────────────
SAFE = {"experience","project","certification","certifications","education",
"skill","skills","summary","company","companies","role","linkedin",
"website","blog","portfolio","architecture"}
STATIC_ANSWERS = {
"linkedin": LINK_MD,
"linked-in": LINK_MD,
"blog": BLOG_MD,
"architecture": ARCH_MD
}
# ───────── 2-ter. generator ───────────────────────────────────────────
def generate(msg: str) -> str:
lower_msg = msg.lower().strip()
# A. serve static responses verbatim
for key, val in STATIC_ANSWERS.items():
if key in lower_msg:
return val
# B. resume-related check
if not (SAFE & set(re.findall(r"\w+", lower_msg))):
return "Please ask something related to my rΓ©sumΓ©."
# C. retrieve
sims, ctxs = retrieve(msg)
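    # shorter queries share fewer words with any chunk, so relax the floor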
min_sim = 0.10 if len(msg.split()) < 3 else 0.25
if max(sims) < min_sim:
return "I don't know based on the resume."
# D. GPT-3.5-turbo
ctx = "\n".join(ctxs)
ans = client.chat.completions.create(
model=MODEL,
messages=[
{"role": "system", "content": SYSTEM},
{"role": "user", "content": f"Context:\n{ctx}"},
{"role": "user", "content": f"Question: {msg}"}
],
max_tokens=256,
temperature=0.2
).choices[0].message.content.strip()
return ans if overlap(ans, ctx) else "I don't know based on the resume."
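# Behaviour sketch (static answers are exact; GPT answers depend on the résumé):
#     generate("linkedin")            -> LINK_MD (short-circuit, no retrieval)
#     generate("how is the weather?") -> "Please ask something related to my résumé."
#     generate("skills")              -> GPT answer grounded in retrieved chunks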
# ───────── 3. Gradio UI ────────────────────────────────────────────────
quick = [
"Professional Summary","Education details","Experience",
"Certifications","Skills","LinkedIn","Blog","Architecture"
]
with gr.Blocks(theme="soft") as demo:
with gr.Row(equal_height=True):
with gr.Column(min_width=170, scale=0):
gr.Markdown("### Quick questions")
btns = [gr.Button(q) for q in quick]
with gr.Column(scale=4):
            chat = gr.Chatbot(type="messages", label="Résumé Bot", height=520,
                              sanitize_html=False)  # keep <a target="_blank">
            inp = gr.Textbox(placeholder="Ask about my résumé…", show_label=False)
state = gr.State([])
quick_q = gr.Textbox(visible=False) # Hidden textbox for quick button input
    # ENTER in the textbox sends the message
def user_submit(msg, hist):
ans = generate(msg)
hist = hist + [{"role":"user","content":msg},
{"role":"assistant","content":ans}]
return "", hist, hist
inp.submit(user_submit, [inp, state], [inp, chat, state])
def quick_send(hist, q):
ans = generate(q)
hist = hist + [{"role":"user","content":q},
{"role":"assistant","content":ans}]
return hist, hist
    for b, q in zip(btns, quick):
        b.click(
            fn=lambda q=q: q,        # write this button's label into quick_q
            inputs=None,
            outputs=quick_q
        ).then(
            quick_send, [state, quick_q], [chat, state]
        )
if __name__ == "__main__":
demo.launch(share=True)