File size: 6,834 Bytes
20da24e
 
 
c9419d3
6c2bf18
4f65160
ca3721d
4f65160
 
12dd2a6
4f65160
6c2bf18
e4198a1
20da24e
12dd2a6
4f65160
20da24e
12dd2a6
4f65160
c9419d3
 
 
 
 
 
 
f5208cd
 
 
e4198a1
20da24e
 
 
 
 
 
 
 
f29d72e
20da24e
 
 
6f59468
e4198a1
4f65160
f5208cd
 
 
 
c9419d3
 
4f65160
 
f5208cd
 
 
20da24e
4f65160
20da24e
f5208cd
c9419d3
20da24e
4f65160
c9419d3
90e3639
 
6c2bf18
e4198a1
f5208cd
c9419d3
 
 
6c2bf18
4f65160
20da24e
 
c9419d3
20da24e
c9419d3
 
 
ea61695
4f65160
 
 
 
20da24e
4f65160
 
20da24e
4f65160
 
20da24e
 
4f65160
 
 
20da24e
 
4f65160
ca3721d
20da24e
 
f5208cd
4f65160
 
ca3721d
20da24e
 
4f65160
20da24e
 
 
 
 
 
 
 
 
 
 
c9419d3
 
e4198a1
5584643
c9419d3
 
5584643
ea61695
4da3146
 
 
f5208cd
ca3721d
 
 
f5208cd
c9419d3
ca3721d
20da24e
e4198a1
80e95d4
952ac40
c9419d3
20da24e
e4198a1
 
 
 
 
 
f5208cd
ea61695
e4198a1
 
 
 
 
 
f5208cd
952ac40
4da3146
 
 
952ac40
 
 
ea61695
e4198a1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# app.py ────────────────────────────────────────────────────────────────
# Pin Gradio ≤ 3.31.0 in requirements.txt so <a target="_blank"> is kept
# and place architecture.png beside this file.
import os, re, faiss, zipfile, warnings, gradio as gr
from pathlib import Path
from typing import List
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from docx import Document
from docx.opc.exceptions import PackageNotFoundError
from openai import OpenAI

# ───────── 0. résumé → plain-text ──────────────────────────────────────
# Source document.  Despite the .pdf extension, read_docx is attempted
# first and read_pdf is the fallback (see the try/except below).
FILE = Path("my_resume.pdf")

def read_pdf(p: Path) -> str:
    """Extract and space-join the text of every page of the PDF at *p*."""
    pages = []
    for page in PdfReader(p).pages:
        # extract_text() may return None for image-only pages
        pages.append(page.extract_text() or "")
    return " ".join(pages)

def read_docx(p: Path) -> str:
    """Space-join every non-blank paragraph of the .docx file at *p*."""
    kept = []
    for para in Document(p).paragraphs:
        if para.text.strip():
            kept.append(para.text)
    return " ".join(kept)

# Try DOCX first; a PDF (or any non-zip file) raises one of these errors,
# in which case we fall back to the PDF reader.  FILE has a .pdf
# extension, so the fallback is the common path.
try:
    raw = read_docx(FILE)
except (PackageNotFoundError, KeyError, zipfile.BadZipFile):
    # Fix: warning text contained mojibake ("rΓ©sumΓ©")
    warnings.warn("Reading résumé as PDF")
    raw = read_pdf(FILE)

# Collapse every run of whitespace to a single space.
text = re.sub(r"\s+", " ", raw).strip()

# ───────── 0-bis. extra searchable metadata ───────────────────────────
# Markdown anchors served verbatim by the "linkedin"/"blog" static answers.
LINK_MD = '<a href="https://www.linkedin.com/in/sriharideep/" target="_blank">' \
          'LinkedIn Profile</a>'
BLOG_MD = '<a href="https://sfdcbrewery.github.io/" target="_blank">' \
          'Technical Blog</a>'
# Static architecture blurb shown to users.
# Fix: arrows were mojibake ("β†’") in the original user-facing string.
ARCH_MD = (
    "ARCHITECTURE NOTE – The bot follows a Retrieval-Augmented Generation "
    "(RAG) design: PDF → 180-token chunks → MiniLM-L6 embeddings → FAISS "
    "similarity search → GPT-3.5-turbo answer constrained to context."
)

# make them retrievable by the RAG index (even though we'll short-circuit)
text += f" LinkedIn: {LINK_MD} Blog: {BLOG_MD} {ARCH_MD}"

# ───────── 1. text → embeddings → FAISS ───────────────────────────────
def chunkify(t: str, max_tok: int = 180) -> List[str]:
    """Split *t* into chunks of roughly *max_tok* words along sentence ends.

    Sentences (split after ., ! or ? followed by whitespace) accumulate in
    a buffer; when adding the next sentence would exceed *max_tok* words,
    the buffer is flushed as one chunk.  A single sentence longer than
    *max_tok* becomes its own (oversized) chunk.

    Fix: the original flushed even when the buffer was empty, so a long
    first sentence produced a spurious empty-string chunk that would then
    be embedded and indexed.
    """
    out: List[str] = []
    buf: List[str] = []
    n = 0  # word count of the current buffer
    for s in re.split(r"(?<=[.!?])\s+", t):
        w = len(s.split())
        # Flush only a non-empty buffer (bug fix: no "" chunks).
        if buf and n + w > max_tok:
            out.append(" ".join(buf))
            buf, n = [], 0
        buf.append(s)
        n += w
    if buf:
        out.append(" ".join(buf))
    return out

# Chunk the résumé text, embed each chunk with MiniLM, L2-normalise the
# vectors and add them to a FAISS inner-product index.  With L2-normalised
# vectors, inner product equals cosine similarity.
CHUNKS = chunkify(text)

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
vecs = embedder.encode(CHUNKS, convert_to_numpy=True)
faiss.normalize_L2(vecs)
index = faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)

def retrieve(q: str, k: int = 4):
    """Return (similarities, chunks) for the *k* chunks nearest to query *q*."""
    query_vec = embedder.encode([q], convert_to_numpy=True)
    faiss.normalize_L2(query_vec)
    scores, positions = index.search(query_vec, k)
    matched = [CHUNKS[pos] for pos in positions[0]]
    return scores[0], matched

# ───────── 2. OpenAI client ────────────────────────────────────────────
# API key comes from the environment; os.getenv returns None if unset and
# the client will fail on first call in that case.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
MODEL  = "gpt-3.5-turbo-0125"
# System prompt pins answers to the retrieved context only; generate()
# relies on the exact fallback sentence below for its overlap check path.
SYSTEM = ("You are a helpful assistant. Answer ONLY with facts in the context. "
          "If missing, reply exactly: \"I don't know based on the resume.\"")

def overlap(a: str, b: str) -> bool:
    """Return True when *a* and *b* share at least one case-insensitive word."""
    words_a = set(re.findall(r"\w+", a.lower()))
    words_b = set(re.findall(r"\w+", b.lower()))
    return not words_a.isdisjoint(words_b)

# ───────── 2-bis. guard words & static answers ─────────────────────────
# A question must contain at least one of these words (as a whole \w+ token)
# to be treated as résumé-related; checked in generate() step B.
SAFE = {"experience","project","certification","certifications","education",
        "skill","skills","summary","company","companies","role","linkedin",
        "website","blog","portfolio","architecture"}

# Substring-keyed canned replies served without hitting retrieval or GPT
# (generate() step A checks `key in lower_msg`).
STATIC_ANSWERS = {
    "linkedin": LINK_MD,
    "linked-in": LINK_MD,
    "blog": BLOG_MD,
    "architecture": ARCH_MD
}

# ───────── 2-ter. generator ───────────────────────────────────────────
def generate(msg: str) -> str:
    """Answer *msg* from the résumé.

    Pipeline: (A) canned replies for STATIC_ANSWERS keywords, (B) reject
    questions containing no SAFE guard word, (C) FAISS retrieval with a
    similarity floor, (D) GPT-3.5 constrained to the retrieved context,
    with a word-overlap sanity check on the model's answer.
    """
    lower_msg = msg.lower().strip()

    # A. serve static responses verbatim (checked before the guard so a
    # bare "linkedin?" still works — though "linkedin" is also in SAFE)
    for key, val in STATIC_ANSWERS.items():
        if key in lower_msg:
            return val

    # B. resume-related check
    if not (SAFE & set(re.findall(r"\w+", lower_msg))):
        # Fix: this user-facing string contained mojibake ("rΓ©sumΓ©")
        return "Please ask something related to my résumé."

    # C. retrieve; very short queries get a looser similarity floor
    sims, ctxs = retrieve(msg)
    min_sim = 0.10 if len(msg.split()) < 3 else 0.25
    if max(sims) < min_sim:
        return "I don't know based on the resume."

    # D. GPT-3.5-turbo, low temperature, context passed as its own message
    ctx = "\n".join(ctxs)
    ans = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM},
            {"role": "user",   "content": f"Context:\n{ctx}"},
            {"role": "user",   "content": f"Question: {msg}"}
        ],
        max_tokens=256,
        temperature=0.2
    ).choices[0].message.content.strip()

    # Reject answers that share no vocabulary with the retrieved context.
    return ans if overlap(ans, ctx) else "I don't know based on the resume."

# ───────── 3. Gradio UI ────────────────────────────────────────────────
# Labels for the quick-question buttons in the sidebar; each label doubles
# as the question text sent to generate().
quick = [
    "Professional Summary","Education details","Experience",
    "Certifications","Skills","LinkedIn","Blog","Architecture"
]

# Identity passthrough for a button label.
# NOTE(review): appears unused — the button wiring below uses an inline
# lambda instead; confirm before removing.
def set_quick_q(q):
    return q

# Layout: a narrow column of quick-question buttons beside the chat panel.
with gr.Blocks(theme="soft") as demo:
    with gr.Row(equal_height=True):
        with gr.Column(min_width=170, scale=0):
            gr.Markdown("### Quick questions")
            btns = [gr.Button(q) for q in quick]

        with gr.Column(scale=4):
            # NOTE(review): Chatbot(type="messages") is a Gradio 4+ API,
            # which conflicts with the "pin Gradio ≤ 3.31.0" header comment
            # — confirm which Gradio version is actually targeted.
            # NOTE(review): the label/placeholder strings below contain
            # mojibake ("RΓ©sumΓ©") — runtime text, left untouched here.
            chat  = gr.Chatbot(type="messages", label="RΓ©sumΓ© Bot", height=520)
            inp   = gr.Textbox(placeholder="Ask about my rΓ©sumé…", show_label=False)
            state = gr.State([])                  # chat history (messages format)
            quick_q = gr.Textbox(visible=False)   # Hidden textbox for quick button input

    # ENTER: answer the typed question and append both turns to history.
    # Returns "" first to clear the input textbox.
    def user_submit(msg, hist):
        ans  = generate(msg)
        hist = hist + [{"role":"user","content":msg},
                       {"role":"assistant","content":ans}]
        return "", hist, hist

    inp.submit(user_submit, [inp, state], [inp, chat, state])

    # Same flow for a quick-question button press.
    def quick_send(hist, q):
        ans = generate(q)
        hist = hist + [{"role":"user","content":q},
                       {"role":"assistant","content":ans}]
        return hist, hist

    # Each button first writes its label into the hidden quick_q textbox,
    # then .then() feeds it to quick_send.  The default argument val=q
    # binds the label at definition time (avoids the late-binding closure
    # pitfall in a loop).
    for b, q in zip(btns, quick):
        b.click(
            fn=lambda v=None, val=q: val,   # sets quick_q value to q
            inputs=None,
            outputs=quick_q
        ).then(
            quick_send, [state, quick_q], [chat, state]
        )

# share=True opens a public Gradio tunnel in addition to the local server.
if __name__ == "__main__":
    demo.launch(share=True)