BioMike committed on
Commit
2659381
·
1 Parent(s): aa378dc
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ GLICLASS_MODEL_PATH=knowledgator/gliclass_msmarco_merged
2
+ MAX_DOCS=8
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import gradio as gr
3
# Load .env before anything else: interfaces.main_pipeline reads
# GLICLASS_MODEL_PATH and MAX_DOCS from the environment at import time, so
# this call has to run before the import below.
load_dotenv()

from interfaces import landing_interface, main_pipeline

# Tab contents and their titles, in display order.
tab_blocks = [landing_interface, main_pipeline]
tab_titles = ["Introduction", "Reranking"]

# Two-tab application: the static landing page and the reranking demo.
demo = gr.TabbedInterface(
    tab_blocks,
    tab_titles,
    title="GLiClass SandBox",
    theme=gr.themes.Base(),
)

demo.queue()
demo.launch(debug=True, share=True)
interfaces/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .landing import landing_interface
2
+ from .main_pipeline import main_pipeline
interfaces/landing.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Landing tab: static introduction page plus instructions for local use."""
from pathlib import Path

import gradio as gr

# Resolve the HTML relative to the repository root (the parent of this
# package) instead of the current working directory, so importing this module
# works no matter where the app is started from.
_INTRO_HTML_PATH = (
    Path(__file__).resolve().parent.parent / "materials" / "introduction.html"
)
html_description = _INTRO_HTML_PATH.read_text(encoding="utf-8")

# Markdown shown inside the "How to run this model locally" accordion.
_INSTALL_MARKDOWN = """
## Installation
To use this model, you must install the GLiClass Python library:
```
pip install gliclass
```

## Usage
Once you've downloaded the GLiClass library, you can import the GLiClassModel and ZeroShotClassificationPipeline classes.
"""

# Sample script rendered below the Markdown in a read-only code viewer.
_USAGE_SNIPPET = '''
from gliclass import GLiClassModel, ZeroShotClassificationPipeline
from transformers import AutoTokenizer

model = GLiClassModel.from_pretrained("knowledgator/gliclass-small-v1")
tokenizer = AutoTokenizer.from_pretrained("knowledgator/gliclass-small-v1")

pipeline = ZeroShotClassificationPipeline(model, tokenizer, classification_type='multi-label', device='cuda:0')

text = "One day I will see the world!"
labels = ["travel", "dreams", "sport", "science", "politics"]
results = pipeline(text, labels, threshold=0.5)[0] #because we have one text

for result in results:
    print(result["label"], "=>", result["score"])
'''

with gr.Blocks() as landing_interface:
    gr.HTML(html_description)

    with gr.Accordion("How to run this model locally", open=False):
        gr.Markdown(_INSTALL_MARKDOWN)
        gr.Code(
            _USAGE_SNIPPET,
            language="python",
        )
interfaces/main_pipeline.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from typing import List
5
+ from transformers import AutoTokenizer
6
+ from gliclass import GLiClassModel, ZeroShotClassificationPipeline
7
+
8
+
9
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The model and tokenizer are loaded once, at import time, from the checkpoint
# named by the GLICLASS_MODEL_PATH environment variable.
_model_path = os.getenv("GLICLASS_MODEL_PATH")

model = GLiClassModel.from_pretrained(_model_path)
model = model.eval().to(device)  # inference mode, moved to the chosen device

tokenizer = AutoTokenizer.from_pretrained(_model_path)

# Shared pipeline used by the reranking callback; configured for multi-label
# classification.
multi_label_pipeline = ZeroShotClassificationPipeline(
    model,
    tokenizer,
    classification_type='multi-label',
    device=device,
)
15
+
16
+
17
+ example_1 =[
18
+ "I want to live in New York.",
19
+ 'York is a cathedral city in North Yorkshire, England, with Roman origins',
20
+ 'San Francisco,[23] officially the City and County of San Francisco, is a commercial, financial, and cultural center within Northern California, United States.',
21
+ 'New York, often called New York City (NYC),[b] is the most populous city in the United States',
22
+ "New York City is the third album by electronica group Brazilian Girls, released in 2008.",
23
+ "New York City was an American R&B vocal group.",
24
+ "New York City is an album by the Peter Malick Group featuring Norah Jones.",
25
+ "New York City: The Album is the debut studio album by American rapper Troy Ave. ",
26
+ '"New York City" is a song by British new wave band The Armoury Show',
27
+ ]
28
+
29
+ example_2 = [
30
+ "Looking for waterproof hiking boots that can handle freezing temperatures and rugged terrain.",
31
+ "TrailMaster X200 – waterproof boots with Vibram Arctic Grip soles, rated for -20°C and rocky paths.",
32
+ "UrbanStep Sneakers – stylish and breathable, not designed for rugged use or cold weather.",
33
+ "AlpineShield GTX – Gore-Tex lining, insulated to -15°C, ideal for mountain hiking.",
34
+ "Desert Trek Sandals – open-toe design, breathable and lightweight, not waterproof.",
35
+ "SummitPro Winter Boots – fleece-lined, waterproof up to ankle depth, tested to -5°C.",
36
+ "Marathon Lite – road-running shoes with shock-absorbing soles, non-waterproof.",
37
+ "TrailMaster X100 – waterproof boots with basic insulation, effective down to 0°C.",
38
+ "Climber Pro GTX – reinforced toe cap, Gore-Tex membrane, insulated to -20°C, certified for alpine routes."
39
+ ]
40
+
41
+ example_3 = [
42
+ "Our users are reporting 504 Gateway Timeout errors when accessing the app during peak hours.",
43
+ "A 504 Gateway Timeout indicates that a server did not receive a timely response from another server upstream.",
44
+ "A 502 Bad Gateway occurs when the server, acting as a gateway, receives an invalid response from the upstream server.",
45
+ "Common causes of 504 errors include high server load, network congestion, or misconfigured backend timeouts.",
46
+ "A 403 Forbidden error suggests that the server is refusing to authorize the request, often due to permissions.",
47
+ "To resolve 504 errors, check server logs, backend service availability, and increase timeout settings if necessary.",
48
+ "A 408 Request Timeout is returned when the client fails to send a complete request in time.",
49
+ "A 500 Internal Server Error is a generic error indicating that the server encountered an unexpected condition.",
50
+ "Network latency monitoring tools can help identify bottlenecks that may cause 504 errors during high traffic periods."
51
+ ]
52
+
53
+ example_4 = [
54
+ "A 45-year-old male presents with persistent cough, night sweats, low-grade fever, and weight loss over 3 months.",
55
+ "Lung cancer can cause cough and weight loss; however, it often includes hemoptysis and may show a solitary mass on imaging.",
56
+ "Bronchiectasis is characterized by chronic productive cough and recurrent infections but usually lacks significant weight loss.",
57
+ "Pneumonia presents acutely with high fever, productive cough, and may show lobar consolidation on imaging.",
58
+ "Sarcoidosis may cause cough and weight loss, with bilateral hilar lymphadenopathy seen on chest X-ray.",
59
+ "Tuberculosis typically presents with chronic cough, night sweats, weight loss, and may show upper lobe infiltrates on chest X-ray.",
60
+ "Chronic obstructive pulmonary disease (COPD) often involves chronic cough and dyspnea but is less associated with night sweats.",
61
+ "Fungal lung infections like histoplasmosis can mimic TB symptoms but are more common in specific endemic regions.",
62
+ "Gastroesophageal reflux disease (GERD) can cause chronic cough, but without systemic symptoms like weight loss or fever."
63
+ ]
64
+
65
+ example_5 = [
66
+ "How can I set up a recurring payment for my monthly rent via online banking?",
67
+ "A standing order allows you to set up automatic fixed-amount payments on a regular schedule (e.g., monthly rent) through your bank.",
68
+ "A direct debit authorizes a third party to withdraw variable amounts from your account, typically used for utility bills.",
69
+ "Wire transfers are typically one-off payments that do not recur automatically.",
70
+ "You can schedule a one-time payment for a future date using the online banking portal, but it won’t repeat monthly.",
71
+ "Bank-issued cashier’s checks are used for large payments but require manual setup each time.",
72
+ "To set up recurring credit card payments, navigate to your card provider’s auto-pay settings (note: for card bills only).",
73
+ "Standing orders can be modified or canceled at any time via your online banking dashboard.",
74
+ "International transfers may incur additional fees and are not ideal for domestic rent payments."
75
+ ]
76
+
77
# Every example row must supply exactly one value per input component: the
# query followed by MAX_DOCS document slots, i.e. MAX_DOCS + 1 entries.
# Short rows are padded with empty strings and long rows are trimmed, so the
# rows match the UI for any MAX_DOCS value (the previous pad count,
# MAX_DOCS - len(example) - 1, was only right by coincidence for MAX_DOCS=8).
_example_width = int(os.getenv("MAX_DOCS")) + 1
examples = [
    (example + [""] * _example_width)[:_example_width]
    for example in [example_1, example_2, example_3, example_4, example_5]
]
80
+
81
+
82
def classification(*args) -> List[str]:
    """Rerank the submitted documents against the query.

    Args:
        *args: The query text followed by one value per document slot.
            Slots that are empty, ``None`` or whitespace-only are ignored.

    Returns:
        A flat list of ``2 * MAX_DOCS`` strings: first the documents ordered
        by descending score and padded with ``""``, then the matching scores
        (rounded to two decimals, as strings) padded the same way.
    """
    max_docs = int(os.getenv("MAX_DOCS"))
    query = args[0]
    documents = [doc for doc in args[1:] if doc and doc.strip()]

    # Nothing to rank: clear every slot instead of calling the model with an
    # empty label list.
    if not documents:
        return [""] * (2 * max_docs)

    # Each document is passed to the zero-shot pipeline as a candidate label;
    # threshold=0.0 asks for a score for every one of them.
    predictions = multi_label_pipeline(query, documents, threshold=0.0)[0]
    ranked = sorted(predictions, key=lambda p: p["score"], reverse=True)
    # Never return more entries than there are output slots.
    ranked = ranked[:max_docs]

    docs = [prediction["label"] for prediction in ranked]
    scores = [str(round(prediction["score"], 2)) for prediction in ranked]

    padding = [""] * (max_docs - len(ranked))
    return docs + padding + scores + padding
97
+
98
with gr.Blocks(title="GLiClass-Reranker") as main_pipeline:
    # Number of document/score slot pairs to render.
    max_docs = int(os.getenv("MAX_DOCS"))

    query = gr.Textbox(
        value=examples[0][0],
        label="Text query",
        placeholder="Enter your query here",
        lines=10,
    )
    submit_btn = gr.Button("Rerank")

    doc_inputs = []
    score_outputs = []
    for i in range(max_docs):
        with gr.Group():
            doc_inputs.append(
                gr.Textbox(
                    # Pre-filled from the first example row: index 0 is the
                    # query, documents start at index 1.
                    value=examples[0][1 + i],
                    label=f"Document {i}",
                    placeholder="Enter a document to rerank",
                    scale=2,
                )
            )
            score_outputs.append(
                gr.Textbox(
                    label=f"Score {i}",
                    placeholder="Score will appear here",
                    scale=2,
                )
            )

    inputs = [query] + doc_inputs
    # The document boxes are outputs as well: classification() returns the
    # documents re-ordered by score, followed by the scores, so the boxes are
    # rewritten in ranked order.
    outputs = doc_inputs + score_outputs

    # Not assigned back to `examples`, so that name keeps referring to the
    # list of example rows.
    gr.Examples(
        examples=examples,
        fn=classification,
        inputs=inputs,
        outputs=outputs,
        cache_examples=True,
    )

    submit_btn.click(fn=classification, inputs=inputs, outputs=outputs)
materials/introduction.html ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>GLiClass Reranker</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ margin: 10px;
11
+ line-height: 1.6;
12
+ }
13
+ .link-button {
14
+ display: inline-flex;
15
+ align-items: center;
16
+ justify-content: center;
17
+ margin: 10px;
18
+ padding: 10px;
19
+ background-color: white;
20
+ border: 1px solid grey;
21
+ color: #007bff;
22
+ text-decoration: none;
23
+ border-radius: 10px;
24
+ text-align: center;
25
+ vertical-align: middle;
26
+ box-sizing: border-box;
27
+ }
28
+ .link-button:hover {
29
+ background-color: #c0dcfc;
30
+ }
31
+ .link-button img {
32
+ height: 20px;
33
+ width: auto;
34
+ display: block;
35
+ }
36
+ .links-container {
37
+ text-align: center;
38
+ margin: auto;
39
+ display: flex;
40
+ justify-content: center;
41
+ flex-wrap: wrap;
42
+ }
43
+ </style>
44
+ </head>
45
+ <body>
46
+ <h2>Welcome to GLiClass Reranker!</h2>
47
+ <h3>GLiClass: Generalist and Lightweight Model for Sequence Classification:</h3>
48
+ <ol>
49
+ <li><b>Reranker in RAG pipelines</b></li>
50
+ <li><b>Zero-Shot text classification</b></li>
51
+ <li><b>Topic Classification</b></li>
52
+ <li><b>Sentiment Analysis</b></li>
53
+ </ol>
54
+ <h3>What is GLiClass?</h3>
55
+ <p>This is an efficient zero-shot classifier inspired by the <a href="https://github.com/urchade/GLiNER/tree/main">"GLiNER"</a> work. It demonstrates the same performance as a cross-encoder while being more compute-efficient, because classification is done in a single forward pass. It can be used for topic classification, sentiment analysis, and as a reranker in RAG pipelines.</p>
56
+ <h3>Remember, information extraction is not just about data; it's about insights. Let's uncover those insights together!💫</h3>
57
+ <!-- Links Section -->
58
+ <div class="links-container">
59
+ <a href="https://www.knowledgator.com/" class="link-button" target="_blank"><img src="https://assets-global.website-files.com/65902be8ba48a05dfdb73331/6590476fcc8e8f35b2332781_Group%201000002504%20(1).png" alt="Visit our website"></a>
60
+ <a href="https://www.linkedin.com/company/knowledgator/" class="link-button" target="_blank"><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/01/LinkedIn_Logo.svg/1280px-LinkedIn_Logo.svg.png" alt="Follow on LinkedIn"></a>
61
+ <a href="https://huggingface.co/knowledgator/" class="link-button" target="_blank"><img src="https://vectorseek.com/wp-content/uploads/2023/12/Hugging-Face-Logo-Vector.svg-.png" alt="Hugging Face Profile"></a>
62
+ <a href="https://twitter.com/knowledgator" class="link-button" target="_blank"><img src="https://freepnglogo.com/images/all_img/1691832278twitter-x-logo-png.png" alt="Follow on X"></a>
63
+ <a href="https://blog.knowledgator.com/" class="link-button" target="_blank"><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Medium_%28website%29_logo.svg/2560px-Medium_%28website%29_logo.svg.png" alt="Follow on Medium"></a>
64
+ <a href="https://discord.com/invite/dkyeAgs9DG" class="link-button" target="_blank"><img src="https://assets-global.website-files.com/6257adef93867e50d84d30e2/636e0b52aa9e99b832574a53_full_logo_blurple_RGB.png" alt="Join our Discord"></a>
65
+ </div>
66
+ </body>
67
+ </html>
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gliclass
2
+ gradio
3
+ python-dotenv