Spaces:
Runtime error
Runtime error
lambdaofgod
committed on
Commit
·
426db28
1
Parent(s):
818e811
gradio app setup
Browse files- app.py +62 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import datasets
|
4 |
+
from findkit import indexes
|
5 |
+
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
+
|
9 |
+
# Configure the root logger so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
|
10 |
+
|
11 |
+
|
12 |
+
def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render retrieval results as an HTML table string.

    Args:
        retrieval_result: DataFrame-like object holding the retrieved rows.
            An ``"episode"`` column is required when deduplication is on.
        show_only_one_match_per_episode: When truthy, rows are deduplicated
            on the ``"episode"`` column before rendering.

    Returns:
        The HTML produced by ``to_html(render_links=True, index=False)``,
        or an empty string when there are no rows to show.
    """
    rows = retrieval_result
    if show_only_one_match_per_episode:
        rows = rows.drop_duplicates(subset=["episode"])

    # Nothing to render: hand back an empty string instead of an empty table.
    if len(rows) == 0:
        return ""

    return rows.to_html(render_links=True, index=False)
|
20 |
+
|
21 |
+
|
22 |
+
def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Query the index and return the matches with a BM25-named score column.

    Args:
        findkit_index: Object exposing ``find_similar(query, n)``; the result
            is used as a pandas DataFrame (assumed — confirm against findkit).
        query: Query passed through to ``find_similar``.
        n_retrieved_results: Number of results requested from the index.

    Returns:
        The DataFrame returned by ``find_similar`` with its ``"distance"``
        column renamed to ``"bm25_score"``.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # `columns=` is required: a bare positional mapping targets the index
    # labels, which would silently leave the "distance" column untouched.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})
|
25 |
+
|
26 |
+
|
27 |
+
def setup_df():
    """Load the podcast dataset's train split as a DataFrame without NaN rows.

    Returns:
        The ``"train"`` split of ``lambdaofgod/lex_fridman_podcast`` converted
        with ``to_pandas()`` and passed through ``dropna()``.
    """
    dataset_splits = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")
    train_frame = dataset_splits["train"].to_pandas()
    return train_frame.dropna()
|
31 |
+
|
32 |
+
|
33 |
+
def setup_index():
    """Build an in-memory BM25 index from the podcast DataFrame.

    Returns:
        The result of ``indexes.InMemoryBM25Index.build`` called with the
        ``"text"`` column and the full DataFrame from ``setup_df()``.
    """
    podcast_df = setup_df()
    texts = podcast_df["text"]
    return indexes.InMemoryBM25Index.build(texts, podcast_df)
|
36 |
+
|
37 |
+
|
38 |
+
# Built once at module load; show_retrieval_results reads this module-level
# index on every call instead of rebuilding it.
findkit_index = setup_index()
|
39 |
+
|
40 |
+
|
41 |
+
def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Run a query against the module-level index and return HTML results.

    Args:
        query: Query forwarded to ``get_retrieval_results``.
        n_retrieved_results: Number of results to request from the index.
        show_only_one_match_per_episode: Forwarded to
            ``get_html_retrieval_results`` to control deduplication.

    Returns:
        Whatever ``get_html_retrieval_results`` returns for the matches.
    """
    matches = get_retrieval_results(findkit_index, query, n_retrieved_results)
    return get_html_retrieval_results(matches, show_only_one_match_per_episode)
|
48 |
+
|
49 |
+
|
50 |
+
# Input widgets; they are passed to the interface in the order of
# show_retrieval_results' parameters.
show_only_one_match_per_episode = gr.Checkbox(
    label="show only one match per episode",
    value=False,
)
n_retrieved_results = gr.Number(
    label="number of results",
    value=10,
    precision=0,
)
query = gr.Textbox(
    label="input query",
    value="artificial life",
)

# Wire the widgets to the retrieval function; the output is rendered as HTML.
demo = gr.Interface(
    fn=show_retrieval_results,
    inputs=[
        query,
        n_retrieved_results,
        show_only_one_match_per_episode,
    ],
    outputs="html",
)

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
git+https://github.com/lambdaofgod/findkit
|