# Spaces: Running  (page-status text captured with the listing; not part of the script)
import streamlit as st | |
from st_utils import bm25_search, semantic_search, hf_api, paginator | |
from huggingface_hub import ModelSearchArguments | |
import webbrowser | |
from numerize.numerize import numerize | |
import math | |
# Page-level configuration; this is the first Streamlit call in the script.
# NOTE(review): the icon reached us as mis-decoded bytes ("π" followed by
# dropped control characters). A magnifying-glass emoji is assumed here --
# confirm against the original file.
st.set_page_config(
    page_title="HF Search Engine",
    page_icon="🔎",
    layout="wide",
    initial_sidebar_state="auto",
)
### SIDEBAR
# Option value -> label shown to the user. Built once here instead of being
# re-created inside every ``format_func`` call.
search_method_labels = {
    "hfapi": "Keyword search",
    "bm25": "BM25 search",
    "semantic": "Semantic Search",
}
sort_labels = {
    None: "Relevance",
    "downloads": "Most downloads",
    "likes": "Most likes",
    "lastModified": "Recently updated",
}

search_backend = st.sidebar.selectbox(
    "Search method",
    ["semantic", "bm25", "hfapi"],
    format_func=lambda value: search_method_labels[value],
)
limit_results = int(st.sidebar.number_input("Limit results", min_value=0, value=10))
sort_by = st.sidebar.selectbox(
    "Sort by",
    [None, "downloads", "likes", "lastModified"],
    format_func=lambda value: sort_labels[value],
)

st.sidebar.markdown("# Filters")
args = ModelSearchArguments()

# ``args.library`` / ``args.pipeline_tag`` map a name to a tag value. The
# widgets offer the values, so invert each mapping once to recover the name
# to display for a given value.
library_names = {value: name for name, value in args.library.items()}
task_names = {value: name for name, value in args.pipeline_tag.items()}

library = st.sidebar.multiselect(
    "Library", args.library.values(), format_func=lambda value: library_names[value]
)
task = st.sidebar.multiselect(
    "Task", args.pipeline_tag.values(), format_func=lambda value: task_names[value]
)
### MAIN PAGE
# Centered page title, rendered as raw HTML.
# NOTE(review): the two leading emoji were mis-decoded in the captured source
# ("ππ€"). The second is the hugging-face emoji; the first is assumed to be a
# magnifying glass -- confirm against the original file.
st.markdown(
    "<h1 style='text-align: center; '>🔎🤗 HF Search Engine</h1>",
    unsafe_allow_html=True,
)

# Search bar
search_query = st.text_input("Search for a model in HuggingFace", value="", max_chars=None, key=None, type="default")
def _format_count(value):
    """Return *value* abbreviated by ``numerize``, or "N/A" when it is falsy or NaN.

    Falsy covers ``None`` and ``0``, so a count of zero is shown as "N/A".
    """
    return numerize(value) if value and not math.isnan(value) else "N/A"


# Run a search only once the user has typed a query.
if search_query != "":
    filters = {
        "library": library,
        "task": task,
    }

    # Every backend is called with the same (query, limit, sort, filters)
    # arguments, so select the callable by the sidebar choice.
    search_backends = {
        "hfapi": hf_api,
        "semantic": semantic_search,
        "bm25": bm25_search,
    }
    res = search_backends[search_backend](search_query, limit_results, sort_by, filters)
    hit_list, hits_count = res["hits"], res["count"]

    # Keep only the fields the page displays; "readme" is optional.
    hit_list = [
        {
            "modelId": hit["modelId"],
            "tags": hit["tags"],
            "downloads": hit["downloads"],
            "likes": hit["likes"],
            "readme": hit.get("readme", None),
        }
        for hit in hit_list
    ]

    # NOTE(review): indentation was lost in the captured source. The nesting
    # below (per-hit widgets and the "---" separator inside the loop, and the
    # ``else`` paired with ``if hit_list``) is reconstructed from the
    # statements' meaning -- confirm against the original file.
    if hit_list:
        st.write(f"Search results ({hits_count}):")

        # The caption never reports more than 100 shown results.
        shown_results = min(hits_count, 100)

        for i, hit in paginator(
            f"Select results (showing {shown_results} of {hits_count} results)",
            hit_list,
        ):
            col1, col2, col3 = st.columns([5, 1, 1])
            col1.metric("Model", hit["modelId"])
            col2.metric("N° downloads", _format_count(hit["downloads"]))
            col3.metric("N° likes", _format_count(hit["likes"]))

            # ``hit=hit`` binds the current hit at definition time so each
            # button opens its own model page.
            # NOTE(review): ``webbrowser.open`` runs in the Python process
            # serving the app, so on a remote deployment it will not open a
            # tab in the visitor's browser; a plain link may be preferable.
            st.button(
                "View model on 🤗",
                on_click=lambda hit=hit: webbrowser.open(f"https://huggingface.co/{hit['modelId']}", new=2),
                key=f"{i}-{hit['modelId']}",
            )
            st.write(f"**Tags:** {' • '.join(hit['tags'])}")

            if hit["readme"]:
                with st.expander("See README"):
                    st.write(hit["readme"])

            # TODO: embed huggingface spaces
            # import streamlit.components.v1 as components
            # components.html(
            # f"""
            # <link rel="stylesheet" href="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.css">
            # <div id="target"></div>
            # <script src="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.js"></script>
            # <script>
            # launchGradioFromSpaces("abidlabs/question-answering", "#target")
            # </script>
            # """,
            # height=400,
            # )

            # Horizontal rule between consecutive results.
            st.markdown("---")
    else:
        st.write("No Search results, please try again with different keywords")
# Footer with author and project links, rendered as raw HTML.
# The heart emoji was mis-decoded in the captured source ("β€οΈ") and is
# restored here.
st.markdown(
    "<h6 style='text-align: center; color: #808080;'>Made with ❤️ By <a href='https://github.com/NouamaneTazi'>Nouamane</a> - Checkout complete project <a href='https://github.com/NouamaneTazi/hf_search'>here</a></h6>",
    unsafe_allow_html=True,
)