Spaces:
Sleeping
Sleeping
File size: 8,481 Bytes
19c197e 314e308 19c197e be35644 19c197e ce348fb 43baccc 616fdd5 43baccc a254087 38a598e 616fdd5 19c197e 4d7bc24 5111500 4d7bc24 f3fb7cf bc64858 19c197e a0f9cd0 68a533c f3fb7cf 2daa8ec 19c197e f0338f9 616fdd5 f0338f9 bd326a6 76e0010 9f55c0c 06f19b7 b1416f5 06f19b7 b3f2aff 8587a98 b3f2aff 06f19b7 b3f2aff 76e0010 587bb73 b3f2aff 9483377 314e308 828a1a3 314e308 c4a3b6b 314e308 587bb73 314e308 76e0010 616fdd5 587bb73 76e0010 587bb73 4a1ee55 770c64d b1416f5 3466d28 5169878 4e013ca b1416f5 35a46f6 b1416f5 12fc320 b1416f5 bc64858 63674ad 310b704 63674ad 1432853 63674ad 310b704 63674ad bc64858 63674ad 1f71f79 63674ad 1f71f79 63674ad 1f71f79 2ea496c 19c197e 2ea496c 992e84a 165fe32 19c197e 165fe32 19c197e 165fe32 19c197e 165fe32 19c197e 165fe32 19c197e 165fe32 bd326a6 165fe32 ed0e30c 35a46f6 d31ee13 0effeae d31ee13 0c419b8 d31ee13 0effeae d31ee13 19c197e bd326a6 2daa8ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
import streamlit as st
from transformers import pipeline
from io import StringIO
# Page configuration is issued before anything else touches Streamlit so it
# remains the first Streamlit command of the script run.
st.set_page_config(layout="wide")


@st.cache_resource
def _load_unmasker():
    """Build the fill-mask pipeline once and reuse it across script reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be re-created on each rerun.
    """
    return pipeline('fill-mask', model='dsfsi/zabantu-sot-ven-170m')


# Module-level handle used by fill_mask().
unmasker = _load_unmasker()
def fill_mask(sentences):
    """Run the fill-mask model on every sentence that contains ``<mask>``.

    Args:
        sentences: Iterable of sentence strings, typically one per input line.

    Returns:
        A ``(results, warnings)`` tuple. ``results`` maps each sentence
        containing ``<mask>`` to the value returned by ``unmasker`` for it
        (it is keyed by sentence, so repeated sentences share one entry).
        ``warnings`` holds one message per non-blank sentence that has no
        ``<mask>`` token.
    """
    results = {}
    warnings = []
    for sentence in sentences:
        # Blank lines (for example the empty string left by a trailing
        # newline) are not sentences; skip them rather than warn about them.
        if not sentence.strip():
            continue
        if "<mask>" in sentence:
            # NOTE(review): a sentence with more than one <mask> presumably
            # makes the pipeline return nested lists - confirm that callers
            # indexing predictions[0]['token_str'] can cope with that.
            results[sentence] = unmasker(sentence)
        else:
            warnings.append(f"Warning: No <mask> token found in sentence: {sentence}")
    return results, warnings
def replace_mask(sentence, predicted_word):
    """Return ``sentence`` with each ``<mask>`` swapped for the bolded prediction."""
    # Markdown bold markers make the filled-in word stand out when rendered.
    highlighted = f"**{predicted_word}**"
    return sentence.replace("<mask>", highlighted)
# --- Page header: logo, project links, title and model description ---------
# NOTE(review): the block nesting below was reconstructed from a copy of the
# file that had lost its indentation - confirm it against the deployed app.

# Empty element used as a vertical spacer above the logo row.
st.write("")

# Three columns; only the middle one is filled, which centres the logo.
img1, img2, img3 = st.columns(3)
with img2:
    with st.container(border=False):
        st.image("logo_transparent_small.png")
        st.markdown("""
<div style='text-align: center;'>
<a href='https://github.com/dsfsi' target='_blank'>Github</a> |
<a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
<a href='https://huggingface.co/papers/1911.02116' target='_blank'>arxiv</a>
</div>
""", unsafe_allow_html=True)

st.markdown("""
<div style='text-align: center;'>
<h2>Fill Mask | Zabantu-sot-ven-170m</h2>
</div>
""", unsafe_allow_html=True)

# Spacer between the title and the description.
st.write("")

st.markdown("This is a variant of Zabantu pre-trained on a multilingual dataset of Tshivenda(ven) and Sotho family(Northern Sotho, Southern Sotho, Setswana) sentences on a transformer network with 170 million trainable parameters.")

with st.expander("More information about the space"):
    st.write('''
Authors: Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer, Veselin Stoyanov
''')
    # Two columns reserved inside the expander; nothing is placed in them yet.
    cit1, cit2 = st.columns(2)
# --- Input / output panels ---------------------------------------------------
# NOTE(review): the block nesting below was reconstructed from a copy of the
# file that had lost its indentation - confirm it against the deployed app.


def _render_prediction_bar(label, score):
    """Render one confidence bar: ``label`` on the left, ``score`` percent on the right.

    ``label`` is HTML-escaped because it is interpolated into markup that is
    rendered with ``unsafe_allow_html=True``.
    """
    import html  # function-scope import keeps this block self-contained

    st.markdown(f"""
<div class="bar">
<div class="bar-fill" style="width: {score}%;"></div>
</div>
<div class="container">
<div style="align-items: left;">{html.escape(label)}</div>
<div style="align-items: right;">{score:.2f}%</div>
</div>
""", unsafe_allow_html=True)


col1, col2 = st.columns(2)

# Session defaults so the widgets below can read these keys on the first run.
if 'text_input' not in st.session_state:
    st.session_state['text_input'] = ""

if 'warnings' not in st.session_state:
    st.session_state['warnings'] = []

# Predictions for the current run only; stays empty unless a button below was
# pressed during this run.
result = {}

with col1:
    with st.container(border=True):
        st.markdown("Input :clipboard:")

        select_options = ['Choose option', 'Enter text input', 'Upload a file(csv/txt)']
        sample_sentence = "Vhana vhane vha kha ḓi bva u bebwa vha kha khombo ya u <mask> nga Listeriosis."

        option_selected = st.selectbox("Select an input option:", select_options, index=0)

        if option_selected == 'Enter text input':
            text_input = st.text_area(
                "Enter sentences with <mask> token(one sentence per line):",
                value=st.session_state['text_input']
            )
            input_sentences = text_input.splitlines()

            if st.button("Submit", use_container_width=True):
                result, warnings = fill_mask(input_sentences)
                st.session_state['warnings'] = warnings

        if option_selected == 'Upload a file(csv/txt)':
            uploaded_file = st.file_uploader("Choose a file-(one sentence per line)")
            if uploaded_file is not None:
                try:
                    string_data = uploaded_file.getvalue().decode("utf-8")
                except UnicodeDecodeError:
                    # Report an unreadable upload instead of crashing the app.
                    st.error("Could not read the file: it must be UTF-8 encoded text.")
                else:
                    # splitlines() also copes with Windows "\r\n" line endings,
                    # which a plain split("\n") would leave as trailing "\r".
                    input_sentences = string_data.splitlines()

                    if st.button("Submit", use_container_width=True):
                        result, warnings = fill_mask(input_sentences)
                        st.session_state['warnings'] = warnings

        # Warnings live in session state, so they remain visible on later
        # reruns until the next Submit replaces them.
        if st.session_state['warnings']:
            for warning in st.session_state['warnings']:
                st.warning(warning)

        st.markdown("Example")
        st.code(sample_sentence, wrap_lines=True)
        if st.button("Test Example", use_container_width=True):
            result, warnings = fill_mask(sample_sentence.split("\n"))

with col2:
    with st.container(border=True):
        st.markdown("Output :bar_chart:")

        if len(result) == 1:
            # A single sentence: show every candidate the model returned.
            for predictions in result.values():
                for prediction in predictions:
                    _render_prediction_bar(
                        prediction['token_str'],
                        prediction['score'] * 100,
                    )
        else:
            # Several sentences: show only the top candidate for each one.
            # The number counts sentences that contained a mask.
            for index, predictions in enumerate(result.values(), start=1):
                if predictions:
                    top_prediction = predictions[0]
                    _render_prediction_bar(
                        f"{top_prediction['token_str']} (line {index})",
                        top_prediction['score'] * 100,
                    )

# Below both panels: each sentence with its top prediction filled in.
for line, (sentence, predictions) in enumerate(result.items(), start=1):
    if not predictions:
        # Nothing to fill in for this sentence.
        continue
    predicted_word = predictions[0]['token_str']
    full_sentence = replace_mask(sentence, predicted_word)
    st.write(f"**Sentence {line}:** {full_sentence}")
# Global stylesheet for the page, injected once through st.markdown.
# A raw string is used because the escaped Tailwind selectors contain "\:",
# which is an invalid escape sequence in a normal string literal.
# NOTE(review): the .gr-button-primary and Tailwind-style selectors look like
# leftovers from another UI framework and are presumably unused by this page -
# confirm before removing. The .container, .bar and .bar-fill rules style the
# prediction bars emitted in the output panel.
css = r"""
<style>
footer {display:none !important;}
.gr-button-primary {
z-index: 14;
height: 43px;
width: 130px;
left: 0px;
top: 0px;
padding: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
text-align: center !important;
font-family: Poppins !important;
font-size: 14px !important;
font-weight: 500 !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 12px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
.gr-button-primary:hover{
z-index: 14;
height: 43px;
width: 130px;
left: 0px;
top: 0px;
padding: 0px;
cursor: pointer !important;
background: none rgb(66, 133, 244) !important;
border: none !important;
text-align: center !important;
font-family: Poppins !important;
font-size: 14px !important;
font-weight: 500 !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 12px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
.hover\:bg-orange-50:hover {
--tw-bg-opacity: 1 !important;
background-color: rgb(229,225,255) !important;
}
.to-orange-200 {
--tw-gradient-to: rgb(37 56 133 / 37%) !important;
}
.from-orange-400 {
--tw-gradient-from: rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(255 150 51 / 0);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group-hover\:from-orange-500{
--tw-gradient-from:rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(37 56 133 / 37%);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group:hover .group-hover\:text-orange-500{
--tw-text-opacity: 1 !important;
color:rgb(37 56 133 / var(--tw-text-opacity)) !important;
}
.container {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 5px;
width: 100%;
}
.bar {
/* width: 70%; */
background-color: #e6e6e6;
border-radius: 12px;
overflow: hidden;
margin-right: 10px;
height: 5px;
}
.bar-fill {
background-color: #17152e;
height: 100%;
border-radius: 12px;
}
</style>
"""
st.markdown(css, unsafe_allow_html=True)