rahideer committed on
Commit
9c05cea
·
verified ·
1 Parent(s): 6492b34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -4,50 +4,56 @@ import os
4
  import pandas as pd
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
 
7
- # Paths
8
  ZIP_FILE = "xnli-multilingual-nli-dataset.zip"
9
  EXTRACT_DIR = "extracted_data"
10
 
11
  @st.cache_data
12
- def extract_and_load():
13
  if not os.path.exists(EXTRACT_DIR):
 
 
14
  with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
15
  zip_ref.extractall(EXTRACT_DIR)
16
- csv_files = [f for f in os.listdir(EXTRACT_DIR) if f.endswith('.csv')]
17
- return csv_files
18
 
19
- # Load model and tokenizer
20
  @st.cache_resource
21
- def load_model():
22
  tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
23
  model = AutoModelForSequenceClassification.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
24
- nli_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
25
- return nli_pipeline
26
-
27
- st.set_page_config(page_title="Multilingual NLI App", layout="centered")
28
 
 
29
  st.title("🌍 Multilingual NLI (Natural Language Inference) Explorer")
30
  st.markdown("Upload premise & hypothesis pairs or use the dataset to explore entailment, contradiction, or neutrality.")
31
 
32
- nli_pipeline = load_model()
 
 
 
 
 
33
 
34
- csv_files = extract_and_load()
35
- selected_csv = st.selectbox("Choose a language CSV file:", csv_files)
36
 
37
- df = pd.read_csv(os.path.join(EXTRACT_DIR, selected_csv)).dropna()
38
- sample_df = df.sample(5).reset_index(drop=True)
 
39
 
40
- st.subheader("Sample from Dataset")
41
- st.dataframe(sample_df[['premise', 'hypothesis', 'label']])
42
 
43
- st.subheader("πŸ” Run Inference")
44
- index = st.number_input("Select Sample Index", min_value=0, max_value=len(sample_df)-1, value=0, step=1)
45
- premise = sample_df.loc[index, 'premise']
46
- hypothesis = sample_df.loc[index, 'hypothesis']
47
 
48
- st.markdown(f"**Premise:** {premise}")
49
- st.markdown(f"**Hypothesis:** {hypothesis}")
50
 
51
- if st.button("Run NLI Prediction"):
52
- result = nli_pipeline(f"{premise} </s> {hypothesis}")
53
- st.success(f"**Prediction:** {result[0]['label']} (Score: {result[0]['score']:.2f})")
 
 
 
 
4
  import pandas as pd
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
6
 
 
7
# Dataset archive and extraction target, relative to the app's working directory.
ZIP_FILE = "xnli-multilingual-nli-dataset.zip"
EXTRACT_DIR = "extracted_data"


@st.cache_data
def extract_and_list_csv_files():
    """Extract the XNLI zip archive (once) and list the CSV files inside it.

    Returns:
        list[str]: sorted CSV file names found directly under ``EXTRACT_DIR``.
        Empty when the archive is missing or unreadable, so the UI can show a
        warning instead of crashing.
    """
    if not os.path.exists(EXTRACT_DIR):
        if not os.path.exists(ZIP_FILE):
            return []
        try:
            with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
                zip_ref.extractall(EXTRACT_DIR)
        except zipfile.BadZipFile:
            # A corrupt upload should degrade to the "no data" path, not a traceback.
            return []
    # Sorted so the selectbox ordering is stable across Streamlit reruns.
    return sorted(f for f in os.listdir(EXTRACT_DIR) if f.endswith('.csv'))
18
 
 
19
@st.cache_resource
def load_nli_model():
    """Build and cache the multilingual NLI text-classification pipeline.

    The checkpoint is a mDeBERTa-v3 model fine-tuned on MNLI/XNLI; caching via
    ``st.cache_resource`` means the weights are downloaded and loaded only once
    per server process.
    """
    checkpoint = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
    nli_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    nli_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return pipeline(
        "text-classification",
        model=nli_model,
        tokenizer=nli_tokenizer,
    )
 
 
24
 
25
# --- Page chrome -----------------------------------------------------------
st.set_page_config(page_title="Multilingual NLI Explorer", layout="centered")
st.title("🌍 Multilingual NLI (Natural Language Inference) Explorer")
st.markdown("Upload premise & hypothesis pairs or use the dataset to explore entailment, contradiction, or neutrality.")

# --- Data selection --------------------------------------------------------
csv_files = extract_and_list_csv_files()

if not csv_files:
    st.warning("⚠️ No CSV files found. Please make sure `xnli-multilingual-nli-dataset.zip` is uploaded.")
else:
    selected_csv = st.selectbox("Choose a language CSV file:", csv_files)

    if selected_csv:
        file_path = os.path.join(EXTRACT_DIR, selected_csv)

        try:
            df = pd.read_csv(file_path).dropna()

            # Guard against empty or malformed files: the number_input below
            # needs at least one row (max_value must be >= min_value), and the
            # column access needs exactly this schema.
            required_cols = ["premise", "hypothesis", "label"]
            if df.empty or any(col not in df.columns for col in required_cols):
                st.warning("⚠️ The selected CSV has no usable rows with premise/hypothesis/label columns.")
            else:
                sample_df = df.sample(min(5, len(df))).reset_index(drop=True)

                st.subheader("📄 Sample from Dataset")
                st.dataframe(sample_df[['premise', 'hypothesis', 'label']])

                st.subheader("🔍 Run Inference")
                index = st.number_input(
                    "Select Sample Index",
                    min_value=0,
                    max_value=len(sample_df) - 1,
                    value=0,
                    step=1,
                )
                premise = sample_df.loc[index, 'premise']
                hypothesis = sample_df.loc[index, 'hypothesis']

                st.markdown(f"**Premise:** {premise}")
                st.markdown(f"**Hypothesis:** {hypothesis}")

                if st.button("Run NLI Prediction"):
                    # Loaded lazily (and cached) so the page renders before the
                    # first, potentially slow, checkpoint download.
                    nli_pipeline = load_nli_model()
                    # Pass the pair explicitly rather than gluing the texts with
                    # a hand-written "</s>" separator: the pipeline then applies
                    # the tokenizer's own sentence-pair encoding, which is what
                    # the NLI checkpoint was trained on.
                    result = nli_pipeline([{"text": premise, "text_pair": hypothesis}])
                    st.success(f"**Prediction:** {result[0]['label']} (Score: {result[0]['score']:.2f})")
        except Exception as e:
            # Top-level UI boundary: surface read/parse errors in the page
            # instead of crashing the Streamlit run.
            st.error(f"❌ Error reading CSV file: {e}")