awacke1 committed on
Commit
a9c540f
·
verified ·
1 Parent(s): b0def0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -2,13 +2,13 @@ import streamlit as st
2
  from datasets import load_dataset
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
4
  import torch
 
5
 
6
  def load_orca_dataset():
7
  st.info("Loading dataset... This may take a while.")
8
  return load_dataset("microsoft/orca-agentinstruct-1M-v1")
9
 
10
  @st.cache_data
11
-
12
  def load_model_and_tokenizer(model_name):
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
@@ -40,11 +40,17 @@ def main():
40
  if "dataset" in st.session_state:
41
  dataset = st.session_state["dataset"]
42
 
 
 
 
 
 
43
  st.subheader("Dataset Explorer")
44
- st.write(dataset["train"].info)
 
45
 
46
  sample_size = st.slider("Number of Samples to Display", min_value=1, max_value=20, value=5)
47
- st.write(dataset["train"].shuffle(seed=42).select(range(sample_size)))
48
 
49
  st.subheader("Model Evaluator")
50
  model_name = st.text_input("Enter Hugging Face Model Name", value="distilbert-base-uncased-finetuned-sst-2-english")
@@ -53,7 +59,7 @@ def main():
53
  if st.button("Load Model and Evaluate"):
54
  tokenizer, model = load_model_and_tokenizer(model_name)
55
 
56
- results = evaluate_model(dataset["train"].shuffle(seed=42).select(range(max_samples)), tokenizer, model, max_samples)
57
 
58
  st.subheader("Evaluation Results")
59
  st.write(results)
 
2
  from datasets import load_dataset
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
4
  import torch
5
+ import pandas as pd
6
 
7
  def load_orca_dataset():
8
  st.info("Loading dataset... This may take a while.")
9
  return load_dataset("microsoft/orca-agentinstruct-1M-v1")
10
 
11
  @st.cache_data
 
12
  def load_model_and_tokenizer(model_name):
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
40
  if "dataset" in st.session_state:
41
  dataset = st.session_state["dataset"]
42
 
43
+ # List available splits
44
+ available_splits = list(dataset.keys())
45
+ st.sidebar.subheader("Available Dataset Splits")
46
+ selected_split = st.sidebar.selectbox("Select Split", available_splits)
47
+
48
  st.subheader("Dataset Explorer")
49
+ st.write(f"Displaying information for split: `{selected_split}`")
50
+ st.write(dataset[selected_split].info)
51
 
52
  sample_size = st.slider("Number of Samples to Display", min_value=1, max_value=20, value=5)
53
+ st.write(dataset[selected_split].shuffle(seed=42).select(range(sample_size)))
54
 
55
  st.subheader("Model Evaluator")
56
  model_name = st.text_input("Enter Hugging Face Model Name", value="distilbert-base-uncased-finetuned-sst-2-english")
 
59
  if st.button("Load Model and Evaluate"):
60
  tokenizer, model = load_model_and_tokenizer(model_name)
61
 
62
+ results = evaluate_model(dataset[selected_split].shuffle(seed=42).select(range(max_samples)), tokenizer, model, max_samples)
63
 
64
  st.subheader("Evaluation Results")
65
  st.write(results)