gourisankar85 committed · verified
Commit 5b260bd · Parent(s): efb5c9e

Upload 8 files
scripts/download_files.py CHANGED
@@ -7,6 +7,9 @@ LOCAL_SAVE_PATH = "data" # Path where files will be saved
 GITHUB_API_URL = "https://api.github.com/repos/chen700564/RGB/contents/data"
 RAW_BASE_URL = "https://raw.githubusercontent.com/chen700564/RGB/master/data/"
 
+# Ensure the directory exists before downloading
+os.makedirs(LOCAL_SAVE_PATH, exist_ok=True)
+
 def get_file_list():
     """Fetch the list of files from the GitHub repository."""
     response = requests.get(GITHUB_API_URL)
@@ -30,7 +33,6 @@ def download_file(file_name):
 
     file_url = RAW_BASE_URL + file_name
     local_file_path = os.path.join(LOCAL_SAVE_PATH, file_name)
-
     response = requests.get(file_url, stream=True)
     if response.status_code == 200:
         total_size = int(response.headers.get("content-length", 0))
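The new os.makedirs guard makes the script safe to run from a fresh checkout: the data directory is created on first use and is a no-op afterwards. A minimal sketch of the guarded, streaming download this file performs (the download helper and the 8 KiB chunk size are illustrative, not the repository's exact code):

import os
import requests

LOCAL_SAVE_PATH = "data"
RAW_BASE_URL = "https://raw.githubusercontent.com/chen700564/RGB/master/data/"

# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(LOCAL_SAVE_PATH, exist_ok=True)

def download(file_name):
    # Stream so large files are written in chunks instead of buffered in memory.
    response = requests.get(RAW_BASE_URL + file_name, stream=True)
    if response.status_code == 200:
        with open(os.path.join(LOCAL_SAVE_PATH, file_name), "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)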
scripts/evaluate_factual_robustness.py CHANGED
@@ -9,15 +9,15 @@ from scripts.prompt import get_factual_prompt
 
 def evaluate_factual_robustness(config):
     """Evaluates negative rejection for a given model by processing predictions and computing scores."""
-    config["noise_rate"] = 0.4 # Time being to do clarification
-    modelname = config["model_name"]
-    noise_rate = config["noise_rate"]
-    passage_num = config["passage_num"]
+    config['noise_rate'] = 0.4 # Time being to do clarification
+    modelname = config['model_name']
+    noise_rate = config['noise_rate']
+    passage_num = config['passage_num']
 
-    if config["model_name"] in config["models"]:
-        model = GroqClient(plm=config["model_name"])
+    if config['model_name'] in config["models"]:
+        model = GroqClient(plm=config['model_name'])
     else:
-        logging.warning(f"Skipping unknown model: {config["model_name"]}")
+        logging.warning(f"Skipping unknown model: {config['model_name']}")
         return
 
     # File paths
@@ -84,7 +84,7 @@ def evaluate_factual_robustness(config):
         'rejecttt':rejecttt,
         'correct_tt':correct_tt,
         'nums': len(results),
-        'noise_rate': config["noise_rate"],
+        'noise_rate': config['noise_rate'],
     }
     return scores
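The quote swaps above are the substance of this commit: before Python 3.12 (PEP 701), an f-string cannot reuse its own quote character inside a replacement field, so the removed double-inside-double lines were syntax errors. A minimal demonstration:

config = {'model_name': 'llama3-70b'}  # hypothetical value

# SyntaxError before Python 3.12: the inner double quotes terminate the f-string early.
# f"Skipping unknown model: {config["model_name"]}"

# Valid on any Python 3.6+ interpreter: the inner quotes differ from the outer ones.
print(f"Skipping unknown model: {config['model_name']}")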
 
scripts/evaluate_information_integration.py CHANGED
@@ -11,11 +11,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 # Improved function to evaluate noise robustness
 def evaluate_information_integration(config):
     result_path = config["result_path"] + 'Information Integration/'
-    noise_rate = config["noise_rate"]
-    passage_num = config["passage_num"]
+    noise_rate = config['noise_rate']
+    passage_num = config['passage_num']
 
     # Iterate over each model specified in the config
-    filename = os.path.join(result_path, f'prediction_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    filename = os.path.join(result_path, f"prediction_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     ensure_directory_exists(filename)
 
     # Load existing results if file exists
@@ -45,7 +45,7 @@ def evaluate_information_integration(config):
 
     # Save the final score file with tt and all_rate
     scores = {
-        'model': config["model_name"],
+        'model': config['model_name'],
         'accuracy': accuracy,
         'noise_rate': noise_rate,
         'correct_count': correct_count,
@@ -56,7 +56,7 @@ def evaluate_information_integration(config):
     logging.info(f"Score: {scores}")
     logging.info(f"Information Integration Accuracy: {accuracy:.2%}")
 
-    score_filename = os.path.join(result_path, f'scores_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    score_filename = os.path.join(result_path, f"scores_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     with open(score_filename, 'w') as f:
         json.dump(scores, f, ensure_ascii=False, indent=4)
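The prediction and score paths share one naming scheme, so a small helper (hypothetical, not in the repository) could build both and sidestep nested-quote pitfalls by pulling the config values into locals first:

import os

def result_file(result_path, kind, config):
    # kind is "prediction" or "scores"; locals keep the f-string free of nested quotes.
    model = config["model_name"]
    noise = config["noise_rate"]
    passages = config["passage_num"]
    return os.path.join(result_path, f"{kind}_{model}_noise_{noise}_passage_{passages}.json")

# e.g. result_file("results/Information Integration/", "prediction", config)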
 
scripts/evaluate_negative_rejection.py CHANGED
@@ -10,15 +10,15 @@ from scripts.prompt import get_prompt
 
 def evaluate_negative_rejection(config):
     """Evaluates negative rejection for a given model by processing predictions and computing scores."""
-    config["noise_rate"] = 1.0 # Noise rate should be 1.0 for negative rejection evaluation
-    modelname = config["model_name"]
-    noise_rate = config["noise_rate"]
-    passage_num = config["passage_num"]
+    config['noise_rate'] = 1.0 # Noise rate should be 1.0 for negative rejection evaluation
+    modelname = config['model_name']
+    noise_rate = config['noise_rate']
+    passage_num = config['passage_num']
 
-    if config["model_name"] in config["models"]:
-        model = GroqClient(plm=config["model_name"])
+    if config['model_name'] in config["models"]:
+        model = GroqClient(plm=config['model_name'])
     else:
-        logging.warning(f"Skipping unknown model: {config["model_name"]}")
+        logging.warning(f"Skipping unknown model: {config['model_name']}")
         return
 
     # File paths
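Forcing noise_rate to 1.0 means every retrieved passage is a distractor, so a well-behaved model should refuse to answer rather than guess. A sketch of the arithmetic, assuming noise_rate is the fraction of the passage_num slots filled with noise documents (the convention suggested by the surrounding code):

passage_num = 5  # illustrative value

for noise_rate in (0.4, 1.0):
    noise_docs = int(passage_num * noise_rate)
    relevant_docs = passage_num - noise_docs
    print(f"noise_rate={noise_rate}: {relevant_docs} relevant + {noise_docs} noise passages")

# noise_rate=1.0 leaves zero relevant passages, so the only correct answer is a rejection.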
scripts/evaluate_noise_robustness.py CHANGED
@@ -11,11 +11,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 # Improved function to evaluate noise robustness
 def evaluate_noise_robustness(config):
     result_path = config["result_path"] + 'Noise Robustness/'
-    noise_rate = config["noise_rate"]
-    passage_num = config["passage_num"]
+    noise_rate = config['noise_rate']
+    passage_num = config['passage_num']
 
     # Iterate over each model specified in the config
-    filename = os.path.join(result_path, f'prediction_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    filename = os.path.join(result_path, f"prediction_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     ensure_directory_exists(filename)
 
     # Load existing results if file exists
@@ -45,7 +45,7 @@ def evaluate_noise_robustness(config):
 
     # Save the final score file with tt and all_rate
     scores = {
-        'model': config["model_name"],
+        'model': config['model_name'],
         'accuracy': accuracy,
         'noise_rate': noise_rate,
         'correct_count': correct_count,
@@ -56,7 +56,7 @@ def evaluate_noise_robustness(config):
     logging.info(f"score: {scores}")
     logging.info(f"Noise Robustness Accuracy: {accuracy:.2%}")
 
-    score_filename = os.path.join(result_path, f'scores_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    score_filename = os.path.join(result_path, f"scores_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     with open(score_filename, 'w') as f:
         json.dump(scores, f, ensure_ascii=False, indent=4)
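The scores written here are plain JSON; a toy end-to-end sketch of how the fields fit together (field names taken from the diff, the accuracy computation itself is assumed):

import json

results = [{'label': 1}, {'label': 0}, {'label': 1}]  # toy per-query outcomes
correct_count = sum(1 for r in results if r['label'] == 1)
accuracy = correct_count / len(results)

scores = {
    'model': 'llama3-70b',  # stands in for config['model_name']
    'accuracy': accuracy,
    'noise_rate': 0.4,
    'correct_count': correct_count,
}
print(json.dumps(scores, ensure_ascii=False, indent=4))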
 
scripts/get_factual_evaluation.py CHANGED
@@ -11,11 +11,11 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 # Improved function to evaluate noise robustness
 def get_factual_evaluation(config):
     result_path = config["result_path"] + 'Counterfactual Robustness/'
-    noise_rate = config["noise_rate"]
-    passage_num = config["passage_num"]
+    noise_rate = config['noise_rate']
+    passage_num = config['passage_num']
 
     # Iterate over each model specified in the config
-    filename = os.path.join(result_path, f'prediction_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    filename = os.path.join(result_path, f"prediction_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     ensure_directory_exists(filename)
 
     # Load existing results if file exists
@@ -61,7 +61,7 @@ def get_factual_evaluation(config):
     scores['correct_tt'] = correct_tt
 
     #logging.info(f"score: {scores}")
-    score_filename = os.path.join(result_path, f'scores_{config["model_name"]}_noise_{noise_rate}_passage_{passage_num}.json')
+    score_filename = os.path.join(result_path, f"scores_{config['model_name']}_noise_{noise_rate}_passage_{passage_num}.json")
     with open(score_filename, 'w') as f:
         json.dump(scores, f, ensure_ascii=False, indent=4)
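ensure_directory_exists comes from scripts/helper.py and is called before any result file is written; a plausible implementation, assuming it takes the file path and creates any missing parent directory:

import os

def ensure_directory_exists(file_path):
    # Create the parent directory of file_path if it does not exist yet.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)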
 
scripts/get_prediction_result.py CHANGED
@@ -13,17 +13,17 @@ def get_prediction_result(config, data_file_name):
     results = []
     dataset = load_dataset(data_file_name)
     # Create GroqClient instance for supported models
-    if config["model_name"] in config["models"]:
-        model = GroqClient(plm=config["model_name"])
+    if config['model_name'] in config["models"]:
+        model = GroqClient(plm=config['model_name'])
     else:
-        logging.warning(f"Skipping unknown model: {config["model_name"]}")
+        logging.warning(f"Skipping unknown model: {config['model_name']}")
         return
 
     # Iterate through dataset and process queries
-    for idx, instance in enumerate(dataset[:config["num_queries"]], start=0):
-        logging.info(f"Executing Query {idx + 1} for Model: {config["model_name"]}")
+    for idx, instance in enumerate(dataset[:config['num_queries']], start=0):
+        logging.info(f"Executing Query {idx + 1} for Model: {config['model_name']}")
 
-        query, ans, docs = process_data(instance, config["noise_rate"], config["passage_num"], data_file_name)
+        query, ans, docs = process_data(instance, config['noise_rate'], config['passage_num'], data_file_name)
 
         # Retry mechanism for prediction
         for attempt in range(1, config["retry_attempts"] + 1):
@@ -46,7 +46,7 @@ def get_prediction_result(config, data_file_name):
             'label': label,
             'prediction': prediction,
            'docs': docs,
-            'noise_rate': config["noise_rate"],
+            'noise_rate': config['noise_rate'],
            'factlabel': factlabel
         }
         results.append(new_instance)
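The loop over config["retry_attempts"] wraps the model call so a transient API failure does not abort the whole run; a self-contained sketch of the pattern (predict_with_retry, the predict method, and the fixed delay are illustrative, not the repository's exact API):

import logging
import time

def predict_with_retry(model, query, retry_attempts=3, delay_seconds=2):
    # Try the call up to retry_attempts times, sleeping between failures.
    for attempt in range(1, retry_attempts + 1):
        try:
            return model.predict(query)  # hypothetical client method
        except Exception as err:
            logging.warning(f"Attempt {attempt}/{retry_attempts} failed: {err}")
            time.sleep(delay_seconds)
    return None  # caller records a failed prediction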
scripts/helper.py CHANGED
@@ -31,11 +31,11 @@ def update_config(config, model_name=None, noise_rate=None, num_queries=None):
         dict: The updated configuration dictionary.
     """
     if model_name:
-        config["model_name"] = model_name
+        config['model_name'] = model_name
     if noise_rate is not None: # Explicitly check for None to handle 0.0
-        config["noise_rate"] = float(noise_rate) # Ensure it's a float
+        config['noise_rate'] = float(noise_rate) # Ensure it's a float
     if num_queries is not None: # Explicitly check for None to handle 0
-        config["num_queries"] = int(num_queries) # Ensure it's an integer
+        config['num_queries'] = int(num_queries) # Ensure it's an integer
     return config
 
 def load_dataset(file_name):
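The explicit "is not None" checks in update_config matter because 0 and 0.0 are falsy; a quick usage sketch against the function above (config values are illustrative):

config = {'model_name': 'llama3-70b', 'noise_rate': 0.4, 'num_queries': 50}

# noise_rate=0.0 and num_queries=0 must still be applied; a bare
# "if noise_rate:" would silently skip both.
config = update_config(config, noise_rate=0.0, num_queries=0)
print(config['noise_rate'], config['num_queries'])  # -> 0.0 0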