RKocielnik committed on
Commit
af9829f
·
verified ·
1 Parent(s): b829dd9

update sentence manager to new huggingface_hub

Browse files
Files changed (1) hide show
  1. mgr_sentences.py +27 -3
mgr_sentences.py CHANGED
@@ -4,6 +4,7 @@ import re
4
  import pandas as pd
5
  import numpy as np
6
  import glob
 
7
  import huggingface_hub
8
  print("hfh", huggingface_hub.__version__)
9
  from huggingface_hub import hf_hub_download, upload_file, delete_file, snapshot_download, list_repo_files, dataset_info
@@ -64,6 +65,18 @@ def saveSentences(sentences_df):
64
  print(f"Org size: {grp_saved_df.shape[0]}, Mrg size: {new_grp_df.shape[0]}")
65
  store_group_sentences(filename, new_grp_df)
66
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  # https://huggingface.co/spaces/elonmuskceo/persistent-data/blob/main/app.py
69
  def get_sentence_csv(file_path: str):
@@ -82,11 +95,22 @@ def get_sentence_csv(file_path: str):
82
  # file not found
83
  print(f"file not found, probably: {e}")
84
 
85
- files=glob.glob(f"./{LOCAL_DATA_DIRNAME}/", recursive=True)
 
 
 
 
 
 
 
86
  print("Files glob: "+', '.join(files))
 
 
 
 
 
87
  #print("Save file:" + str(os.path.basename(file_path)))
88
-
89
- df = pd.read_csv(os.path.join(LOCAL_DATA_DIRNAME, os.path.basename(file_path)), encoding='UTF8')
90
 
91
  return df
92
 
 
4
  import pandas as pd
5
  import numpy as np
6
  import glob
7
+ from pathlib import Path
8
  import huggingface_hub
9
  print("hfh", huggingface_hub.__version__)
10
  from huggingface_hub import hf_hub_download, upload_file, delete_file, snapshot_download, list_repo_files, dataset_info
 
65
  print(f"Org size: {grp_saved_df.shape[0]}, Mrg size: {new_grp_df.shape[0]}")
66
  store_group_sentences(filename, new_grp_df)
67
 
68
+ def list_folders_sorted_by_date(path):
69
+ # Convert string path to a Path object
70
+ directory = Path(path)
71
+
72
+ # Get all folders in the given directory
73
+ folders = [f for f in directory.iterdir() if f.is_dir()]
74
+
75
+ # Sort folders by modification time, most recent first
76
+ sorted_folders = sorted(folders, key=lambda x: x.stat().st_mtime, reverse=True)
77
+
78
+ # Return folder names
79
+ return [folder.name for folder in sorted_folders]
80
 
81
  # https://huggingface.co/spaces/elonmuskceo/persistent-data/blob/main/app.py
82
  def get_sentence_csv(file_path: str):
 
95
  # file not found
96
  print(f"file not found, probably: {e}")
97
 
98
+ ds_local_path = os.path.join(LOCAL_DATA_DIRNAME,
99
+ "datasets--AnimaLab--bias-test-gpt-sentences",
100
+ "snapshots")
101
+ folders_sorted = list_folders_sorted_by_date(ds_local_path)
102
+ print("---SENTENCE FOLDERS---")
103
+ print(os.path.join(ds_local_path, folders_sorted[0]))
104
+
105
+ files=glob.glob(os.path.join(ds_local_path, folders_sorted[0], file_path), recursive=True)
106
  print("Files glob: "+', '.join(files))
107
+
108
+ df = pd.read_csv(os.path.join(ds_local_path, folders_sorted[0], file_path), encoding='UTF8')
109
+
110
+ #files=glob.glob(f"./{LOCAL_DATA_DIRNAME}/", recursive=True)
111
+ #print("Files glob: "+', '.join(files))
112
  #print("Save file:" + str(os.path.basename(file_path)))
113
+ #df = pd.read_csv(os.path.join(LOCAL_DATA_DIRNAME, os.path.basename(file_path)), encoding='UTF8')
 
114
 
115
  return df
116