patrickvonplaten committed on
Commit
9fec05e
·
1 Parent(s): b5dcb77
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -74,6 +74,7 @@ def match_code(lines, codewords):
74
 
75
  def main(filename, codewords_mapping):
76
  lines = retrieve_lines(filename)
 
77
 
78
  for label, codewords in codewords_mapping.items():
79
  match = match_code(lines, codewords)
@@ -98,28 +99,32 @@ def main(filename, codewords_mapping):
98
  with open(result_file, "a") as f:
99
  f.write(out)
100
 
 
 
 
 
 
101
  def convert(*keywords):
102
- # cached_folder = snapshot_download("claudiag/atlas", token=os.environ.get("HF_TOKEN"))
103
  codewords_mapping = {k: v for k,v in zip(CATEGORIES, keywords)}
104
 
105
  num_files = 0
106
 
107
- return "_".join(codewords_mapping.values())
108
-
109
- for folder in tqdm.tqdm(glob.glob("./*")):
110
- shutil.rmtree(RESULTS_FOLDER, ignore_errors=True)
111
- os.makedirs(RESULTS_FOLDER)
112
 
 
 
113
  all_files = tqdm.tqdm(glob.glob(f"./{folder}/*"))
114
  num_files += len(all_files)
115
 
116
  for filename in all_files:
117
  try:
118
- main(filename)
119
  except Exception as e:
120
  print(f"{filename} not working because \n {e}")
121
 
122
- return f"Retrieved from {num_files}"
123
 
124
  inputs = [gr.Textbox(label=f"Enter your keywords for {k}", max_lines=2, placeholder=CAT_TO_CODEWORDS[k]) for k in CATEGORIES]
125
 
 
74
 
75
  def main(filename, codewords_mapping):
76
  lines = retrieve_lines(filename)
77
+ files = []
78
 
79
  for label, codewords in codewords_mapping.items():
80
  match = match_code(lines, codewords)
 
99
  with open(result_file, "a") as f:
100
  f.write(out)
101
 
102
+ files.append(result_file)
103
+
104
+ return files
105
+
106
+
107
  def convert(*keywords):
108
+ cached_folder = snapshot_download("claudiag/atlas", token=os.environ.get("HF_TOKEN"))
109
  codewords_mapping = {k: v for k,v in zip(CATEGORIES, keywords)}
110
 
111
  num_files = 0
112
 
113
+ shutil.rmtree(RESULTS_FOLDER, ignore_errors=True)
114
+ os.makedirs(RESULTS_FOLDER)
 
 
 
115
 
116
+ result_files = []
117
+ for folder in tqdm.tqdm(glob.glob(os.path.join(cached_folder, "/*"))):
118
  all_files = tqdm.tqdm(glob.glob(f"./{folder}/*"))
119
  num_files += len(all_files)
120
 
121
  for filename in all_files:
122
  try:
123
+ result_files += main(filename)
124
  except Exception as e:
125
  print(f"{filename} not working because \n {e}")
126
 
127
+ return f"Retrieved from {num_files}"
128
 
129
  inputs = [gr.Textbox(label=f"Enter your keywords for {k}", max_lines=2, placeholder=CAT_TO_CODEWORDS[k]) for k in CATEGORIES]
130