patrickvonplaten committed on
Commit
d71a81f
·
1 Parent(s): 713b4ba
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -5,6 +5,7 @@ import shutil
5
  import os
6
  import tqdm
7
  from huggingface_hub import snapshot_download
 
8
  import tempfile
9
  import re
10
  import pdfminer
@@ -12,12 +13,18 @@ import pdfminer
12
  print("pdfminer", pdfminer.__version__)
13
  print("pandoc", pypandoc.__version__)
14
 
 
 
 
 
 
 
15
  #from docx import Document
16
  #document = Document()
17
  #document.add_heading('Labels for ', level=1)
18
  RESULTS_FOLDER = "./results"
19
 
20
- DOC_FOLDER = snapshot_download("claudiag/atlas", token=os.environ.get("HF_TOKEN"), repo_type="dataset")
21
 
22
  CAT_TO_CODEWORDS = {
23
  "Prejudices": ["prejudice", "judge", "preconceive", "stigma", "assumption", "assume", "misunderstanding", "unexamined", "distorted", "clear", "compar"],
@@ -127,7 +134,14 @@ def convert(*keywords):
127
 
128
  return f"Retrieved from {num_files}"
129
 
130
- inputs = [gr.Textbox(label=f"Enter your keywords for {k}", max_lines=2, placeholder=CAT_TO_CODEWORDS[k]) for k in CATEGORIES]
 
 
 
 
 
 
 
131
 
132
  iface = gr.Interface(
133
  fn=convert, inputs=inputs, outputs="text")
 
5
  import os
6
  import tqdm
7
  from huggingface_hub import snapshot_download
8
+ from huggingface_hub import HfApi, login
9
  import tempfile
10
  import re
11
  import pdfminer
 
13
  print("pdfminer", pdfminer.__version__)
14
  print("pandoc", pypandoc.__version__)
15
 
16
+ HF_TOKEN = os.environ.get("HF_TOKEN")
17
+
18
+ api = HfApi()
19
+ login(HF_TOKEN)
20
+
21
+
22
  #from docx import Document
23
  #document = Document()
24
  #document.add_heading('Labels for ', level=1)
25
  RESULTS_FOLDER = "./results"
26
 
27
+ DOC_FOLDER = snapshot_download("claudiag/atlas", token=HF_TOKEN, repo_type="dataset")
28
 
29
  CAT_TO_CODEWORDS = {
30
  "Prejudices": ["prejudice", "judge", "preconceive", "stigma", "assumption", "assume", "misunderstanding", "unexamined", "distorted", "clear", "compar"],
 
134
 
135
  return f"Retrieved from {num_files}"
136
 
137
+ # api.upload_file(
138
+ # path_or_fileobj="/path/to/local/folder/README.md",
139
+ # path_in_repo="README.md",
140
+ # repo_id="username/test-dataset",
141
+ # repo_type="dataset",
142
+ # )
143
+
144
+ inputs = [gr.Textbox(label=f"Enter your keywords for {k}", max_lines=2, placeholder=CAT_TO_CODEWORDS[k], value=CAT_TO_CODEWORDS[k]) for k in CATEGORIES]
145
 
146
  iface = gr.Interface(
147
  fn=convert, inputs=inputs, outputs="text")