GitHub Actions committed on
Commit
807d96f
·
1 Parent(s): e3a898b

Sync from GitHub repo

Browse files
Files changed (1) hide show
  1. app.py +128 -43
app.py CHANGED
@@ -1103,65 +1103,150 @@ def setup_periodic_tasks():
1103
  app.logger.error(f"Error uploading database to {database_repo_id}: {str(e)}")
1104
 
1105
  def sync_preferences_data():
1106
- """Zips and uploads preference data folders to HF dataset"""
1107
  with app.app_context(): # Ensure app context for logging
1108
  if not os.path.isdir(votes_dir):
1109
- # app.logger.info(f"Votes directory '{votes_dir}' not found, skipping preference sync.")
1110
  return # Don't log every 5 mins if dir doesn't exist yet
1111
 
 
 
 
 
1112
  try:
1113
  api = HfApi(token=os.getenv("HF_TOKEN"))
1114
  vote_uuids = [d for d in os.listdir(votes_dir) if os.path.isdir(os.path.join(votes_dir, d))]
1115
 
1116
  if not vote_uuids:
1117
- # app.logger.info("No new preference data to upload.")
1118
- return # Don't log every 5 mins if no new data
 
 
 
 
 
 
1119
 
1120
- uploaded_count = 0
 
 
 
1121
  for vote_uuid in vote_uuids:
1122
  dir_path = os.path.join(votes_dir, vote_uuid)
1123
- zip_base_path = os.path.join(votes_dir, vote_uuid) # Name zip file same as folder
1124
- zip_path = f"{zip_base_path}.zip"
1125
 
1126
  try:
1127
- # Create zip archive
1128
- shutil.make_archive(zip_base_path, 'zip', dir_path)
1129
- app.logger.info(f"Created zip archive: {zip_path}")
1130
-
1131
- # Upload zip file
1132
- api.upload_file(
1133
- path_or_fileobj=zip_path,
1134
- path_in_repo=f"votes/{year}/{month}/{vote_uuid}.zip",
1135
- repo_id=preferences_repo_id,
1136
- repo_type="dataset",
1137
- commit_message=f"Add preference data {vote_uuid}"
1138
- )
1139
- app.logger.info(f"Successfully uploaded {zip_path} to {preferences_repo_id}")
1140
- uploaded_count += 1
1141
-
1142
- # Cleanup local files after successful upload
1143
- try:
1144
- os.remove(zip_path)
1145
- shutil.rmtree(dir_path)
1146
- app.logger.info(f"Cleaned up local files: {zip_path} and {dir_path}")
1147
- except OSError as e:
1148
- app.logger.error(f"Error cleaning up files for {vote_uuid}: {str(e)}")
1149
-
1150
- except Exception as upload_err:
1151
- app.logger.error(f"Error processing or uploading preference data for {vote_uuid}: {str(upload_err)}")
1152
- # Optionally remove zip if it exists but upload failed
1153
- if os.path.exists(zip_path):
1154
- try:
1155
- os.remove(zip_path)
1156
- except OSError as e:
1157
- app.logger.error(f"Error removing zip file after failed upload {zip_path}: {str(e)}")
1158
- # Keep the original folder for the next attempt
1159
-
1160
- if uploaded_count > 0:
1161
- app.logger.info(f"Finished preference data sync. Uploaded {uploaded_count} new entries.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1162
 
1163
  except Exception as e:
1164
- app.logger.error(f"General error during preference data sync: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
 
1166
 
1167
  # Schedule periodic tasks
 
1103
  app.logger.error(f"Error uploading database to {database_repo_id}: {str(e)}")
1104
 
1105
  def sync_preferences_data():
1106
+ """Zips and uploads preference data folders in batches to HF dataset"""
1107
  with app.app_context(): # Ensure app context for logging
1108
  if not os.path.isdir(votes_dir):
 
1109
  return # Don't log every 5 mins if dir doesn't exist yet
1110
 
1111
+ temp_batch_dir = None # Initialize to manage cleanup
1112
+ temp_individual_zip_dir = None # Initialize for individual zips
1113
+ local_batch_zip_path = None # Initialize for batch zip path
1114
+
1115
  try:
1116
  api = HfApi(token=os.getenv("HF_TOKEN"))
1117
  vote_uuids = [d for d in os.listdir(votes_dir) if os.path.isdir(os.path.join(votes_dir, d))]
1118
 
1119
  if not vote_uuids:
1120
+ return # No data to process
1121
+
1122
+ app.logger.info(f"Found {len(vote_uuids)} vote directories to process.")
1123
+
1124
+ # Create temporary directories
1125
+ temp_batch_dir = tempfile.mkdtemp(prefix="hf_batch_")
1126
+ temp_individual_zip_dir = tempfile.mkdtemp(prefix="hf_indiv_zips_")
1127
+ app.logger.debug(f"Created temp directories: {temp_batch_dir}, {temp_individual_zip_dir}")
1128
 
1129
+ processed_vote_dirs = []
1130
+ individual_zips_in_batch = []
1131
+
1132
+ # 1. Create individual zips and move them to the batch directory
1133
  for vote_uuid in vote_uuids:
1134
  dir_path = os.path.join(votes_dir, vote_uuid)
1135
+ individual_zip_base_path = os.path.join(temp_individual_zip_dir, vote_uuid)
1136
+ individual_zip_path = f"{individual_zip_base_path}.zip"
1137
 
1138
  try:
1139
+ shutil.make_archive(individual_zip_base_path, 'zip', dir_path)
1140
+ app.logger.debug(f"Created individual zip: {individual_zip_path}")
1141
+
1142
+ # Move the created zip into the batch directory
1143
+ final_individual_zip_path = os.path.join(temp_batch_dir, f"{vote_uuid}.zip")
1144
+ shutil.move(individual_zip_path, final_individual_zip_path)
1145
+ app.logger.debug(f"Moved individual zip to batch dir: {final_individual_zip_path}")
1146
+
1147
+ processed_vote_dirs.append(dir_path) # Mark original dir for later cleanup
1148
+ individual_zips_in_batch.append(final_individual_zip_path)
1149
+
1150
+ except Exception as zip_err:
1151
+ app.logger.error(f"Error creating or moving zip for {vote_uuid}: {str(zip_err)}")
1152
+ # Clean up partial zip if it exists
1153
+ if os.path.exists(individual_zip_path):
1154
+ try:
1155
+ os.remove(individual_zip_path)
1156
+ except OSError:
1157
+ pass
1158
+ # Continue processing other votes
1159
+
1160
+ # Clean up the temporary dir used for creating individual zips
1161
+ shutil.rmtree(temp_individual_zip_dir)
1162
+ temp_individual_zip_dir = None # Mark as cleaned
1163
+ app.logger.debug("Cleaned up temporary individual zip directory.")
1164
+
1165
+ if not individual_zips_in_batch:
1166
+ app.logger.warning("No individual zips were successfully created for batching.")
1167
+ # Clean up batch dir if it's empty or only contains failed attempts
1168
+ if temp_batch_dir and os.path.exists(temp_batch_dir):
1169
+ shutil.rmtree(temp_batch_dir)
1170
+ temp_batch_dir = None
1171
+ return
1172
+
1173
+ # 2. Create the batch zip file
1174
+ batch_timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
1175
+ batch_uuid_short = str(uuid.uuid4())[:8]
1176
+ batch_zip_filename = f"{batch_timestamp}_batch_{batch_uuid_short}.zip"
1177
+ # Create batch zip in a standard temp location first
1178
+ local_batch_zip_base = os.path.join(tempfile.gettempdir(), batch_zip_filename.replace('.zip', ''))
1179
+ local_batch_zip_path = f"{local_batch_zip_base}.zip"
1180
+
1181
+ app.logger.info(f"Creating batch zip: {local_batch_zip_path} with {len(individual_zips_in_batch)} individual zips.")
1182
+ shutil.make_archive(local_batch_zip_base, 'zip', temp_batch_dir)
1183
+ app.logger.info(f"Batch zip created successfully: {local_batch_zip_path}")
1184
+
1185
+ # 3. Upload the batch zip file
1186
+ hf_repo_path = f"votes/{year}/{month}/{batch_zip_filename}"
1187
+ app.logger.info(f"Uploading batch zip to HF Hub: {preferences_repo_id}/{hf_repo_path}")
1188
+
1189
+ api.upload_file(
1190
+ path_or_fileobj=local_batch_zip_path,
1191
+ path_in_repo=hf_repo_path,
1192
+ repo_id=preferences_repo_id,
1193
+ repo_type="dataset",
1194
+ commit_message=f"Add batch preference data {batch_zip_filename} ({len(individual_zips_in_batch)} votes)"
1195
+ )
1196
+ app.logger.info(f"Successfully uploaded batch {batch_zip_filename} to {preferences_repo_id}")
1197
+
1198
+ # 4. Cleanup after successful upload
1199
+ app.logger.info("Cleaning up local files after successful upload.")
1200
+ # Remove original vote directories that were successfully zipped and uploaded
1201
+ for dir_path in processed_vote_dirs:
1202
+ try:
1203
+ shutil.rmtree(dir_path)
1204
+ app.logger.debug(f"Removed original vote directory: {dir_path}")
1205
+ except OSError as e:
1206
+ app.logger.error(f"Error removing processed vote directory {dir_path}: {str(e)}")
1207
+
1208
+ # Remove the temporary batch directory (containing the individual zips)
1209
+ shutil.rmtree(temp_batch_dir)
1210
+ temp_batch_dir = None
1211
+ app.logger.debug("Removed temporary batch directory.")
1212
+
1213
+ # Remove the local batch zip file
1214
+ os.remove(local_batch_zip_path)
1215
+ local_batch_zip_path = None
1216
+ app.logger.debug("Removed local batch zip file.")
1217
+
1218
+ app.logger.info(f"Finished preference data sync. Uploaded batch {batch_zip_filename}.")
1219
 
1220
  except Exception as e:
1221
+ app.logger.error(f"Error during preference data batch sync: {str(e)}", exc_info=True)
1222
+ # If upload failed, the local batch zip might exist, clean it up.
1223
+ if local_batch_zip_path and os.path.exists(local_batch_zip_path):
1224
+ try:
1225
+ os.remove(local_batch_zip_path)
1226
+ app.logger.debug("Cleaned up local batch zip after failed upload.")
1227
+ except OSError as clean_err:
1228
+ app.logger.error(f"Error cleaning up batch zip after failed upload: {clean_err}")
1229
+ # Do NOT remove temp_batch_dir if it exists; its contents will be retried next time.
1230
+ # Do NOT remove original vote directories if upload failed.
1231
+
1232
+ finally:
1233
+ # Final cleanup for temporary directories in case of unexpected exits
1234
+ if temp_individual_zip_dir and os.path.exists(temp_individual_zip_dir):
1235
+ try:
1236
+ shutil.rmtree(temp_individual_zip_dir)
1237
+ except Exception as final_clean_err:
1238
+ app.logger.error(f"Error in final cleanup (indiv zips): {final_clean_err}")
1239
+ # Only clean up batch dir in finally block if it *wasn't* kept intentionally after upload failure
1240
+ if temp_batch_dir and os.path.exists(temp_batch_dir):
1241
+ # Check if an upload attempt happened and failed
1242
+ upload_failed = 'e' in locals() and isinstance(e, Exception) # Crude check if exception occurred
1243
+ if not upload_failed: # If no upload error or upload succeeded, clean up
1244
+ try:
1245
+ shutil.rmtree(temp_batch_dir)
1246
+ except Exception as final_clean_err:
1247
+ app.logger.error(f"Error in final cleanup (batch dir): {final_clean_err}")
1248
+ else:
1249
+ app.logger.warning("Keeping temporary batch directory due to upload failure for next attempt.")
1250
 
1251
 
1252
  # Schedule periodic tasks