Spaces:
Running
Running
index finding debug
Browse files- components/review_dashboard_page.py +161 -20
components/review_dashboard_page.py
CHANGED
@@ -395,9 +395,15 @@ class ReviewDashboardPage:
|
|
395 |
).count()
|
396 |
total_count += interval_count
|
397 |
|
398 |
-
# Query to get annotations
|
399 |
if not all_reviewed and first_unreviewed_tts_id:
|
400 |
-
# Load
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
initial_query = db.query(
|
402 |
Annotation,
|
403 |
TTSData.filename,
|
@@ -413,7 +419,7 @@ class ReviewDashboardPage:
|
|
413 |
)
|
414 |
).filter(
|
415 |
Annotation.annotator_id == target_annotator_obj.id,
|
416 |
-
TTSData.id >=
|
417 |
).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
|
418 |
|
419 |
initial_results = initial_query.all()
|
@@ -465,10 +471,15 @@ class ReviewDashboardPage:
|
|
465 |
})
|
466 |
|
467 |
# Determine initial index inside the loaded batch
|
468 |
-
# - Normal case (has unreviewed): start at 0 (first unreviewed)
|
469 |
-
# - All reviewed: start at last item in the batch for browsing
|
470 |
initial_idx = 0
|
471 |
-
if items and all_reviewed:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
initial_idx = len(items) - 1
|
473 |
|
474 |
# Set initial display
|
@@ -696,19 +707,149 @@ class ReviewDashboardPage:
|
|
696 |
gr.update(value="β Reject") # Reset button text
|
697 |
)
|
698 |
|
699 |
-
def jump_by_data_id_fn(items, target_data_id, current_idx):
|
|
|
700 |
if not target_data_id:
|
701 |
-
return current_idx
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
#
|
711 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
712 |
|
713 |
def load_more_items_fn(items, session, current_batch_size=10):
|
714 |
"""Load more items when user needs them (pagination support)"""
|
@@ -948,8 +1089,8 @@ class ReviewDashboardPage:
|
|
948 |
# Jump button
|
949 |
self.btn_jump.click(
|
950 |
fn=jump_by_data_id_fn,
|
951 |
-
inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
|
952 |
-
outputs=self.idx_state
|
953 |
).then(
|
954 |
fn=show_current_review_item_fn,
|
955 |
inputs=[self.items_state, self.idx_state, session_state],
|
|
|
395 |
).count()
|
396 |
total_count += interval_count
|
397 |
|
398 |
+
# Query to get annotations with a window around the first unreviewed item
|
399 |
if not all_reviewed and first_unreviewed_tts_id:
|
400 |
+
# Load a window around the first unreviewed TTS ID (some before, some after)
|
401 |
+
WINDOW_BEFORE = 2 # Load 2 items before the first unreviewed
|
402 |
+
WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1 # Rest after
|
403 |
+
|
404 |
+
# Get a range starting before the first unreviewed item
|
405 |
+
window_start_id = max(1, first_unreviewed_tts_id - WINDOW_BEFORE)
|
406 |
+
|
407 |
initial_query = db.query(
|
408 |
Annotation,
|
409 |
TTSData.filename,
|
|
|
419 |
)
|
420 |
).filter(
|
421 |
Annotation.annotator_id == target_annotator_obj.id,
|
422 |
+
TTSData.id >= window_start_id
|
423 |
).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
|
424 |
|
425 |
initial_results = initial_query.all()
|
|
|
471 |
})
|
472 |
|
473 |
# Determine initial index inside the loaded batch
|
|
|
|
|
474 |
initial_idx = 0
|
475 |
+
if items and not all_reviewed and first_unreviewed_tts_id:
|
476 |
+
# Find the first unreviewed item within the loaded batch
|
477 |
+
for i, item in enumerate(items):
|
478 |
+
if item["tts_id"] == first_unreviewed_tts_id:
|
479 |
+
initial_idx = i
|
480 |
+
break
|
481 |
+
elif items and all_reviewed:
|
482 |
+
# All reviewed: start at last item in the batch for browsing
|
483 |
initial_idx = len(items) - 1
|
484 |
|
485 |
# Set initial display
|
|
|
707 |
gr.update(value="β Reject") # Reset button text
|
708 |
)
|
709 |
|
710 |
+
def jump_by_data_id_fn(items, target_data_id, current_idx, session):
    """Jump to a specific TTS ID by querying the database and loading a new batch around it.

    Args:
        items: Currently loaded review items (list of dicts). Returned
            untouched whenever the jump cannot be performed.
        target_data_id: TTS data ID entered by the user; any value accepted
            by ``int()``.
        current_idx: Index currently displayed. Returned untouched whenever
            the jump cannot be performed.
        session: Session mapping; ``user_id`` and ``username`` are read.

    Returns:
        A ``(items, idx, review_info)`` tuple matching the three outputs of
        the jump button. On success ``items`` is a freshly loaded batch,
        ``idx`` is the position of the target inside it and ``review_info``
        is a status line. On any failure the original ``items`` and
        ``current_idx`` are returned together with a no-op ``gr.update()``
        so the review info already on screen is not blanked.
    """
    # Returning "" here would overwrite the review-info component with an
    # empty value on every failed jump; a bare gr.update() leaves it as is.
    unchanged = (items, current_idx, gr.update())

    if not target_data_id:
        return unchanged

    user_id = session.get("user_id")
    username = session.get("username")
    if not user_id or not username:
        gr.Warning("User session not found")
        return unchanged

    # REVIEW_MAPPING is iterated as annotator name -> reviewer name; find the
    # first annotator whose reviewer is the logged-in user.
    target_annotator = next(
        (
            annotator_name
            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items()
            if reviewer_name == username
        ),
        None,
    )
    if not target_annotator:
        gr.Warning("Target annotator not found for user")
        return unchanged

    # Parse the ID on its own so that a ValueError raised later by the
    # database layer is not misreported as a malformed ID.
    try:
        target_id = int(target_data_id)
    except (TypeError, ValueError, OverflowError):
        gr.Warning(f"Invalid Data ID format: {target_data_id}")
        return unchanged

    try:
        with get_db() as db:
            target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
            if not target_annotator_obj:
                gr.Warning("Target annotator not found in database")
                return unchanged

            # Get the target annotator's assigned intervals
            assigned_intervals = db.query(AnnotationInterval).filter(
                AnnotationInterval.annotator_id == target_annotator_obj.id
            ).all()
            if not assigned_intervals:
                gr.Warning("No assigned intervals found")
                return unchanged

            # Intervals with a missing bound are ignored everywhere below.
            bounded_intervals = [
                interval
                for interval in assigned_intervals
                if interval.start_index is not None and interval.end_index is not None
            ]

            # The range test needs no database round trip; only the
            # existence of the annotation itself has to be queried.
            target_annotation = None
            if any(
                interval.start_index <= target_id <= interval.end_index
                for interval in bounded_intervals
            ):
                target_annotation = db.query(Annotation).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                ).filter(
                    Annotation.annotator_id == target_annotator_obj.id,
                    TTSData.id == target_id
                ).first()

            if not target_annotation:
                gr.Warning(f"Data ID {target_id} not found in assigned review range")
                return unchanged

            # Load a batch around the target ID
            BATCH_SIZE = 10
            WINDOW_BEFORE = BATCH_SIZE // 2

            window_start_id = max(1, target_id - WINDOW_BEFORE)

            # Query for annotations in the window
            new_query = db.query(
                Annotation,
                TTSData.filename,
                TTSData.sentence
            ).join(
                TTSData, Annotation.tts_data_id == TTSData.id
            ).join(
                AnnotationInterval,
                and_(
                    AnnotationInterval.annotator_id == target_annotator_obj.id,
                    TTSData.id >= AnnotationInterval.start_index,
                    TTSData.id <= AnnotationInterval.end_index
                )
            ).filter(
                Annotation.annotator_id == target_annotator_obj.id,
                TTSData.id >= window_start_id
            ).order_by(TTSData.id).limit(BATCH_SIZE)

            results = new_query.all()

            # Process new items
            new_items = []
            seen_annotation_ids = set()
            target_idx = 0
            for annotation, filename, sentence in results:
                # The interval join yields one row per matching interval, so
                # overlapping intervals would repeat the same annotation.
                if annotation.id in seen_annotation_ids:
                    continue
                seen_annotation_ids.add(annotation.id)

                # Check if annotation is deleted
                is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
                annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence

                # Find the target index within the new batch (position in the
                # de-duplicated list, not in the raw result rows).
                if annotation.tts_data_id == target_id:
                    target_idx = len(new_items)

                new_items.append({
                    "annotation_id": annotation.id,
                    "tts_id": annotation.tts_data_id,
                    "filename": filename,
                    "sentence": sentence,
                    "annotated_sentence": annotated_sentence_display,
                    "is_deleted": is_deleted,
                    "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
                    "validation_status": "Loading...",
                    "validation_loaded": False
                })

            if not new_items:
                gr.Warning(f"No items loaded around ID {target_id}")
                return unchanged

            # Count total for review info
            total_count = sum(
                db.query(Annotation).join(
                    TTSData, Annotation.tts_data_id == TTSData.id
                ).filter(
                    Annotation.annotator_id == target_annotator_obj.id,
                    TTSData.id >= interval.start_index,
                    TTSData.id <= interval.end_index
                ).count()
                for interval in bounded_intervals
            )

            review_info = f"π **Phase 2 Review Mode** - Jumped to ID {target_id}. Loaded {len(new_items)} of {total_count} total items."
            log.info(f"Successfully jumped to TTS ID {target_id}, loaded {len(new_items)} items, target at index {target_idx}")
            return new_items, target_idx, review_info

    except Exception as e:
        # UI boundary: report the failure to the user and keep the current view.
        log.error(f"Error jumping to ID {target_data_id}: {e}")
        gr.Warning(f"Error jumping to ID {target_data_id}")
        return unchanged
|
853 |
|
854 |
def load_more_items_fn(items, session, current_batch_size=10):
|
855 |
"""Load more items when user needs them (pagination support)"""
|
|
|
1089 |
# Jump button
|
1090 |
self.btn_jump.click(
|
1091 |
fn=jump_by_data_id_fn,
|
1092 |
+
inputs=[self.items_state, self.jump_data_id_input, self.idx_state, session_state],
|
1093 |
+
outputs=[self.items_state, self.idx_state, self.review_info]
|
1094 |
).then(
|
1095 |
fn=show_current_review_item_fn,
|
1096 |
inputs=[self.items_state, self.idx_state, session_state],
|