vargha committed on
Commit
8dd3ae7
·
1 Parent(s): bdb3c28

index finding debug

Browse files
Files changed (1) hide show
  1. components/review_dashboard_page.py +161 -20
components/review_dashboard_page.py CHANGED
@@ -395,9 +395,15 @@ class ReviewDashboardPage:
395
  ).count()
396
  total_count += interval_count
397
 
398
- # Query to get annotations starting from the first unreviewed item
399
  if not all_reviewed and first_unreviewed_tts_id:
400
- # Load from first unreviewed TTS ID
 
 
 
 
 
 
401
  initial_query = db.query(
402
  Annotation,
403
  TTSData.filename,
@@ -413,7 +419,7 @@ class ReviewDashboardPage:
413
  )
414
  ).filter(
415
  Annotation.annotator_id == target_annotator_obj.id,
416
- TTSData.id >= first_unreviewed_tts_id
417
  ).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
418
 
419
  initial_results = initial_query.all()
@@ -465,10 +471,15 @@ class ReviewDashboardPage:
465
  })
466
 
467
  # Determine initial index inside the loaded batch
468
- # - Normal case (has unreviewed): start at 0 (first unreviewed)
469
- # - All reviewed: start at last item in the batch for browsing
470
  initial_idx = 0
471
- if items and all_reviewed:
 
 
 
 
 
 
 
472
  initial_idx = len(items) - 1
473
 
474
  # Set initial display
@@ -696,19 +707,149 @@ class ReviewDashboardPage:
696
  gr.update(value="❌ Reject") # Reset button text
697
  )
698
 
699
- def jump_by_data_id_fn(items, target_data_id, current_idx):
 
700
  if not target_data_id:
701
- return current_idx
702
- # try:
703
- target_id = int(target_data_id)
704
- for i, item in enumerate(items):
705
- if item["tts_id"] == target_id:
706
- return i
707
- gr.Warning(f"Data ID {target_id} not found in review items")
708
- # except ValueError:
709
- # sentry_sdk.capture_exception()
710
- # gr.Warning(f"Invalid Data ID format: {target_data_id}")
711
- return current_idx
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
 
713
  def load_more_items_fn(items, session, current_batch_size=10):
714
  """Load more items when user needs them (pagination support)"""
@@ -948,8 +1089,8 @@ class ReviewDashboardPage:
948
  # Jump button
949
  self.btn_jump.click(
950
  fn=jump_by_data_id_fn,
951
- inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
952
- outputs=self.idx_state
953
  ).then(
954
  fn=show_current_review_item_fn,
955
  inputs=[self.items_state, self.idx_state, session_state],
 
395
  ).count()
396
  total_count += interval_count
397
 
398
+ # Query to get annotations with a window around the first unreviewed item
399
  if not all_reviewed and first_unreviewed_tts_id:
400
+ # Load a window around the first unreviewed TTS ID (some before, some after)
401
+ WINDOW_BEFORE = 2 # Load 2 items before the first unreviewed
402
+ WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1 # Rest after
403
+
404
+ # Get a range starting before the first unreviewed item
405
+ window_start_id = max(1, first_unreviewed_tts_id - WINDOW_BEFORE)
406
+
407
  initial_query = db.query(
408
  Annotation,
409
  TTSData.filename,
 
419
  )
420
  ).filter(
421
  Annotation.annotator_id == target_annotator_obj.id,
422
+ TTSData.id >= window_start_id
423
  ).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
424
 
425
  initial_results = initial_query.all()
 
471
  })
472
 
473
  # Determine initial index inside the loaded batch
 
 
474
  initial_idx = 0
475
+ if items and not all_reviewed and first_unreviewed_tts_id:
476
+ # Find the first unreviewed item within the loaded batch
477
+ for i, item in enumerate(items):
478
+ if item["tts_id"] == first_unreviewed_tts_id:
479
+ initial_idx = i
480
+ break
481
+ elif items and all_reviewed:
482
+ # All reviewed: start at last item in the batch for browsing
483
  initial_idx = len(items) - 1
484
 
485
  # Set initial display
 
707
  gr.update(value="❌ Reject") # Reset button text
708
  )
709
 
710
+ def jump_by_data_id_fn(items, target_data_id, current_idx, session):
711
+ """Jump to a specific TTS ID by querying the database and loading a new batch around it"""
712
  if not target_data_id:
713
+ return items, current_idx, ""
714
+
715
+ user_id = session.get("user_id")
716
+ username = session.get("username")
717
+
718
+ if not user_id or not username:
719
+ gr.Warning("User session not found")
720
+ return items, current_idx, ""
721
+
722
+ # Find target annotator
723
+ target_annotator = None
724
+ for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
725
+ if reviewer_name == username:
726
+ target_annotator = annotator_name
727
+ break
728
+
729
+ if not target_annotator:
730
+ gr.Warning("Target annotator not found for user")
731
+ return items, current_idx, ""
732
+
733
+ try:
734
+ target_id = int(target_data_id)
735
+
736
+ with get_db() as db:
737
+ target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
738
+ if not target_annotator_obj:
739
+ gr.Warning("Target annotator not found in database")
740
+ return items, current_idx, ""
741
+
742
+ # Get the target annotator's assigned intervals
743
+ assigned_intervals = db.query(AnnotationInterval).filter(
744
+ AnnotationInterval.annotator_id == target_annotator_obj.id
745
+ ).all()
746
+
747
+ if not assigned_intervals:
748
+ gr.Warning("No assigned intervals found")
749
+ return items, current_idx, ""
750
+
751
+ # Check if the target TTS ID exists within the assigned intervals
752
+ target_annotation = None
753
+ for interval in assigned_intervals:
754
+ if interval.start_index is None or interval.end_index is None:
755
+ continue
756
+
757
+ target_annotation = db.query(Annotation).join(
758
+ TTSData, Annotation.tts_data_id == TTSData.id
759
+ ).filter(
760
+ Annotation.annotator_id == target_annotator_obj.id,
761
+ TTSData.id == target_id,
762
+ TTSData.id >= interval.start_index,
763
+ TTSData.id <= interval.end_index
764
+ ).first()
765
+
766
+ if target_annotation:
767
+ break
768
+
769
+ if not target_annotation:
770
+ gr.Warning(f"Data ID {target_id} not found in assigned review range")
771
+ return items, current_idx, ""
772
+
773
+ # Load a batch around the target ID
774
+ BATCH_SIZE = 10
775
+ WINDOW_BEFORE = BATCH_SIZE // 2
776
+
777
+ window_start_id = max(1, target_id - WINDOW_BEFORE)
778
+
779
+ # Query for annotations in the window
780
+ new_query = db.query(
781
+ Annotation,
782
+ TTSData.filename,
783
+ TTSData.sentence
784
+ ).join(
785
+ TTSData, Annotation.tts_data_id == TTSData.id
786
+ ).join(
787
+ AnnotationInterval,
788
+ and_(
789
+ AnnotationInterval.annotator_id == target_annotator_obj.id,
790
+ TTSData.id >= AnnotationInterval.start_index,
791
+ TTSData.id <= AnnotationInterval.end_index
792
+ )
793
+ ).filter(
794
+ Annotation.annotator_id == target_annotator_obj.id,
795
+ TTSData.id >= window_start_id
796
+ ).order_by(TTSData.id).limit(BATCH_SIZE)
797
+
798
+ results = new_query.all()
799
+
800
+ # Process new items
801
+ new_items = []
802
+ target_idx = 0
803
+ for i, (annotation, filename, sentence) in enumerate(results):
804
+ # Check if annotation is deleted
805
+ is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
806
+ annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
807
+
808
+ new_items.append({
809
+ "annotation_id": annotation.id,
810
+ "tts_id": annotation.tts_data_id,
811
+ "filename": filename,
812
+ "sentence": sentence,
813
+ "annotated_sentence": annotated_sentence_display,
814
+ "is_deleted": is_deleted,
815
+ "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
816
+ "validation_status": "Loading...",
817
+ "validation_loaded": False
818
+ })
819
+
820
+ # Find the target index within the new batch
821
+ if annotation.tts_data_id == target_id:
822
+ target_idx = i
823
+
824
+ if new_items:
825
+ # Count total for review info
826
+ total_count = 0
827
+ for interval in assigned_intervals:
828
+ if interval.start_index is None or interval.end_index is None:
829
+ continue
830
+ interval_count = db.query(Annotation).join(
831
+ TTSData, Annotation.tts_data_id == TTSData.id
832
+ ).filter(
833
+ Annotation.annotator_id == target_annotator_obj.id,
834
+ TTSData.id >= interval.start_index,
835
+ TTSData.id <= interval.end_index
836
+ ).count()
837
+ total_count += interval_count
838
+
839
+ review_info = f"🔍 **Phase 2 Review Mode** - Jumped to ID {target_id}. Loaded {len(new_items)} of {total_count} total items."
840
+ log.info(f"Successfully jumped to TTS ID {target_id}, loaded {len(new_items)} items, target at index {target_idx}")
841
+ return new_items, target_idx, review_info
842
+ else:
843
+ gr.Warning(f"No items loaded around ID {target_id}")
844
+ return items, current_idx, ""
845
+
846
+ except ValueError:
847
+ gr.Warning(f"Invalid Data ID format: {target_data_id}")
848
+ return items, current_idx, ""
849
+ except Exception as e:
850
+ log.error(f"Error jumping to ID {target_data_id}: {e}")
851
+ gr.Warning(f"Error jumping to ID {target_data_id}")
852
+ return items, current_idx, ""
853
 
854
  def load_more_items_fn(items, session, current_batch_size=10):
855
  """Load more items when user needs them (pagination support)"""
 
1089
  # Jump button
1090
  self.btn_jump.click(
1091
  fn=jump_by_data_id_fn,
1092
+ inputs=[self.items_state, self.jump_data_id_input, self.idx_state, session_state],
1093
+ outputs=[self.items_state, self.idx_state, self.review_info]
1094
  ).then(
1095
  fn=show_current_review_item_fn,
1096
  inputs=[self.items_state, self.idx_state, session_state],