Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

vargha committed on Aug 10

Commit

8dd3ae7

1 Parent(s): bdb3c28

index finding debug

Browse files

Files changed (1) hide show

components/review_dashboard_page.py +161 -20

components/review_dashboard_page.py CHANGED Viewed

@@ -395,9 +395,15 @@ class ReviewDashboardPage:
                     ).count()
                     total_count += interval_count
-                # Query to get annotations starting from the first unreviewed item
                 if not all_reviewed and first_unreviewed_tts_id:
-                    # Load from first unreviewed TTS ID
                     initial_query = db.query(
                         Annotation,
                         TTSData.filename,
@@ -413,7 +419,7 @@ class ReviewDashboardPage:
                         )
                     ).filter(
                         Annotation.annotator_id == target_annotator_obj.id,
-                        TTSData.id >= first_unreviewed_tts_id
                     ).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
                     initial_results = initial_query.all()
@@ -465,10 +471,15 @@ class ReviewDashboardPage:
                     })
                 # Determine initial index inside the loaded batch
-                # - Normal case (has unreviewed): start at 0 (first unreviewed)
-                # - All reviewed: start at last item in the batch for browsing
                 initial_idx = 0
-                if items and all_reviewed:
                     initial_idx = len(items) - 1
                 # Set initial display
@@ -696,19 +707,149 @@ class ReviewDashboardPage:
                         gr.update(value="❌ Reject")  # Reset button text
                     )
-        def jump_by_data_id_fn(items, target_data_id, current_idx):
             if not target_data_id:
-                return current_idx
-            # try:
-            target_id = int(target_data_id)
-            for i, item in enumerate(items):
-                if item["tts_id"] == target_id:
-                    return i
-            gr.Warning(f"Data ID {target_id} not found in review items")
-            # except ValueError:
-            #     sentry_sdk.capture_exception()
-            #     gr.Warning(f"Invalid Data ID format: {target_data_id}")
-            return current_idx
         def load_more_items_fn(items, session, current_batch_size=10):
             """Load more items when user needs them (pagination support)"""
@@ -948,8 +1089,8 @@ class ReviewDashboardPage:
         # Jump button
         self.btn_jump.click(
             fn=jump_by_data_id_fn,
-            inputs=[self.items_state, self.jump_data_id_input, self.idx_state],
-            outputs=self.idx_state
         ).then(
             fn=show_current_review_item_fn,
             inputs=[self.items_state, self.idx_state, session_state],

                     ).count()
                     total_count += interval_count
+                # Query to get annotations with a window around the first unreviewed item
                 if not all_reviewed and first_unreviewed_tts_id:
+                    # Load a window around the first unreviewed TTS ID (some before, some after)
+                    WINDOW_BEFORE = 2  # Load 2 items before the first unreviewed
+                    WINDOW_AFTER = INITIAL_BATCH_SIZE - WINDOW_BEFORE - 1  # Rest after
+                    # Get a range starting before the first unreviewed item
+                    window_start_id = max(1, first_unreviewed_tts_id - WINDOW_BEFORE)
                     initial_query = db.query(
                         Annotation,
                         TTSData.filename,
                         )
                     ).filter(
                         Annotation.annotator_id == target_annotator_obj.id,
+                        TTSData.id >= window_start_id
                     ).order_by(TTSData.id).limit(INITIAL_BATCH_SIZE)
                     initial_results = initial_query.all()
                     })
                 # Determine initial index inside the loaded batch
                 initial_idx = 0
+                if items and not all_reviewed and first_unreviewed_tts_id:
+                    # Find the first unreviewed item within the loaded batch
+                    for i, item in enumerate(items):
+                        if item["tts_id"] == first_unreviewed_tts_id:
+                            initial_idx = i
+                            break
+                elif items and all_reviewed:
+                    # All reviewed: start at last item in the batch for browsing
                     initial_idx = len(items) - 1
                 # Set initial display
                         gr.update(value="❌ Reject")  # Reset button text
                     )
+        def jump_by_data_id_fn(items, target_data_id, current_idx, session):
+            """Jump to a specific TTS ID by querying the database and loading a new batch around it"""
             if not target_data_id:
+                return items, current_idx, ""
+            user_id = session.get("user_id")
+            username = session.get("username")
+            if not user_id or not username:
+                gr.Warning("User session not found")
+                return items, current_idx, ""
+            # Find target annotator
+            target_annotator = None
+            for annotator_name, reviewer_name in conf.REVIEW_MAPPING.items():
+                if reviewer_name == username:
+                    target_annotator = annotator_name
+                    break
+            if not target_annotator:
+                gr.Warning("Target annotator not found for user")
+                return items, current_idx, ""
+            try:
+                target_id = int(target_data_id)
+                with get_db() as db:
+                    target_annotator_obj = db.query(Annotator).filter_by(name=target_annotator).first()
+                    if not target_annotator_obj:
+                        gr.Warning("Target annotator not found in database")
+                        return items, current_idx, ""
+                    # Get the target annotator's assigned intervals
+                    assigned_intervals = db.query(AnnotationInterval).filter(
+                        AnnotationInterval.annotator_id == target_annotator_obj.id
+                    ).all()
+                    if not assigned_intervals:
+                        gr.Warning("No assigned intervals found")
+                        return items, current_idx, ""
+                    # Check if the target TTS ID exists within the assigned intervals
+                    target_annotation = None
+                    for interval in assigned_intervals:
+                        if interval.start_index is None or interval.end_index is None:
+                            continue
+                        target_annotation = db.query(Annotation).join(
+                            TTSData, Annotation.tts_data_id == TTSData.id
+                        ).filter(
+                            Annotation.annotator_id == target_annotator_obj.id,
+                            TTSData.id == target_id,
+                            TTSData.id >= interval.start_index,
+                            TTSData.id <= interval.end_index
+                        ).first()
+                        if target_annotation:
+                            break
+                    if not target_annotation:
+                        gr.Warning(f"Data ID {target_id} not found in assigned review range")
+                        return items, current_idx, ""
+                    # Load a batch around the target ID
+                    BATCH_SIZE = 10
+                    WINDOW_BEFORE = BATCH_SIZE // 2
+                    window_start_id = max(1, target_id - WINDOW_BEFORE)
+                    # Query for annotations in the window
+                    new_query = db.query(
+                        Annotation,
+                        TTSData.filename,
+                        TTSData.sentence
+                    ).join(
+                        TTSData, Annotation.tts_data_id == TTSData.id
+                    ).join(
+                        AnnotationInterval,
+                        and_(
+                            AnnotationInterval.annotator_id == target_annotator_obj.id,
+                            TTSData.id >= AnnotationInterval.start_index,
+                            TTSData.id <= AnnotationInterval.end_index
+                        )
+                    ).filter(
+                        Annotation.annotator_id == target_annotator_obj.id,
+                        TTSData.id >= window_start_id
+                    ).order_by(TTSData.id).limit(BATCH_SIZE)
+                    results = new_query.all()
+                    # Process new items
+                    new_items = []
+                    target_idx = 0
+                    for i, (annotation, filename, sentence) in enumerate(results):
+                        # Check if annotation is deleted
+                        is_deleted = not annotation.annotated_sentence or annotation.annotated_sentence.strip() == ""
+                        annotated_sentence_display = "[DELETED ANNOTATION]" if is_deleted else annotation.annotated_sentence
+                        new_items.append({
+                            "annotation_id": annotation.id,
+                            "tts_id": annotation.tts_data_id,
+                            "filename": filename,
+                            "sentence": sentence,
+                            "annotated_sentence": annotated_sentence_display,
+                            "is_deleted": is_deleted,
+                            "annotated_at": annotation.annotated_at.isoformat() if annotation.annotated_at else "",
+                            "validation_status": "Loading...",
+                            "validation_loaded": False
+                        })
+                        # Find the target index within the new batch
+                        if annotation.tts_data_id == target_id:
+                            target_idx = i
+                    if new_items:
+                        # Count total for review info
+                        total_count = 0
+                        for interval in assigned_intervals:
+                            if interval.start_index is None or interval.end_index is None:
+                                continue
+                            interval_count = db.query(Annotation).join(
+                                TTSData, Annotation.tts_data_id == TTSData.id
+                            ).filter(
+                                Annotation.annotator_id == target_annotator_obj.id,
+                                TTSData.id >= interval.start_index,
+                                TTSData.id <= interval.end_index
+                            ).count()
+                            total_count += interval_count
+                        review_info = f"🔍 **Phase 2 Review Mode** - Jumped to ID {target_id}. Loaded {len(new_items)} of {total_count} total items."
+                        log.info(f"Successfully jumped to TTS ID {target_id}, loaded {len(new_items)} items, target at index {target_idx}")
+                        return new_items, target_idx, review_info
+                    else:
+                        gr.Warning(f"No items loaded around ID {target_id}")
+                        return items, current_idx, ""
+            except ValueError:
+                gr.Warning(f"Invalid Data ID format: {target_data_id}")
+                return items, current_idx, ""
+            except Exception as e:
+                log.error(f"Error jumping to ID {target_data_id}: {e}")
+                gr.Warning(f"Error jumping to ID {target_data_id}")
+                return items, current_idx, ""
         def load_more_items_fn(items, session, current_batch_size=10):
             """Load more items when user needs them (pagination support)"""
         # Jump button
         self.btn_jump.click(
             fn=jump_by_data_id_fn,
+            inputs=[self.items_state, self.jump_data_id_input, self.idx_state, session_state],
+            outputs=[self.items_state, self.idx_state, self.review_info]
         ).then(
             fn=show_current_review_item_fn,
             inputs=[self.items_state, self.idx_state, session_state],