texmetrics-regex-checks-gradio-1-devtesting

Sleeping

App Files Files Community

samyak152002 committed 11 days ago

Commit

f9e77fb

verified ·

1 Parent(s): fee8cba

Update main_analyzer.py (#1)

Browse files

- Update main_analyzer.py (21e59ca78f93aa1af8acd53cd3e78c131cc7bb50)

Files changed (1) hide show

main_analyzer.py +10 -16

main_analyzer.py CHANGED Viewed

@@ -90,7 +90,7 @@ def analyze_pdf(filepath_or_stream: Any) -> Tuple[Dict[str, Any], None]:
                     print(f"Analyzer: Mapping {len(detailed_issues_for_mapping)} issues to PDF coordinates...")
                     # ... (rest of mapping logic as before) ...
                     for page_idx in range(doc_for_mapping.page_count):
-                        page = doc_for_mapping[page_idx]
                         current_page_num_1_based = page_idx + 1
                         unmapped_issues_on_this_page_by_context = defaultdict(list)
                         for issue_dict in detailed_issues_for_mapping:
@@ -104,9 +104,14 @@ def analyze_pdf(filepath_or_stream: Any) -> Tuple[Dict[str, Any], None]:
                         for ctx_str, issues_for_ctx in unmapped_issues_on_this_page_by_context.items():
                             if not ctx_str or not ctx_str.strip(): continue
                             try:
-                                pdf_rects = page.search_for(ctx_str, flags=fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE)
-                                if pdf_rects:
-                                    try_map_issues_to_page_rects(issues_for_ctx, pdf_rects, current_page_num_1_based)
                             except Exception as search_exc:
                                 print(f"Analyzer: Warning: Error searching for context '{ctx_str[:30].replace(chr(10),' ')}' on page {current_page_num_1_based}: {search_exc}")
                     total_mapped = sum(1 for iss in detailed_issues_for_mapping if iss['is_mapped_to_pdf'])
@@ -153,15 +158,4 @@ def analyze_pdf(filepath_or_stream: Any) -> Tuple[Dict[str, Any], None]:
         # itself if it received a stream; this isn't happening in the Gradio flow.
         if doc_for_mapping: # Ensure the fitz document for mapping is closed
             doc_for_mapping.close()
-            print(f"Analyzer: Closed fitz document used for mapping.")
-        # The original finally block for temp_file_for_stream_path:
-        # if temp_file_for_stream_path and os.path.exists(temp_file_for_stream_path):
-        #     try:
-        #         os.remove(temp_file_for_stream_path)
-        #         print(f"Analyzer: Cleaned up main temporary PDF file: {temp_file_for_stream_path}")
-        #     except Exception as e_clean:
-        #         print(f"Analyzer: Error cleaning up main temporary PDF file {temp_file_for_stream_path}: {e_clean}")
-        # This part is removed because temp_file_for_stream_path is never assigned a value
-        # in the current structure of analyze_pdf. If analyze_pdf were to handle streams
-        # by creating its own temp file, then this cleanup would be relevant for that temp file.

                     print(f"Analyzer: Mapping {len(detailed_issues_for_mapping)} issues to PDF coordinates...")
                     # ... (rest of mapping logic as before) ...
                     for page_idx in range(doc_for_mapping.page_count):
+                        page = doc_for_mapping[page_idx] # Current PyMuPDF page object
                         current_page_num_1_based = page_idx + 1
                         unmapped_issues_on_this_page_by_context = defaultdict(list)
                         for issue_dict in detailed_issues_for_mapping:
                         for ctx_str, issues_for_ctx in unmapped_issues_on_this_page_by_context.items():
                             if not ctx_str or not ctx_str.strip(): continue
                             try:
+                                pdf_rects_for_context_occurrences = page.search_for(ctx_str, flags=fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE)
+                                if pdf_rects_for_context_occurrences:
+                                    try_map_issues_to_page_rects(
+                                        issues_for_ctx,
+                                        pdf_rects_for_context_occurrences,
+                                        current_page_num_1_based,
+                                        page # Pass the current page object
+                                    )
                             except Exception as search_exc:
                                 print(f"Analyzer: Warning: Error searching for context '{ctx_str[:30].replace(chr(10),' ')}' on page {current_page_num_1_based}: {search_exc}")
                     total_mapped = sum(1 for iss in detailed_issues_for_mapping if iss['is_mapped_to_pdf'])
         # itself if it received a stream; this isn't happening in the Gradio flow.
         if doc_for_mapping: # Ensure the fitz document for mapping is closed
             doc_for_mapping.close()
+            print(f"Analyzer: Closed fitz document used for mapping.")