Update app.py

app.py CHANGED
@@ -59,13 +59,21 @@ def evaluate_model_with_insights(model_name):
     for dataset_name, dataset in datasets.items():
         all_mrr, all_map, all_ndcg = [], [], []
         dataset_samples = []
-
         if 'candidate_document' in dataset.column_names:
             grouped_data = dataset.to_pandas().groupby("query")
             for query, group in grouped_data:
-                candidate_texts = group['candidate_document'].tolist()
+                # Skip invalid queries
+                if query is None or not isinstance(query, str) or query.strip() == "":
+                    continue
+
+                candidate_texts = group['candidate_document'].dropna().tolist()
                 relevance_labels = group['relevance_label'].tolist()
-                pairs = [(query, doc) for doc in candidate_texts]
+
+                # Skip if no valid candidate documents
+                if not candidate_texts or len(candidate_texts) != len(relevance_labels):
+                    continue
+
+                pairs = [(query, doc) for doc in candidate_texts if doc is not None and isinstance(doc, str) and doc.strip() != ""]
                 scores = model.predict(pairs)

                 # Collecting top-5 results for display
@@ -83,8 +91,21 @@ def evaluate_model_with_insights(model_name):
         else:
             for entry in dataset:
                 query = entry['query']
-                candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
-                relevance_labels = [1, 0, 0, 0, 0]
+
+                # Validate query and documents
+                if query is None or not isinstance(query, str) or query.strip() == "":
+                    continue
+
+                candidate_texts = [
+                    doc for doc in [entry.get('positive'), entry.get('negative1'), entry.get('negative2'), entry.get('negative3'), entry.get('negative4')]
+                    if doc is not None and isinstance(doc, str) and doc.strip() != ""
+                ]
+                relevance_labels = [1] + [0] * (len(candidate_texts) - 1)
+
+                # Skip if no valid candidate documents
+                if not candidate_texts or len(candidate_texts) != len(relevance_labels):
+                    continue
+
                 pairs = [(query, doc) for doc in candidate_texts]
                 scores = model.predict(pairs)

@@ -100,6 +121,27 @@ def evaluate_model_with_insights(model_name):
                 all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
                 all_map.append(mean_average_precision(relevance_labels, scores))
                 all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
+
+            else:
+                for entry in dataset:
+                    query = entry['query']
+                    candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
+                    relevance_labels = [1, 0, 0, 0, 0]
+                    pairs = [(query, doc) for doc in candidate_texts]
+                    scores = model.predict(pairs)
+
+                    # Collecting top-5 results for display
+                    sorted_indices = np.argsort(scores)[::-1]
+                    top_docs = [(candidate_texts[i], scores[i], relevance_labels[i]) for i in sorted_indices[:5]]
+                    dataset_samples.append({
+                        "Query": query,
+                        "Top 5 Candidates": top_docs
+                    })
+
+                    # Metrics
+                    all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
+                    all_map.append(mean_average_precision(relevance_labels, scores))
+                    all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))

         # Metrics for this dataset
         results.append({
@@ -155,4 +197,4 @@ interface = gr.Interface(
     )
 )

-interface.launch(debug=True)
+interface.launch(debug=True)
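
Note: mean_reciprocal_rank, mean_average_precision, and ndcg_at_k are called in these hunks but defined elsewhere in app.py. For orientation, here is a minimal sketch of what helpers with these signatures conventionally compute, assuming binary relevance labels aligned index-for-index with the scores; these bodies are an assumption, not the actual definitions in app.py:

    import numpy as np

    def mean_reciprocal_rank(relevance_labels, scores):
        # Reciprocal of the rank of the first relevant document, scores sorted descending.
        order = np.argsort(scores)[::-1]
        for rank, i in enumerate(order, start=1):
            if relevance_labels[i] > 0:
                return 1.0 / rank
        return 0.0

    def mean_average_precision(relevance_labels, scores):
        # Mean of precision@k over the ranks k where a relevant document appears.
        order = np.argsort(scores)[::-1]
        hits, precisions = 0, []
        for rank, i in enumerate(order, start=1):
            if relevance_labels[i] > 0:
                hits += 1
                precisions.append(hits / rank)
        return float(np.mean(precisions)) if precisions else 0.0

    def ndcg_at_k(relevance_labels, scores, k=10):
        # DCG of the top-k score-ranked list, normalized by the ideal label-ranked DCG.
        order = np.argsort(scores)[::-1][:k]
        dcg = sum(relevance_labels[i] / np.log2(rank + 1) for rank, i in enumerate(order, start=1))
        ideal = sorted(relevance_labels, reverse=True)[:k]
        idcg = sum(rel / np.log2(rank + 1) for rank, rel in enumerate(ideal, start=1))
        return dcg / idcg if idcg > 0 else 0.0

With the [1, 0, 0, 0, 0] labels used in the fallback branch, each of these returns 1.0 exactly when the positive document is ranked first.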
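The model.predict(pairs) call on (query, document) tuples matches the sentence-transformers CrossEncoder API. Assuming that is the model class in use (the checkpoint name below is only a placeholder; app.py loads the user-supplied model_name), scoring and ranking one query's candidates looks like:

    import numpy as np
    from sentence_transformers import CrossEncoder

    # Placeholder checkpoint for illustration.
    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

    query = "what does a cross-encoder score?"
    candidate_texts = ["It scores a query and a document jointly.", "Unrelated text."]

    pairs = [(query, doc) for doc in candidate_texts]
    scores = model.predict(pairs)              # one relevance score per (query, doc) pair
    sorted_indices = np.argsort(scores)[::-1]  # highest-scoring candidate first, as in the diff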
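The final hunk shows only the closing parentheses of the gr.Interface(...) call before interface.launch(debug=True). As a hypothetical minimal wiring (the real inputs and outputs of app.py's interface sit outside this diff):

    import gradio as gr

    interface = gr.Interface(
        fn=evaluate_model_with_insights,              # the function patched in this commit
        inputs=gr.Textbox(label="Model name"),        # assumed input; not shown in the diff
        outputs=gr.JSON(label="Evaluation results"),  # assumed output; not shown in the diff
    )

    interface.launch(debug=True)  # debug=True prints full tracebacks while iterating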