Tristan Yu committed
Commit cf3775c · verified · 1 Parent(s): e84ef07

Upload 7 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ static/icon.ico filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,33 @@
+ FROM python:3.9-slim
+
+ # Install system dependencies (must run as root, before switching to the non-root user)
+ RUN apt-get update && \
+     apt-get install -y --no-install-recommends \
+     build-essential \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Create a non-root user
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ # Copy requirements first
+ COPY --chown=user requirements.txt .
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # Copy the rest of the application
+ COPY --chown=user . .
+
+ # Set environment variables
+ ENV HF_TOKEN=$HF_TOKEN
+ ENV PORT=7860
+
+ # Make port 7860 available (Hugging Face Spaces default)
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,320 @@
+ #!/usr/bin/env python3
+ """
+ Quality Lens - Translation Quality Estimation and Hallucination Detection
+ This script provides a web interface for evaluating translation quality and detecting hallucinations.
+ Features:
+ - Translation quality assessment
+ - Semantic equivalence analysis
+ - Hallucination detection
+ - COMET-QE integration
+ """
+
+ import os
+ import multiprocessing as mp
+ from typing import Optional, Dict, Any, List, cast
+ from flask import Flask, render_template, request, jsonify
+ from blaser_sonar_space import BLASEREvaluator
+ from comet import download_model, load_from_checkpoint
+ from comet.models import RegressionMetric
+ from dotenv import load_dotenv
+ from huggingface_hub import login
+ import pytorch_lightning as pl
+ import torch
+ import traceback
+ import sys
+
+ # Load environment variables
+ load_dotenv()
+
+ # Set tokenizer parallelism to avoid deadlocks
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+ # Set multiprocessing start method to 'fork' on Unix systems
+ if os.name != 'nt':  # Not Windows
+     mp.set_start_method('fork', force=True)
+
+ # Login to Hugging Face Hub if token is available
+ if os.getenv("HUGGINGFACE_TOKEN"):
+     login(token=os.getenv("HUGGINGFACE_TOKEN"))
+
+ app = Flask(__name__)
+
+ class COMETEvaluator:
+     """COMET-QE evaluator using the wmt22-comet-da model."""
+
+     def __init__(self):
+         self.model = None
+         self.model_name = "Unbabel/wmt22-comet-da"  # Using the latest recommended model
+
+     def initialize(self) -> bool:
+         """Initialize the COMET model."""
+         try:
+             print(f"🔄 Downloading COMET model {self.model_name}...")
+             model_path = download_model(self.model_name)
+             print("✨ Loading COMET model...")
+
+             # Load and initialize the model
+             self.model = cast(RegressionMetric, load_from_checkpoint(model_path))
+
+             # Set model to evaluation mode
+             self.model.eval()
+
+             # Create a trainer with simplified configuration
+             self.model.trainer = pl.Trainer(
+                 accelerator="mps" if torch.backends.mps.is_available() else "cpu",
+                 devices=1,
+                 enable_progress_bar=True,  # Enable to see progress
+                 logger=False,
+                 strategy="auto"
+             )
+
+             # Move model to appropriate device
+             device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+             print(f"🖥️ Using device: {device}")
+             self.model = self.model.to(device)
+
+             return True
+         except Exception as e:
+             print(f"❌ Failed to initialize COMET model: {str(e)}")
+             traceback.print_exc()  # Print full traceback
+             return False
+
+     def evaluate(self, source: str, hypothesis: str, reference: str) -> Optional[Dict[str, Any]]:
+         """
+         Evaluate translation quality using COMET.
+
+         Args:
+             source: Source text
+             hypothesis: Translation to evaluate
+             reference: Reference translation
+
+         Returns:
+             Dictionary containing evaluation results or None if evaluation fails
+         """
+         if self.model is None:
+             print("❌ COMET model not initialized")
+             return None
+
+         try:
+             print("📊 Starting COMET evaluation...")
+
+             # Prepare data in COMET format
+             data = [{
+                 "src": source,
+                 "mt": hypothesis,
+                 "ref": reference
+             }]
+
+             print("🔄 Data prepared, running prediction...")
+
+             # Set environment variables for multiprocessing
+             os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+             # Call predict method with minimal configuration
+             model_output = self.model.predict(
+                 samples=data,
+                 batch_size=1,
+                 num_workers=1  # Single worker to avoid multiprocessing issues
+             )
+
+             print(f"✨ Raw model output: {model_output}")
+
+             # Get the system score from the output
+             if isinstance(model_output, dict):
+                 comet_score = float(model_output.get('system_score', 0.0))
+             else:
+                 # Try to get the first score if it's a list/tuple
+                 try:
+                     comet_score = float(model_output[0])
+                 except (IndexError, TypeError, ValueError) as e:
+                     print(f"❌ Could not extract score from COMET output: {str(e)}")
+                     return None
+
+             print(f"📈 COMET score: {comet_score}")
+
+             # Map COMET score to BLASER range (1-5)
+             mapped_score = 1 + 4 / (1 + torch.exp(torch.tensor(-comet_score)))
+
+             result = {
+                 "comet_score": comet_score,
+                 "mapped_score": float(mapped_score),
+                 "quality_assessment": "Good" if mapped_score >= 3.5 else "Poor"
+             }
+
+             print(f"✅ Evaluation complete: {result}")
+             return result
+
+         except Exception as e:
+             print(f"❌ COMET evaluation failed: {str(e)}")
+             traceback.print_exc()  # Print full traceback
+             return None
+
+ # Initialize evaluators
+ print("🚀 Initializing Quality Lens components...")
+ blaser_evaluator = BLASEREvaluator()
+ if not blaser_evaluator.initialize():
+     raise RuntimeError("Failed to initialize BLASER evaluator")
+ print("✅ BLASER initialized successfully!")
+
+ print("🚀 Initializing COMET-QE...")
+ comet_evaluator = COMETEvaluator()
+ if not comet_evaluator.initialize():
+     raise RuntimeError("Failed to initialize COMET evaluator")
+ print("✅ COMET-QE initialized successfully!")
+
+ def evaluate_translation(source_text: str, translation_text: str) -> Dict[str, Any]:
+     """Evaluate translation quality using BLASER and COMET."""
+     try:
+         print("📊 Evaluating translation...")
+
+         # Get BLASER score
+         blaser_score = blaser_evaluator.evaluate(source_text, translation_text)
+
+         # Get COMET score
+         comet_result = comet_evaluator.evaluate(
+             source_text,
+             translation_text,
+             ""  # Reference translation is not available in this context
+         )
+
+         if comet_result is None:
+             raise Exception("COMET evaluation failed")
+
+         # Extract scores
+         comet_score = comet_result["comet_score"]
+         comet_mapped_score = comet_result["mapped_score"]
+
+         # Get quality assessment
+         quality = get_quality_assessment(
+             blaser_score=blaser_score,
+             comet_score=comet_mapped_score
+         )
+
+         return {
+             "success": True,
+             "blaser_score": round(blaser_score, 3),
+             "comet_score": round(comet_mapped_score, 3),
+             "raw_comet_score": round(comet_score, 3),
+             "quality_assessment": quality
+         }
+
+     except Exception as e:
+         print(f"❌ Translation evaluation failed: {e}")
+         return {
+             "success": False,
+             "error": str(e)
+         }
+
+ def get_quality_assessment(blaser_score: float, comet_score: Optional[float] = None) -> Dict[str, Any]:
+     """Get quality assessment based on BLASER and COMET scores."""
+
+     # Map BLASER score to quality level
+     if blaser_score >= 4.5:
+         blaser_quality = "Excellent"
+     elif blaser_score >= 4.0:
+         blaser_quality = "Very Good"
+     elif blaser_score >= 3.5:
+         blaser_quality = "Good"
+     elif blaser_score >= 3.0:
+         blaser_quality = "Fair"
+     else:
+         blaser_quality = "Poor"
+
+     # If COMET score is available, map it to quality level
+     comet_quality = None
+     if comet_score is not None:
+         if comet_score >= 4.5:
+             comet_quality = "Excellent"
+         elif comet_score >= 4.0:
+             comet_quality = "Very Good"
+         elif comet_score >= 3.5:
+             comet_quality = "Good"
+         elif comet_score >= 3.0:
+             comet_quality = "Fair"
+         else:
+             comet_quality = "Poor"
+
+     # Determine confidence level based on multiple factors
+     confidence = "High"
+     if comet_quality and comet_score is not None:
+         score_diff = abs(blaser_score - comet_score)
+         avg_score = (blaser_score + comet_score) / 2
+
+         # Very high confidence when scores agree and are in good ranges
+         if score_diff < 0.2 and avg_score >= 4.0:
+             confidence = "Very High"
+         # High confidence when scores are similar and acceptable
+         elif score_diff < 0.3 and avg_score >= 3.5:
+             confidence = "High"
+         # Medium confidence when scores differ moderately or are in mediocre range
+         elif score_diff < 0.5 or avg_score >= 3.0:
+             confidence = "Medium"
+         # Low confidence when scores differ significantly or are poor
+         else:
+             confidence = "Low"
+
+     return {
+         "quality_level": blaser_quality,
+         "comet_quality_level": comet_quality,
+         "confidence": confidence,
+         "explanation": get_quality_explanation(blaser_quality, comet_quality, confidence)
+     }
+
+ def get_quality_explanation(blaser_quality: str, comet_quality: Optional[str], confidence: str) -> str:
+     """Generate explanation for quality assessment."""
+     if not comet_quality:
+         return f"BLASER rates this translation as {blaser_quality}."
+
+     if blaser_quality == comet_quality:
+         if confidence == "Very High":
+             return f"Both BLASER and COMET strongly agree that this translation is {blaser_quality}."
+         else:
+             return f"Both BLASER and COMET agree that this translation is {blaser_quality}."
+
+     if confidence == "Low":
+         return f"There is significant disagreement between metrics: BLASER rates it as {blaser_quality} while COMET rates it as {comet_quality}."
+
+     return f"BLASER rates this translation as {blaser_quality}, while COMET rates it as {comet_quality}. Consider reviewing for potential issues."
+
+ @app.route('/')
+ def index():
+     """Render the main page"""
+     return render_template('index.html')
+
+ @app.route('/evaluate', methods=['POST'])
+ def evaluate():
+     """Handle translation evaluation requests"""
+     try:
+         # Get form data
+         source_text: Optional[str] = request.form.get('source_text')
+         translation_text: Optional[str] = request.form.get('translation_text')
+         source_lang: Optional[str] = request.form.get('source_lang')
+         target_lang: Optional[str] = request.form.get('target_lang')
+
+         # Validate input
+         if not all([source_text, translation_text, source_lang, target_lang]):
+             return jsonify({
+                 'error': 'Missing required fields'
+             }), 400
+
+         # At this point, we know all values are strings
+         assert isinstance(source_text, str)
+         assert isinstance(translation_text, str)
+         assert isinstance(source_lang, str)
+         assert isinstance(target_lang, str)
+
+         # Evaluate translation using both metrics
+         evaluation = evaluate_translation(source_text, translation_text)
+
+         return jsonify(evaluation)
+
+     except Exception as e:
+         print(f"Error during evaluation: {e}")
+         return jsonify({
+             'error': 'An error occurred during evaluation'
+         }), 500
+
+ if __name__ == "__main__":
+     # Run the app
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port)
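
Editor's note: the `/evaluate` route above reads four form fields and returns JSON with `blaser_score`, `comet_score`, `raw_comet_score`, and `quality_assessment`. A minimal client sketch follows; it is not part of this commit, and the base URL is an assumption for a locally running instance on the default port.

```python
# Hypothetical client for the /evaluate endpoint defined in app.py (illustrative, not part of this commit).
import requests

BASE_URL = "http://localhost:7860"  # assumption: app running locally on the default port

payload = {
    "source_text": "Le chat s'assit sur le tapis.",
    "translation_text": "The cat sat down on the carpet.",
    "source_lang": "fra_Latn",
    "target_lang": "eng_Latn",
}

# The route reads request.form, so send a form-encoded POST rather than JSON.
resp = requests.post(f"{BASE_URL}/evaluate", data=payload, timeout=300)
result = resp.json()

if result.get("success"):
    print("BLASER:", result["blaser_score"])
    print("COMET (mapped to 1-5):", result["comet_score"])
    print("Quality:", result["quality_assessment"]["quality_level"])
else:
    print("Evaluation failed:", result.get("error"))
```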
blaser_sonar_space.py ADDED
@@ -0,0 +1,149 @@
+ #!/usr/bin/env python3
+ """
+ BLASER 2.0-QE Implementation using sonar-space package
+ This implementation should give accurate scores matching the official results
+ """
+
+ import torch
+ from sonar.inference_pipelines.text import TextToEmbeddingModelPipeline
+ from sonar.models.blaser.loader import load_blaser_model
+
+ class BLASEREvaluator:
+     def __init__(self):
+         """Initialize BLASER evaluator"""
+         self.text_embedder = None
+         self.blaser_model = None
+         self.initialized = False
+
+     def initialize(self):
+         """Initialize models and pipelines"""
+         print("🚀 Initializing BLASER 2.0-QE...")
+         print("This may take a few minutes on first run as models are downloaded...")
+
+         try:
+             # Initialize text embedder with SONAR
+             print("📝 Loading SONAR text embedder...")
+             self.text_embedder = TextToEmbeddingModelPipeline(
+                 encoder="text_sonar_basic_encoder",
+                 tokenizer="text_sonar_basic_encoder"
+             )
+
+             # Load BLASER model
+             print("🎯 Loading BLASER 2.0-QE model...")
+             self.blaser_model = load_blaser_model("blaser_2_0_qe").eval()
+
+             self.initialized = True
+             print("✅ BLASER 2.0-QE initialized successfully!")
+             return True
+
+         except Exception as e:
+             print(f"❌ Initialization failed: {e}")
+             print("Try setting FAIRSEQ2_EXTENSION_TRACE=1 for more details")
+             return False
+
+     def evaluate(self, source_text: str, translation_text: str,
+                  source_lang: str = "fra_Latn", target_lang: str = "eng_Latn") -> float:
+         """
+         Evaluate translation quality using BLASER 2.0-QE
+
+         Args:
+             source_text: Source text
+             translation_text: Machine translation
+             source_lang: Source language code (default: fra_Latn)
+             target_lang: Target language code (default: eng_Latn)
+
+         Returns:
+             BLASER score (higher is better)
+         """
+         if not self.initialized:
+             raise RuntimeError("BLASER not initialized. Call initialize() first.")
+
+         print(f"\n📊 Evaluating translation:")
+         print(f"   Source ({source_lang}): {source_text}")
+         print(f"   Translation ({target_lang}): {translation_text}")
+
+         # Generate embeddings using SONAR
+         print("🔄 Generating embeddings...")
+         src_embs = self.text_embedder.predict([source_text], source_lang=source_lang)
+         mt_embs = self.text_embedder.predict([translation_text], source_lang=target_lang)
+
+         # Get BLASER score
+         print("🔄 Computing BLASER score...")
+         with torch.inference_mode():
+             score = self.blaser_model(src=src_embs, mt=mt_embs).item()
+
+         print(f"✨ BLASER score: {score:.3f}")
+         return score
+
+ def main():
+     """Example usage"""
+     # Initialize evaluator
+     evaluator = BLASEREvaluator()
+     if not evaluator.initialize():
+         print("Failed to initialize BLASER")
+         return
+
+     # Test cases with both directions
+     test_cases = [
+         # French-English pair
+         {
+             "source": "Le chat s'assit sur le tapis.",
+             "translation": "The cat sat down on the carpet.",
+             "source_lang": "fra_Latn",
+             "target_lang": "eng_Latn",
+             "name": "French → English"
+         },
+         {
+             "source": "The cat sat down on the carpet.",
+             "translation": "Le chat s'assit sur le tapis.",
+             "source_lang": "eng_Latn",
+             "target_lang": "fra_Latn",
+             "name": "English → French"
+         },
+         # English-English pair
+         {
+             "source": "The dog is running.",
+             "translation": "The dog runs.",
+             "source_lang": "eng_Latn",
+             "target_lang": "eng_Latn",
+             "name": "English → English (present continuous → simple)"
+         },
+         {
+             "source": "The dog runs.",
+             "translation": "The dog is running.",
+             "source_lang": "eng_Latn",
+             "target_lang": "eng_Latn",
+             "name": "English → English (simple → present continuous)"
+         },
+         # Spanish-English pair
+         {
+             "source": "El gato está sentado en la alfombra.",
+             "translation": "The cat is sitting on the carpet.",
+             "source_lang": "spa_Latn",
+             "target_lang": "eng_Latn",
+             "name": "Spanish → English"
+         },
+         {
+             "source": "The cat is sitting on the carpet.",
+             "translation": "El gato está sentado en la alfombra.",
+             "source_lang": "eng_Latn",
+             "target_lang": "spa_Latn",
+             "name": "English → Spanish"
+         }
+     ]
+
+     print("\n=== Running BLASER evaluations in both directions ===\n")
+
+     for case in test_cases:
+         print(f"\n🔄 Testing: {case['name']}")
+         score = evaluator.evaluate(
+             case["source"],
+             case["translation"],
+             case["source_lang"],
+             case["target_lang"]
+         )
+         print(f"📈 Final score: {score:.3f}")
+         print("   " + "="*50)
+
+ if __name__ == "__main__":
+     main()
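
Editor's note: the front end (templates/index.html below) converts the BLASER score into a hallucination score via HS = 1 − BLASER/5 with a 0.4 threshold. A minimal offline sketch of the same check, reusing the `BLASEREvaluator` above, is shown here; the names and threshold come from this commit, but the script itself is illustrative and not part of the upload.

```python
# Illustrative offline use of BLASEREvaluator with the HS = 1 - BLASER/5 rule from templates/index.html.
from blaser_sonar_space import BLASEREvaluator

evaluator = BLASEREvaluator()
if not evaluator.initialize():
    raise SystemExit("BLASER failed to initialize")

blaser = evaluator.evaluate(
    "Le chat s'assit sur le tapis.",
    "The cat sat down on the carpet.",
    source_lang="fra_Latn",
    target_lang="eng_Latn",
)

# Same formula and threshold the web UI uses (T = 0.4, i.e. BLASER < 3.0 flags a hallucination).
hallucination_score = 1 - blaser / 5
print(f"BLASER: {blaser:.3f}, HS: {hallucination_score:.3f}")
print("Hallucination suspected" if hallucination_score >= 0.4 else "Looks faithful")
```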
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ flask>=2.0.0
+ unbabel-comet>=2.0.0
+ torch>=2.0.0  # Required by COMET
+ transformers>=4.0.0  # Required by COMET
+ sonar-space  # For BLASER evaluation
+ python-dotenv
+ huggingface-hub
+ pytorch-lightning
static/favicon 2.ico ADDED
static/icon.ico ADDED

Git LFS Details

  • SHA256: 97817baf4e42678312576e53bdcff7e30c151332c895774ed314591558de3267
  • Pointer size: 131 Bytes
  • Size of remote file: 248 kB
templates/index.html ADDED
@@ -0,0 +1,320 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Quality Lens</title>
+     <link rel="icon" type="image/x-icon" href="{{ url_for('static', filename='icon.ico') }}">
+     <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
+     <style>
+         body {
+             background-color: #f8f9fa;
+             padding-top: 2rem;
+         }
+         .score-display {
+             font-size: 2.5rem;
+             font-weight: bold;
+             color: #0d6efd;
+         }
+         .loading {
+             display: none;
+         }
+         .card {
+             box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+         }
+         .language-select {
+             max-width: 200px;
+         }
+         #result {
+             transition: all 0.3s ease;
+         }
+         .score-circle {
+             width: 120px;
+             height: 120px;
+             border-radius: 50%;
+             border: 8px solid #e9ecef;
+             display: flex;
+             flex-direction: column;
+             justify-content: center;
+             align-items: center;
+             margin: 0 auto;
+             transition: border-color 0.3s ease;
+         }
+         .score-circle.excellent {
+             border-color: #28a745;
+         }
+         .score-circle.acceptable {
+             border-color: #17a2b8;
+         }
+         .score-circle.poor {
+             border-color: #dc3545;
+         }
+         .score-label {
+             font-size: 0.8rem;
+             color: #6c757d;
+         }
+         .quality-meter {
+             padding: 10px;
+         }
+         .hallucination-types .alert {
+             margin-bottom: 0.5rem;
+         }
+         .action-item {
+             padding: 0.5rem 0;
+         }
+         .quality-level, .confidence {
+             font-size: 1.1rem;
+         }
+         .explanation {
+             font-size: 1rem;
+             color: #6c757d;
+         }
+     </style>
+ </head>
+ <body>
+     <div class="container">
+         <h1 class="text-center mb-4">Quality Lens</h1>
+         <h5 class="text-center text-muted mb-4">Translation QE & Hallucination Detection</h5>
+
+         <div class="row justify-content-center">
+             <div class="col-md-10">
+                 <div class="card">
+                     <div class="card-body">
+                         <form id="evaluationForm" method="POST">
+                             <div class="row mb-3">
+                                 <!-- Source Text -->
+                                 <div class="col-md-6">
+                                     <div class="form-group">
+                                         <label for="source_text" class="form-label">Source Text:</label>
+                                         <textarea class="form-control" id="source_text" name="source_text" rows="4" required></textarea>
+                                         <div class="mt-2">
+                                             <label for="source_lang" class="form-label">Source Language:</label>
+                                             <select class="form-select language-select" id="source_lang" name="source_lang" required>
+                                                 <option value="eng_Latn">English</option>
+                                                 <option value="fra_Latn">French</option>
+                                                 <option value="spa_Latn">Spanish</option>
+                                                 <option value="deu_Latn">German</option>
+                                                 <option value="ita_Latn">Italian</option>
+                                                 <option value="por_Latn">Portuguese</option>
+                                                 <option value="nld_Latn">Dutch</option>
+                                                 <option value="zho_Hans">Chinese (Simplified)</option>
+                                                 <option value="jpn_Jpan">Japanese</option>
+                                                 <option value="kor_Hang">Korean</option>
+                                             </select>
+                                         </div>
+                                     </div>
+                                 </div>
+
+                                 <!-- Translation Text -->
+                                 <div class="col-md-6">
+                                     <div class="form-group">
+                                         <label for="translation_text" class="form-label">Translation:</label>
+                                         <textarea class="form-control" id="translation_text" name="translation_text" rows="4" required></textarea>
+                                         <div class="mt-2">
+                                             <label for="target_lang" class="form-label">Target Language:</label>
+                                             <select class="form-select language-select" id="target_lang" name="target_lang" required>
+                                                 <option value="eng_Latn">English</option>
+                                                 <option value="fra_Latn">French</option>
+                                                 <option value="spa_Latn">Spanish</option>
+                                                 <option value="deu_Latn">German</option>
+                                                 <option value="ita_Latn">Italian</option>
+                                                 <option value="por_Latn">Portuguese</option>
+                                                 <option value="nld_Latn">Dutch</option>
+                                                 <option value="zho_Hans">Chinese (Simplified)</option>
+                                                 <option value="jpn_Jpan">Japanese</option>
+                                                 <option value="kor_Hang">Korean</option>
+                                             </select>
+                                         </div>
+                                     </div>
+                                 </div>
+                             </div>
+
+                             <!-- Submit Button -->
+                             <div class="text-center">
+                                 <button type="submit" class="btn btn-primary btn-lg px-4">
+                                     Evaluate Translation
+                                 </button>
+                             </div>
+                         </form>
+
+                         <!-- Loading Spinner -->
+                         <div class="loading text-center mt-4">
+                             <div class="spinner-border text-primary" role="status">
+                                 <span class="visually-hidden">Loading...</span>
+                             </div>
+                             <p class="mt-2">Evaluating translation quality...</p>
+                         </div>
+
+                         <!-- Results -->
+                         <div id="result" class="mt-4" style="display: none;">
+                             <div class="row">
+                                 <!-- BLASER Score -->
+                                 <div class="col-md-6 mb-3">
+                                     <div class="card h-100">
+                                         <div class="card-body text-center">
+                                             <h5 class="card-title mb-3">BLASER Score</h5>
+                                             <div class="score-display mb-2" id="blaser_score">0.000</div>
+                                             <p class="text-muted">
+                                                 BLASER scores range from 1 to 5, where 5 indicates perfect semantic equivalence.
+                                             </p>
+                                         </div>
+                                     </div>
+                                 </div>
+
+                                 <!-- COMET Score -->
+                                 <div class="col-md-6 mb-3">
+                                     <div class="card h-100">
+                                         <div class="card-body text-center">
+                                             <h5 class="card-title mb-3">COMET Score</h5>
+                                             <div class="score-display mb-2" id="comet_score">0.000</div>
+                                             <p class="text-muted">
+                                                 COMET scores are mapped to match BLASER's 1-5 range.
+                                             </p>
+                                         </div>
+                                     </div>
+                                 </div>
+                             </div>
+
+                             <!-- Hallucination Detection -->
+                             <div class="card mt-3">
+                                 <div class="card-body">
+                                     <h5 class="card-title">
+                                         <i class="fas fa-exclamation-triangle"></i>
+                                         Hallucination Detection
+                                     </h5>
+                                     <div class="progress mb-3">
+                                         <div id="hallucination_score_bar" class="progress-bar" role="progressbar" style="width: 0%">
+                                             0%
+                                         </div>
+                                     </div>
+                                     <p id="hallucination_details" class="mb-0">
+                                         <!-- Will be populated by JS -->
+                                     </p>
+                                 </div>
+                             </div>
+                         </div>
+                     </div>
+                 </div>
+             </div>
+         </div>
+
+         <!-- Acknowledgments -->
+         <footer class="mt-5 mb-4">
+             <div class="container">
+                 <hr>
+                 <div class="row justify-content-center">
+                     <div class="col-md-8">
+                         <h6 class="text-center mb-3">Acknowledgments</h6>
+                         <div class="d-flex flex-wrap justify-content-center gap-4">
+                             <div class="text-center">
+                                 <a href="https://huggingface.co/facebook/blaser-2.0-qe" target="_blank" class="text-decoration-none">
+                                     <span class="badge bg-light text-dark border">BLASER 2.0</span>
+                                 </a>
+                                 <div class="small text-muted">CC BY-NC 4.0</div>
+                             </div>
+                             <div class="text-center">
+                                 <a href="https://github.com/facebookresearch/SONAR" target="_blank" class="text-decoration-none">
+                                     <span class="badge bg-light text-dark border">SONAR</span>
+                                 </a>
+                                 <div class="small text-muted">MIT</div>
+                             </div>
+                             <div class="text-center">
+                                 <a href="https://github.com/Unbabel/COMET" target="_blank" class="text-decoration-none">
+                                     <span class="badge bg-light text-dark border">COMET-QE</span>
+                                 </a>
+                                 <div class="small text-muted">CC BY-NC-SA 4.0</div>
+                             </div>
+                             <div class="text-center">
+                                 <a href="https://arxiv.org/abs/2501.17295" target="_blank" class="text-decoration-none">
+                                     <span class="badge bg-light text-dark border">Hallucination Detection</span>
+                                 </a>
+                                 <div class="small text-muted">arXiv:2501.17295</div>
+                             </div>
+                         </div>
+                         <p class="small text-muted text-center mt-3">
+                             For research and non-commercial use only
+                         </p>
+                     </div>
+                 </div>
+             </div>
+         </footer>
+     </div>
+
+     <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
+     <script src="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/js/all.min.js"></script>
+     <script>
+         function updateAssessment(data) {
+             if (!data.success) {
+                 throw new Error(data.error || 'Unknown error occurred');
+             }
+
+             // Update scores
+             document.getElementById('blaser_score').textContent = data.blaser_score.toFixed(3);
+             document.getElementById('comet_score').textContent = data.comet_score.toFixed(3);
+
+             // Show results
+             document.getElementById('result').style.display = 'block';
+
+             // Calculate hallucination score based on BLASER 2.0-QE paper
+             // HS(x,y) = 1 - BLASER(x,y)/5
+             const hallucinationScore = 1 - (data.blaser_score / 5);
+             const riskPercentage = Math.round(hallucinationScore * 100);
+
+             // Update progress bar
+             const riskBar = document.getElementById('hallucination_score_bar');
+             riskBar.style.width = `${riskPercentage}%`;
+             riskBar.textContent = `${riskPercentage}%`;
+
+             // Using threshold T=0.4 (equivalent to BLASER score of 3.0)
+             // This means HS >= 0.4 indicates hallucination
+             if (hallucinationScore >= 0.4) {
+                 riskBar.className = 'progress-bar bg-danger';
+                 document.getElementById('hallucination_details').textContent =
+                     'High likelihood of hallucination detected (HS ≥ 0.4). The translation may contain fabricated content.';
+             } else if (hallucinationScore >= 0.3) {
+                 riskBar.className = 'progress-bar bg-warning';
+                 document.getElementById('hallucination_details').textContent =
+                     'Moderate risk of semantic divergence (0.3 ≤ HS < 0.4).';
+             } else {
+                 riskBar.className = 'progress-bar bg-info';
+                 document.getElementById('hallucination_details').textContent =
+                     'Low risk of hallucination (HS < 0.3). Translation appears semantically faithful.';
+             }
+         }
+
+         document.getElementById('evaluationForm').addEventListener('submit', async function(e) {
+             e.preventDefault();
+
+             // Show loading spinner
+             document.querySelector('.loading').style.display = 'block';
+             document.getElementById('result').style.display = 'none';
+
+             // Get form data
+             const formData = new FormData(this);
+
+             try {
+                 // Send request to backend
+                 const response = await fetch('/evaluate', {
+                     method: 'POST',
+                     body: formData
+                 });
+
+                 const data = await response.json();
+
+                 if (!data.success) {
+                     throw new Error(data.error || 'Unknown error occurred');
+                 }
+
+                 // Update assessment display
+                 updateAssessment(data);
+             } catch (error) {
+                 console.error('Error:', error);
+                 alert('An error occurred while evaluating the translation. Please try again.');
+             } finally {
+                 // Hide loading spinner
+                 document.querySelector('.loading').style.display = 'none';
+             }
+         });
+     </script>
+ </body>
+ </html>
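
Editor's note: for readers checking the numbers the UI displays, app.py maps the raw COMET score s into BLASER's 1-5 range as 1 + 4·σ(s), where σ is the logistic sigmoid. A small sanity-check sketch (the raw score below is an arbitrary example, not output from this commit):

```python
# Reproduces the COMET -> 1..5 mapping used in app.py: mapped = 1 + 4 * sigmoid(raw).
import math

def map_comet_to_blaser_range(raw_score: float) -> float:
    return 1 + 4 / (1 + math.exp(-raw_score))

raw = 0.85  # arbitrary example; wmt22-comet-da scores typically fall roughly in the 0-1 range
print(f"raw={raw:.2f} -> mapped={map_comet_to_blaser_range(raw):.2f}")  # ~3.80
```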