yolloo committed on
Commit 4c134b3 · verified · 1 Parent(s): a1cd25b

Upload 6 files

Files changed (6)
  1. Dockerfile +24 -0
  2. app.py +45 -0
  3. qamatcher_server.py +64 -0
  4. qgen_server.py +33 -0
  5. requirements.txt +11 -0
  6. whisper_server.py +77 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
+ # Use a more stable and widely-used official Python base image
+ FROM python:3.9-slim-bullseye
+
+ # Set the working directory
+ WORKDIR /code
+
+ # Install git and ffmpeg (ffmpeg is required by the transformers ASR pipeline to decode uploaded audio) and set cache environment variables
+ RUN apt-get update && apt-get install -y --no-install-recommends git ffmpeg && rm -rf /var/lib/apt/lists/*
+ ENV XDG_CACHE_HOME=/tmp/.cache
+ ENV HF_HOME=/tmp/.cache/huggingface
+ ENV PIP_CACHE_DIR=/tmp/.cache/pip
+
+ # Copy the requirements file and install dependencies
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy all the Python server files
+ COPY . /code/
+
+ # Expose the port the app runs on (Hugging Face default)
+ EXPOSE 7860
+
+ # Command to run the application using gunicorn
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,45 @@
+
+ import os
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+
+ # This is done in the Dockerfile now, but leaving for local dev consistency
+ os.environ["XDG_CACHE_HOME"] = os.environ.get("XDG_CACHE_HOME", "/tmp/.cache")
+
+ # Import handlers from other server files
+ from whisper_server import handle_transcribe, model as whisper_model
+ from qgen_server import handle_generate_questions, qg_model
+ from qamatcher_server import handle_match_question, matcher_model
+
+ app = Flask(__name__)
+
+ # Configure CORS to allow all origins
+ CORS(app, resources={r"/*": {"origins": "*"}})
+
+ @app.route('/')
+ def index():
+     return jsonify({
+         'message': 'VoiceQ AI Server is running!',
+         'models_loaded': {
+             'whisper': whisper_model is not None,
+             'question-generator': qg_model is not None,
+             'question-matcher': matcher_model is not None,
+         }
+     })
+
+ @app.route('/transcribe', methods=['POST'])
+ def transcribe():
+     return handle_transcribe()
+
+ @app.route('/generate-questions', methods=['POST'])
+ def generate_questions():
+     return handle_generate_questions()
+
+ @app.route('/match-question', methods=['POST'])
+ def match_question():
+     return handle_match_question()
+
+ # The following block is for local development only and will not be used in the Docker container.
+ if __name__ == '__main__':
+     PORT = int(os.environ.get("PORT", 5001))
+     app.run(host='0.0.0.0', port=PORT, debug=True)
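A minimal client-side sketch for checking the service, assuming it is reachable on the Dockerfile's default port 7860 and that the requests package is installed on the caller's machine:

import requests

# Hypothetical base URL; adjust to the actual deployment address
BASE_URL = "http://localhost:7860"

# The index route reports which of the three models finished loading
health = requests.get(f"{BASE_URL}/").json()
print(health["message"], health["models_loaded"])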
qamatcher_server.py ADDED
@@ -0,0 +1,64 @@
+
+ from flask import request, jsonify
+ from sentence_transformers import SentenceTransformer, util
+
+ print("Loading SentenceTransformer model (paraphrase-MiniLM-L6-v2)...")
+ matcher_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+ print("SentenceTransformer model loaded.")
+
+ # Define a threshold for a "good" match
+ SIMILARITY_THRESHOLD = 0.6
+
+ def handle_match_question():
+     data = request.get_json()
+     if not data or 'user_question' not in data or 'documents' not in data:
+         return jsonify({'error': 'Invalid request. "user_question" and "documents" are required.'}), 400
+
+     user_question = data['user_question']
+     documents = data['documents']
+
+     if not documents:
+         return jsonify({'answer': "There are no notes to search."})
+
+     # Flatten the list of questions from all documents
+     all_questions = []
+     # Map each question to the original note text
+     question_to_note_map = {}
+
+     for doc in documents:
+         note_text = doc.get('note_text', '')
+         for q in doc.get('questions', []):
+             all_questions.append(q)
+             question_to_note_map[q] = note_text
+
+     if not all_questions:
+         return jsonify({'answer': "No questions have been generated for your notes yet."})
+
+     try:
+         # Encode the user's question and all stored questions
+         user_embedding = matcher_model.encode(user_question, convert_to_tensor=True)
+         stored_embeddings = matcher_model.encode(all_questions, convert_to_tensor=True)
+
+         # Compute cosine similarity
+         cosine_scores = util.pytorch_cos_sim(user_embedding, stored_embeddings)
+
+         # Find the best match (cast to a plain int so it can index the Python list)
+         best_match_idx = int(cosine_scores.argmax())
+         best_score = float(cosine_scores[0][best_match_idx])
+         best_question = all_questions[best_match_idx]
+
+         print(f"User Question: '{user_question}'")
+         print(f"Best matched stored question: '{best_question}' with score: {best_score:.4f}")
+
+         # Check if the match is good enough
+         if best_score > SIMILARITY_THRESHOLD:
+             # Return the note associated with the best-matched question
+             answer = question_to_note_map[best_question]
+         else:
+             answer = "Sorry, I couldn't find a relevant note to answer your question."
+
+         return jsonify({'answer': answer})
+
+     except Exception as e:
+         print(f"Error during question matching: {e}")
+         return jsonify({'error': str(e)}), 500
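A minimal sketch of the request shape handle_match_question expects, with placeholder note text and a hypothetical local URL; field names mirror the handler above:

import requests

# Hypothetical example payload for the /match-question route
payload = {
    "user_question": "What does photosynthesis produce?",
    "documents": [
        {
            "note_text": "Photosynthesis produces glucose and oxygen from carbon dioxide and water.",
            "questions": [
                "What does photosynthesis produce?",
                "What are the inputs of photosynthesis?",
            ],
        }
    ],
}
resp = requests.post("http://localhost:7860/match-question", json=payload)
print(resp.json())  # {'answer': ...} returns the matched note text when the best score exceeds 0.6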
qgen_server.py ADDED
@@ -0,0 +1,33 @@
+
+ from flask import request, jsonify
+ from transformers import pipeline
+
+ print("Loading Question Generation model (iarfmoose/t5-base-question-generator)...")
+ # Initialize the pipeline for text2text-generation with the specified model
+ qg_model = pipeline("text2text-generation", model="iarfmoose/t5-base-question-generator")
+ print("Question Generation model loaded.")
+
+ def handle_generate_questions():
+     data = request.get_json()
+     if not data or 'text' not in data:
+         return jsonify({'error': 'Invalid request. "text" field is required.'}), 400
+
+     text = data['text']
+
+     # Prepend the text with "generate questions: " as required by this model
+     input_text = f"generate questions: {text}"
+
+     try:
+         # Generate questions
+         results = qg_model(input_text, max_length=64, num_beams=4, early_stopping=True)
+
+         # The result is a single string with questions separated by '<sep>'
+         generated_text = results[0]['generated_text']
+         questions = [q.strip() for q in generated_text.split('<sep>') if q.strip()]
+
+         print(f"Generated questions for text: '{text[:50]}...' -> {questions}")
+
+         return jsonify({'questions': questions})
+     except Exception as e:
+         print(f"Error during question generation: {e}")
+         return jsonify({'error': str(e)}), 500
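A minimal sketch of a call to the /generate-questions route backed by this handler (hypothetical URL and sample text):

import requests

resp = requests.post(
    "http://localhost:7860/generate-questions",  # placeholder address
    json={"text": "The mitochondria is the powerhouse of the cell, producing ATP through cellular respiration."},
)
print(resp.json())  # {'questions': [...]} parsed from the '<sep>'-separated model output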
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ Flask==3.0.3
+ # openai-whisper is replaced by the transformers library
+ # openai-whisper==20231117
+ sentence-transformers==2.7.0
+ torch==2.3.0
+ torchaudio==2.3.0
+ flask-cors==4.0.1
+ gunicorn==22.0.0
+ # Added for Hugging Face model loading
+ transformers==4.43.3
+ accelerate
whisper_server.py ADDED
@@ -0,0 +1,77 @@
+
+ import os
+ import tempfile
+ from flask import request, jsonify
+ from transformers import pipeline
+ import torch
+
+ # Define a writable directory for the model cache
+ cache_dir = os.path.join(os.getenv("XDG_CACHE_HOME", "/tmp/.cache"), "huggingface_models")
+ os.makedirs(cache_dir, exist_ok=True)
+
+ print("Loading collabora/whisper-tiny-hindi model via transformers pipeline...")
+
+ # Determine device
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+ # Initialize the ASR pipeline with the specified model
+ # Using the transformers pipeline is the correct way to load custom models from the Hub.
+ model = pipeline(
+     "automatic-speech-recognition",
+     model="collabora/whisper-tiny-hindi",
+     device=device
+ )
+
+ print("Whisper model loaded.")
+
+ def handle_transcribe():
+     if 'file' not in request.files:
+         return jsonify({'error': 'No file part in the request'}), 400
+
+     file = request.files['file']
+
+     if file.filename == '':
+         return jsonify({'error': 'No selected file'}), 400
+
+     if file:
+         # Use a temporary file to save the upload
+         with tempfile.NamedTemporaryFile(delete=True, suffix=".webm") as temp_audio:
+             file.save(temp_audio.name)
+
+             try:
+                 print(f"Transcribing file: {temp_audio.name} with collabora/whisper-tiny-hindi pipeline")
+
+                 # The pipeline expects a file path and handles the processing.
+                 result = model(temp_audio.name)
+
+                 transcribed_text = result.get('text', '')
+
+                 print("Transcription successful.")
+                 return jsonify({'text': transcribed_text})
+             except Exception as e:
+                 print(f"Error during transcription: {e}")
+                 # Provide a more specific error if possible
+                 error_message = f"An unexpected error occurred during transcription: {str(e)}"
+                 if "out of memory" in str(e).lower():
+                     error_message = "The model ran out of memory. Please try a smaller audio file or check server resources."
+
+                 return jsonify({'error': error_message}), 500
+
+     return jsonify({'error': 'File processing failed'}), 500
+
+ # This part is for standalone execution if needed for testing
+ if __name__ == '__main__':
+     from flask import Flask
+     from flask_cors import CORS
+
+     app = Flask(__name__)
+     CORS(app)
+     PORT = int(os.environ.get("PORT", 5001))
+
+     @app.route('/transcribe', methods=['POST'])
+     def standalone_transcribe():
+         return handle_transcribe()
+
+     app.run(host='0.0.0.0', port=PORT, debug=True)
+
+
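A minimal sketch of uploading audio to the /transcribe route served by this handler; the multipart field must be named 'file' to match the handler above, and the file path is a placeholder:

import requests

# Hypothetical client call; recording.webm stands in for a real audio capture
with open("recording.webm", "rb") as audio:
    resp = requests.post(
        "http://localhost:7860/transcribe",
        files={"file": ("recording.webm", audio, "audio/webm")},
    )
print(resp.json())  # {'text': '...'} on success, {'error': '...'} otherwise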