Spaces:
Running
Running
File size: 4,451 Bytes
1950d4b 27ac340 1950d4b 27ac340 1950d4b 095874f 1950d4b e6108ec 1950d4b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
from flask import Flask, request, jsonify
from flask_cors import CORS
import spacy
import os
import logging
from logging.handlers import RotatingFileHandler
from werkzeug.middleware.proxy_fix import ProxyFix
# Initialize logging.
# basicConfig() installs a default stream handler on the root logger (when
# the root has none yet) and sets the root level to INFO; the module logger
# inherits that level.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log to the console instead of a file to avoid permission issues.
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
    '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
))
logger.addHandler(console_handler)

# This logger has its own handler, so stop records from also bubbling up to
# the root handler installed by basicConfig(); otherwise every message from
# this module is written to the console twice (once per handler).
logger.propagate = False
# Initialize Flask app
app = Flask(__name__)
# Wrap the WSGI callable in ProxyFix so the app behaves properly behind proxy
# servers. NOTE(review): ProxyFix is used with its default settings here --
# confirm they match the number of proxies in the actual deployment.
app.wsgi_app = ProxyFix(app.wsgi_app)
# Configure CORS: the "/*" resource pattern with origins "*" applies to every
# route and accepts any origin.
CORS(app, resources={r"/*": {"origins": "*"}})
# Runtime settings: each value is read from the environment and falls back
# to a built-in default when the variable is not set.
HOST = os.getenv('HOST', '0.0.0.0')
PORT = int(os.getenv('PORT', 7860))
# DEBUG is enabled only when the variable spells "true" (case-insensitive).
DEBUG = os.getenv('DEBUG', 'False').lower() == 'true'
# Load the spaCy pipeline once at import time. A failure is logged and then
# re-raised, so the module does not finish importing without a model.
logger.info("Loading spaCy model...")
try:
    nlp = spacy.load('en_core_web_trf')
except Exception as load_error:
    logger.error(f"Failed to load spaCy model: {str(load_error)}")
    raise
else:
    logger.info("spaCy model loaded successfully")
# Tag groups for RSVP: maps a group name to the POS tags that belong to it.
# Insertion order matters to get_group(), which returns the first group whose
# list contains the token's tag.
DELAY_GROUPS = {
    # Intentionally empty: get_group() assigns this group from the token's
    # entity type, not from its POS tag.
    'named_entity': [],
    # Nouns, proper nouns, verbs, adjectives, interjections.
    'content': ['NOUN', 'PROPN', 'VERB', 'ADJ', 'INTJ'],
    # Determiners, pronouns, adpositions, auxiliaries, conjunctions.
    'function': ['DET', 'PRON', 'ADP', 'AUX', 'CCONJ', 'SCONJ'],
    # Adverbs and numerals.
    'modifier': ['ADV', 'NUM'],
    # Tags that combine_with_punctuation() glues onto the preceding token.
    'punctuation': ['PUNCT', 'SPACE', 'SYM'],
}
def get_group(token):
    """Classify a token into one of the DELAY_GROUPS names.

    A token with a non-empty ``ent_type_`` is always ``'named_entity'``.
    Otherwise the result is the first group in ``DELAY_GROUPS`` whose tag
    list contains ``token.pos_``, or ``'other'`` when no group lists it.
    """
    # Entity membership wins over the part-of-speech lookup.
    if token.ent_type_:
        return 'named_entity'

    tag = token.pos_
    matching_groups = (
        name for name, members in DELAY_GROUPS.items() if tag in members
    )
    return next(matching_groups, 'other')
def combine_with_punctuation(doc):
    """Merge punctuation-like tokens into the token that precedes them.

    Walks ``doc`` in order. A token whose ``pos_`` is listed under
    ``DELAY_GROUPS['punctuation']`` is appended to the text of the pending
    entry; any other token closes the pending entry and opens a new one.
    A punctuation-like token with nothing pending opens its own entry.
    Each entry keeps the group of the token that opened it.

    Returns:
        A ``(texts, groups)`` pair of parallel lists; both are empty when
        ``doc`` has no tokens.
    """
    texts = []
    labels = []
    pending_text = ""
    pending_label = None
    saw_token = False

    for tok in doc:
        saw_token = True

        # Punctuation following something: glue it on and keep accumulating.
        if tok.pos_ in DELAY_GROUPS['punctuation'] and pending_text:
            pending_text += tok.text
            continue

        # Any other case opens a new entry; emit the finished one first.
        if pending_text:
            texts.append(pending_text)
            labels.append(pending_label)
        pending_text = tok.text
        pending_label = get_group(tok)

    # Emit whatever is still pending once the last token has been consumed.
    if saw_token:
        texts.append(pending_text)
        labels.append(pending_label)

    return texts, labels
@app.route('/health', methods=['GET'])
def health_check():
    """Report service status and the name of the spaCy model in use."""
    status = {'status': 'healthy', 'model': 'en_core_web_trf'}
    return jsonify(status)
@app.route('/pos-tag', methods=['POST'])
def pos_tag():
    """Tokenize the posted text and label every resulting token with a group.

    Expects a JSON object body with a non-blank string ``text`` field.

    Returns:
        * 200 with ``{'tokens': [...], 'groups': [...]}`` (parallel lists
          produced by ``combine_with_punctuation``) on success.
        * 400 with ``{'error': ...}`` when the body is missing, is not valid
          JSON, is not an object containing ``text``, or when ``text`` is not
          a string or is blank.
        * 500 with ``{'error': 'Internal server error'}`` when processing
          the text fails.
    """
    # silent=True makes get_json() return None for a missing, mistyped or
    # malformed body instead of raising, so bad client input is answered
    # with a 400 below rather than being reported as a server error.
    data = request.get_json(silent=True)
    if not data:
        logger.warning("No JSON data in request")
        return jsonify({'error': 'No JSON data provided'}), 400

    # Valid JSON may still be a list, string or number; only an object can
    # carry the 'text' field.
    if not isinstance(data, dict) or 'text' not in data:
        logger.warning("No 'text' field in request data")
        return jsonify({'error': 'Please provide text in the request body'}), 400

    text = data['text']
    # Reject numbers, lists, null, etc. before calling string methods on it.
    if not isinstance(text, str):
        logger.warning("Non-string text provided")
        return jsonify({'error': 'Text must be a string'}), 400

    if not text.strip():
        logger.warning("Empty text provided")
        return jsonify({'error': 'Text cannot be empty'}), 400

    # Only the actual NLP work is guarded: anything failing here is a
    # genuine server-side problem.
    try:
        logger.info(f"Processing text of length: {len(text)}")
        doc = nlp(text)
        tokens, groups = combine_with_punctuation(doc)
        response = {
            'tokens': tokens,
            'groups': groups,
        }
        logger.info(f"Successfully processed text with {len(tokens)} tokens")
        return jsonify(response)
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}", exc_info=True)
        return jsonify({'error': 'Internal server error'}), 500
@app.errorhandler(404)
def not_found_error(error):
    """Answer 404 errors with a JSON error body."""
    body = jsonify({'error': 'Not found'})
    return body, 404
@app.errorhandler(500)
def internal_error(error):
    """Log 500 errors (with exception info) and answer with a JSON error body."""
    logger.error('Server Error', exc_info=True)
    body = jsonify({'error': 'Internal server error'})
    return body, 500
# Script entry point: start Flask's built-in server with the HOST, PORT and
# DEBUG settings read from the environment above.
if __name__ == '__main__':
    logger.info(f"Starting server on {HOST}:{PORT}")
    # The stray trailing "|" after this call was a syntax error and is removed.
    app.run(host=HOST, port=PORT, debug=DEBUG)