Sandipan Haldar committed on
Commit
770544d
·
1 Parent(s): 8d8077a

feat: Replace general context with LinkedIn-specific context

Browse files

- Update environment configuration to use TEMPERATURE_LINKEDIN and DEFAULT_TOKENS_LINKEDIN
- Replace general context with LinkedIn context in autocomplete engine
- Add LinkedIn-specialized system prompts focusing on professional networking
- Update UI to show 'LinkedIn Content' option instead of 'General Text'
- Modify prompt editor to include LinkedIn-specific templates
- Update examples and documentation to reflect LinkedIn context
- Change default context from 'general' to 'linkedin' throughout codebase
- Update README.md to document LinkedIn context type

This change transforms the application from a general text completion tool
to a LinkedIn-focused professional content creation assistant.

Files changed (7) hide show
  1. .env.example +7 -7
  2. README.md +1 -1
  3. app.py +28 -27
  4. config/settings.py +8 -8
  5. settings.py +8 -8
  6. src/autocomplete.py +12 -11
  7. src/utils.py +145 -111
.env.example CHANGED
@@ -27,23 +27,23 @@ RATE_LIMIT_REQUESTS_PER_MINUTE=60
27
  RATE_LIMIT_ENABLED=true
28
 
29
  # Model Configuration
30
- OPENAI_MODEL=gpt-3.5-turbo
31
  ANTHROPIC_MODEL=claude-3-haiku-20240307
32
 
33
  # Temperature settings for different contexts (0.0 to 2.0)
34
  TEMPERATURE_EMAIL=0.6
35
  TEMPERATURE_CREATIVE=0.8
36
- TEMPERATURE_GENERAL=0.7
37
 
38
  # Default token limits for different contexts
39
- DEFAULT_TOKENS_EMAIL=200
40
- DEFAULT_TOKENS_CREATIVE=250
41
- DEFAULT_TOKENS_GENERAL=200
42
 
43
  # UI Configuration
44
  UI_THEME=soft
45
- UI_TITLE=🚀 Smart Auto-Complete
46
- UI_DESCRIPTION=Intelligent text completion powered by AI
47
 
48
  # Server Configuration
49
  SERVER_HOST=0.0.0.0
 
27
  RATE_LIMIT_ENABLED=true
28
 
29
  # Model Configuration
30
+ OPENAI_MODEL=gpt-4.1-mini
31
  ANTHROPIC_MODEL=claude-3-haiku-20240307
32
 
33
  # Temperature settings for different contexts (0.0 to 2.0)
34
  TEMPERATURE_EMAIL=0.6
35
  TEMPERATURE_CREATIVE=0.8
36
+ TEMPERATURE_LINKEDIN=0.7
37
 
38
  # Default token limits for different contexts
39
+ DEFAULT_TOKENS_EMAIL=250
40
+ DEFAULT_TOKENS_CREATIVE=500
41
+ DEFAULT_TOKENS_LINKEDIN=500
42
 
43
  # UI Configuration
44
  UI_THEME=soft
45
+ UI_TITLE=🚀 LinkedIn Smart Auto-Complete
46
+ UI_DESCRIPTION=Intelligent LinkedIn text completion powered by AI
47
 
48
  # Server Configuration
49
  SERVER_HOST=0.0.0.0
README.md CHANGED
@@ -139,7 +139,7 @@ suggestions = autocomplete.get_suggestions(
139
 
140
  - `email`: Professional email writing
141
  - `creative`: Creative writing and storytelling
142
- - `general`: General text completion
143
 
144
  ## 🧪 Testing
145
 
 
139
 
140
  - `email`: Professional email writing
141
  - `creative`: Creative writing and storytelling
142
+ - `linkedin`: LinkedIn professional content and networking
143
 
144
  ## 🧪 Testing
145
 
app.py CHANGED
@@ -6,12 +6,12 @@ A context-aware text completion tool built with Gradio
6
 
7
  from typing import List, Tuple
8
 
 
 
9
  from config.settings import AppSettings
10
  from src.autocomplete import SmartAutoComplete
11
  from src.utils import setup_logging
12
 
13
- import gradio as gr
14
-
15
  # Initialize logging
16
  logger = setup_logging()
17
 
@@ -184,9 +184,9 @@ def create_interface():
184
  choices=[
185
  ("📧 Email Writing", "email"),
186
  ("✍️ Creative Writing", "creative"),
187
- ("📝 General Text", "general"),
188
  ],
189
- value="email",
190
  label="Select Context",
191
  elem_classes=["context-selector"],
192
  )
@@ -274,24 +274,25 @@ def create_interface():
274
  placeholder="Enter the user message template...",
275
  )
276
 
277
- with gr.Tab("📝 General Context"):
278
- general_system_prompt = gr.Textbox(
279
  label="System Prompt",
280
- value="""You are a helpful writing assistant. Generate natural,
281
- contextually appropriate text completions. Focus on:
282
- - Natural language flow
283
- - Contextual relevance
284
- - Clarity and coherence
285
- - Appropriate tone
 
286
 
287
  IMPORTANT: Generate a completion that is approximately {max_tokens} tokens long.
288
  Adjust your response length accordingly - shorter for fewer tokens, longer for more tokens.""",
289
  lines=8,
290
- placeholder="Enter the system prompt for general context...",
291
  )
292
- general_user_template = gr.Textbox(
293
  label="User Message Template",
294
- value="Complete this text naturally with approximately {max_tokens} tokens: {text}",
295
  lines=3,
296
  placeholder="Enter the user message template...",
297
  )
@@ -330,9 +331,9 @@ def create_interface():
330
  "creative",
331
  ],
332
  [
333
- "Academic research paper on technology trends",
334
- "The impact of artificial intelligence on modern society",
335
- "general",
336
  ],
337
  ],
338
  inputs=[context_input, text_input, context_selector],
@@ -349,8 +350,8 @@ def create_interface():
349
  email_user,
350
  creative_sys,
351
  creative_user,
352
- general_sys,
353
- general_user,
354
  ):
355
  """Update suggestions based on input with custom prompts"""
356
  logger.info(
@@ -370,9 +371,9 @@ def create_interface():
370
  "user_template": creative_user,
371
  "temperature": 0.8,
372
  },
373
- "general": {
374
- "system_prompt": general_sys,
375
- "user_template": general_user,
376
  "temperature": 0.7,
377
  },
378
  }
@@ -405,8 +406,8 @@ def create_interface():
405
  email_user_template,
406
  creative_system_prompt,
407
  creative_user_template,
408
- general_system_prompt,
409
- general_user_template,
410
  ],
411
  outputs=[status_display, copy_textbox],
412
  )
@@ -416,7 +417,7 @@ def create_interface():
416
  ---
417
 
418
  ### 🎮 How to Use:
419
- 1. **Select your context** (Email, Creative, or General)
420
  2. **Add context information** (optional) - background info, references, or previous context
421
  3. **Enter your text** in the main text area
422
  4. **Adjust output length** (50-500 tokens) in settings
@@ -428,7 +429,7 @@ def create_interface():
428
  - **Context Window**: Add background info, previous conversations, or references to improve suggestions
429
  - **Email**: Try starting with "Dear..." or "I hope..." + add meeting context
430
  - **Creative**: Start with "Once upon a time..." + add story background
431
- - **General**: Works great for any type of text! + add relevant context
432
  - **Output Length**: Adjust the token slider for longer or shorter completions
433
  - **Custom Prompts**: Edit the AI prompts to customize behavior for your specific needs
434
 
 
6
 
7
  from typing import List, Tuple
8
 
9
+ import gradio as gr
10
+
11
  from config.settings import AppSettings
12
  from src.autocomplete import SmartAutoComplete
13
  from src.utils import setup_logging
14
 
 
 
15
  # Initialize logging
16
  logger = setup_logging()
17
 
 
184
  choices=[
185
  ("📧 Email Writing", "email"),
186
  ("✍️ Creative Writing", "creative"),
187
+ ("💼 LinkedIn Content", "linkedin"),
188
  ],
189
+ value="linkedin",
190
  label="Select Context",
191
  elem_classes=["context-selector"],
192
  )
 
274
  placeholder="Enter the user message template...",
275
  )
276
 
277
+ with gr.Tab("💼 LinkedIn Context"):
278
+ linkedin_system_prompt = gr.Textbox(
279
  label="System Prompt",
280
+ value="""You are a LinkedIn writing assistant specialized in professional networking content. Generate engaging,
281
+ professional LinkedIn-appropriate text completions. Focus on:
282
+ - Professional networking tone
283
+ - Industry-relevant language
284
+ - Engaging and authentic voice
285
+ - LinkedIn best practices (hashtags, mentions, professional insights)
286
+ - Career development and business communication
287
 
288
  IMPORTANT: Generate a completion that is approximately {max_tokens} tokens long.
289
  Adjust your response length accordingly - shorter for fewer tokens, longer for more tokens.""",
290
  lines=8,
291
+ placeholder="Enter the system prompt for LinkedIn context...",
292
  )
293
+ linkedin_user_template = gr.Textbox(
294
  label="User Message Template",
295
+ value="Complete this LinkedIn post/content naturally and professionally with approximately {max_tokens} tokens: {text}",
296
  lines=3,
297
  placeholder="Enter the user message template...",
298
  )
 
331
  "creative",
332
  ],
333
  [
334
+ "Professional networking and career development",
335
+ "Excited to share my thoughts on the future of AI in our industry",
336
+ "linkedin",
337
  ],
338
  ],
339
  inputs=[context_input, text_input, context_selector],
 
350
  email_user,
351
  creative_sys,
352
  creative_user,
353
+ linkedin_sys,
354
+ linkedin_user,
355
  ):
356
  """Update suggestions based on input with custom prompts"""
357
  logger.info(
 
371
  "user_template": creative_user,
372
  "temperature": 0.8,
373
  },
374
+ "linkedin": {
375
+ "system_prompt": linkedin_sys,
376
+ "user_template": linkedin_user,
377
  "temperature": 0.7,
378
  },
379
  }
 
406
  email_user_template,
407
  creative_system_prompt,
408
  creative_user_template,
409
+ linkedin_system_prompt,
410
+ linkedin_user_template,
411
  ],
412
  outputs=[status_display, copy_textbox],
413
  )
 
417
  ---
418
 
419
  ### 🎮 How to Use:
420
+ 1. **Select your context** (Email, Creative, or LinkedIn)
421
  2. **Add context information** (optional) - background info, references, or previous context
422
  3. **Enter your text** in the main text area
423
  4. **Adjust output length** (50-500 tokens) in settings
 
429
  - **Context Window**: Add background info, previous conversations, or references to improve suggestions
430
  - **Email**: Try starting with "Dear..." or "I hope..." + add meeting context
431
  - **Creative**: Start with "Once upon a time..." + add story background
432
+ - **LinkedIn**: Perfect for professional posts, career updates, industry insights + add professional context
433
  - **Output Length**: Adjust the token slider for longer or shorter completions
434
  - **Custom Prompts**: Edit the AI prompts to customize behavior for your specific needs
435
 
config/settings.py CHANGED
@@ -60,12 +60,12 @@ class AppSettings:
60
  # Temperature settings for different contexts
61
  self.TEMPERATURE_EMAIL = float(os.getenv("TEMPERATURE_EMAIL", "0.6"))
62
  self.TEMPERATURE_CREATIVE = float(os.getenv("TEMPERATURE_CREATIVE", "0.8"))
63
- self.TEMPERATURE_GENERAL = float(os.getenv("TEMPERATURE_GENERAL", "0.7"))
64
 
65
  # Default token limits for different contexts
66
  self.DEFAULT_TOKENS_EMAIL = int(os.getenv("DEFAULT_TOKENS_EMAIL", "200"))
67
  self.DEFAULT_TOKENS_CREATIVE = int(os.getenv("DEFAULT_TOKENS_CREATIVE", "250"))
68
- self.DEFAULT_TOKENS_GENERAL = int(os.getenv("DEFAULT_TOKENS_GENERAL", "200"))
69
 
70
  # UI Configuration
71
  self.UI_THEME = os.getenv("UI_THEME", "soft")
@@ -135,7 +135,7 @@ class AppSettings:
135
  for temp_attr in [
136
  "TEMPERATURE_EMAIL",
137
  "TEMPERATURE_CREATIVE",
138
- "TEMPERATURE_GENERAL",
139
  ]:
140
  temp_value = getattr(self, temp_attr)
141
  if not (0.0 <= temp_value <= 2.0):
@@ -175,7 +175,7 @@ class AppSettings:
175
  Get configuration for a specific context
176
 
177
  Args:
178
- context: Context name (email, code, creative, general)
179
 
180
  Returns:
181
  Dictionary with context-specific configuration
@@ -191,14 +191,14 @@ class AppSettings:
191
  "default_tokens": self.DEFAULT_TOKENS_CREATIVE,
192
  "model_preference": "anthropic", # Often better for creative content
193
  },
194
- "general": {
195
- "temperature": self.TEMPERATURE_GENERAL,
196
- "default_tokens": self.DEFAULT_TOKENS_GENERAL,
197
  "model_preference": self.DEFAULT_PROVIDER,
198
  },
199
  }
200
 
201
- return context_configs.get(context, context_configs["general"])
202
 
203
  def get_model_for_provider(self, provider: str) -> str:
204
  """
 
60
  # Temperature settings for different contexts
61
  self.TEMPERATURE_EMAIL = float(os.getenv("TEMPERATURE_EMAIL", "0.6"))
62
  self.TEMPERATURE_CREATIVE = float(os.getenv("TEMPERATURE_CREATIVE", "0.8"))
63
+ self.TEMPERATURE_LINKEDIN = float(os.getenv("TEMPERATURE_LINKEDIN", "0.7"))
64
 
65
  # Default token limits for different contexts
66
  self.DEFAULT_TOKENS_EMAIL = int(os.getenv("DEFAULT_TOKENS_EMAIL", "200"))
67
  self.DEFAULT_TOKENS_CREATIVE = int(os.getenv("DEFAULT_TOKENS_CREATIVE", "250"))
68
+ self.DEFAULT_TOKENS_LINKEDIN = int(os.getenv("DEFAULT_TOKENS_LINKEDIN", "200"))
69
 
70
  # UI Configuration
71
  self.UI_THEME = os.getenv("UI_THEME", "soft")
 
135
  for temp_attr in [
136
  "TEMPERATURE_EMAIL",
137
  "TEMPERATURE_CREATIVE",
138
+ "TEMPERATURE_LINKEDIN",
139
  ]:
140
  temp_value = getattr(self, temp_attr)
141
  if not (0.0 <= temp_value <= 2.0):
 
175
  Get configuration for a specific context
176
 
177
  Args:
178
+ context: Context name (email, code, creative, linkedin)
179
 
180
  Returns:
181
  Dictionary with context-specific configuration
 
191
  "default_tokens": self.DEFAULT_TOKENS_CREATIVE,
192
  "model_preference": "anthropic", # Often better for creative content
193
  },
194
+ "linkedin": {
195
+ "temperature": self.TEMPERATURE_LINKEDIN,
196
+ "default_tokens": self.DEFAULT_TOKENS_LINKEDIN,
197
  "model_preference": self.DEFAULT_PROVIDER,
198
  },
199
  }
200
 
201
+ return context_configs.get(context, context_configs["linkedin"])
202
 
203
  def get_model_for_provider(self, provider: str) -> str:
204
  """
settings.py CHANGED
@@ -60,12 +60,12 @@ class AppSettings:
60
  # Temperature settings for different contexts
61
  self.TEMPERATURE_EMAIL = float(os.getenv("TEMPERATURE_EMAIL", "0.6"))
62
  self.TEMPERATURE_CREATIVE = float(os.getenv("TEMPERATURE_CREATIVE", "0.8"))
63
- self.TEMPERATURE_GENERAL = float(os.getenv("TEMPERATURE_GENERAL", "0.7"))
64
 
65
  # Default token limits for different contexts
66
  self.DEFAULT_TOKENS_EMAIL = int(os.getenv("DEFAULT_TOKENS_EMAIL", "200"))
67
  self.DEFAULT_TOKENS_CREATIVE = int(os.getenv("DEFAULT_TOKENS_CREATIVE", "250"))
68
- self.DEFAULT_TOKENS_GENERAL = int(os.getenv("DEFAULT_TOKENS_GENERAL", "200"))
69
 
70
  # UI Configuration
71
  self.UI_THEME = os.getenv("UI_THEME", "soft")
@@ -135,7 +135,7 @@ class AppSettings:
135
  for temp_attr in [
136
  "TEMPERATURE_EMAIL",
137
  "TEMPERATURE_CREATIVE",
138
- "TEMPERATURE_GENERAL",
139
  ]:
140
  temp_value = getattr(self, temp_attr)
141
  if not (0.0 <= temp_value <= 2.0):
@@ -175,7 +175,7 @@ class AppSettings:
175
  Get configuration for a specific context
176
 
177
  Args:
178
- context: Context name (email, code, creative, general)
179
 
180
  Returns:
181
  Dictionary with context-specific configuration
@@ -191,14 +191,14 @@ class AppSettings:
191
  "default_tokens": self.DEFAULT_TOKENS_CREATIVE,
192
  "model_preference": "anthropic", # Often better for creative content
193
  },
194
- "general": {
195
- "temperature": self.TEMPERATURE_GENERAL,
196
- "default_tokens": self.DEFAULT_TOKENS_GENERAL,
197
  "model_preference": self.DEFAULT_PROVIDER,
198
  },
199
  }
200
 
201
- return context_configs.get(context, context_configs["general"])
202
 
203
  def get_model_for_provider(self, provider: str) -> str:
204
  """
 
60
  # Temperature settings for different contexts
61
  self.TEMPERATURE_EMAIL = float(os.getenv("TEMPERATURE_EMAIL", "0.6"))
62
  self.TEMPERATURE_CREATIVE = float(os.getenv("TEMPERATURE_CREATIVE", "0.8"))
63
+ self.TEMPERATURE_LINKEDIN = float(os.getenv("TEMPERATURE_LINKEDIN", "0.7"))
64
 
65
  # Default token limits for different contexts
66
  self.DEFAULT_TOKENS_EMAIL = int(os.getenv("DEFAULT_TOKENS_EMAIL", "200"))
67
  self.DEFAULT_TOKENS_CREATIVE = int(os.getenv("DEFAULT_TOKENS_CREATIVE", "250"))
68
+ self.DEFAULT_TOKENS_LINKEDIN = int(os.getenv("DEFAULT_TOKENS_LINKEDIN", "200"))
69
 
70
  # UI Configuration
71
  self.UI_THEME = os.getenv("UI_THEME", "soft")
 
135
  for temp_attr in [
136
  "TEMPERATURE_EMAIL",
137
  "TEMPERATURE_CREATIVE",
138
+ "TEMPERATURE_LINKEDIN",
139
  ]:
140
  temp_value = getattr(self, temp_attr)
141
  if not (0.0 <= temp_value <= 2.0):
 
175
  Get configuration for a specific context
176
 
177
  Args:
178
+ context: Context name (email, code, creative, linkedin)
179
 
180
  Returns:
181
  Dictionary with context-specific configuration
 
191
  "default_tokens": self.DEFAULT_TOKENS_CREATIVE,
192
  "model_preference": "anthropic", # Often better for creative content
193
  },
194
+ "linkedin": {
195
+ "temperature": self.TEMPERATURE_LINKEDIN,
196
+ "default_tokens": self.DEFAULT_TOKENS_LINKEDIN,
197
  "model_preference": self.DEFAULT_PROVIDER,
198
  },
199
  }
200
 
201
+ return context_configs.get(context, context_configs["linkedin"])
202
 
203
  def get_model_for_provider(self, provider: str) -> str:
204
  """
src/autocomplete.py CHANGED
@@ -57,17 +57,18 @@ class SmartAutoComplete:
57
  "user_template": "Continue this creative writing piece naturally with approximately {max_tokens} tokens: {text}",
58
  "temperature": 0.8,
59
  },
60
- "general": {
61
- "system_prompt": """You are a helpful writing assistant. Generate natural,
62
- contextually appropriate text completions. Focus on:
63
- - Natural language flow
64
- - Contextual relevance
65
- - Clarity and coherence
66
- - Appropriate tone
 
67
 
68
  IMPORTANT: Generate a completion that is approximately {max_tokens} tokens long.
69
  Adjust your response length accordingly - shorter for fewer tokens, longer for more tokens.""",
70
- "user_template": "Complete this text naturally with approximately {max_tokens} tokens: {text}",
71
  "temperature": 0.7,
72
  },
73
  }
@@ -84,7 +85,7 @@ class SmartAutoComplete:
84
  def get_suggestions(
85
  self,
86
  text: str,
87
- context: str = "general",
88
  max_tokens: int = 150,
89
  user_context: str = "",
90
  ) -> List[str]:
@@ -93,7 +94,7 @@ class SmartAutoComplete:
93
 
94
  Args:
95
  text: Input text to complete
96
- context: Context type (email, creative, general)
97
  max_tokens: Maximum tokens in the response
98
  user_context: Additional context provided by the user
99
 
@@ -149,7 +150,7 @@ class SmartAutoComplete:
149
  """Get suggestions from the API client"""
150
  try:
151
  context_config = self.CONTEXT_PROMPTS.get(
152
- request.context, self.CONTEXT_PROMPTS["general"]
153
  )
154
 
155
  # Format system prompt with max_tokens and user context
 
57
  "user_template": "Continue this creative writing piece naturally with approximately {max_tokens} tokens: {text}",
58
  "temperature": 0.8,
59
  },
60
+ "linkedin": {
61
+ "system_prompt": """You are a LinkedIn writing assistant specialized in professional networking content. Generate engaging,
62
+ professional LinkedIn-appropriate text completions. Focus on:
63
+ - Professional networking tone
64
+ - Industry-relevant language
65
+ - Engaging and authentic voice
66
+ - LinkedIn best practices (hashtags, mentions, professional insights)
67
+ - Career development and business communication
68
 
69
  IMPORTANT: Generate a completion that is approximately {max_tokens} tokens long.
70
  Adjust your response length accordingly - shorter for fewer tokens, longer for more tokens.""",
71
+ "user_template": "Complete this LinkedIn post/content naturally and professionally with approximately {max_tokens} tokens: {text}",
72
  "temperature": 0.7,
73
  },
74
  }
 
85
  def get_suggestions(
86
  self,
87
  text: str,
88
+ context: str = "linkedin",
89
  max_tokens: int = 150,
90
  user_context: str = "",
91
  ) -> List[str]:
 
94
 
95
  Args:
96
  text: Input text to complete
97
+ context: Context type (email, creative, linkedin)
98
  max_tokens: Maximum tokens in the response
99
  user_context: Additional context provided by the user
100
 
 
150
  """Get suggestions from the API client"""
151
  try:
152
  context_config = self.CONTEXT_PROMPTS.get(
153
+ request.context, self.CONTEXT_PROMPTS["linkedin"]
154
  )
155
 
156
  # Format system prompt with max_tokens and user context
src/utils.py CHANGED
@@ -3,282 +3,316 @@ Utility functions for Smart Auto-Complete
3
  Provides common functionality for text processing, logging, and validation
4
  """
5
 
 
6
  import logging
7
  import re
8
  import sys
9
- from typing import Dict, List, Optional, Tuple
10
- import html
11
  import unicodedata
 
12
 
13
 
14
  def setup_logging(level: str = "INFO") -> logging.Logger:
15
  """
16
  Set up logging configuration for the application
17
-
18
  Args:
19
  level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
20
-
21
  Returns:
22
  Configured logger instance
23
  """
24
  # Create logger
25
  logger = logging.getLogger("smart_autocomplete")
26
  logger.setLevel(getattr(logging, level.upper()))
27
-
28
  # Remove existing handlers to avoid duplicates
29
  for handler in logger.handlers[:]:
30
  logger.removeHandler(handler)
31
-
32
  # Create console handler with formatting
33
  console_handler = logging.StreamHandler(sys.stdout)
34
  console_handler.setLevel(getattr(logging, level.upper()))
35
-
36
  # Create formatter
37
  formatter = logging.Formatter(
38
- '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
39
- datefmt='%Y-%m-%d %H:%M:%S'
40
  )
41
  console_handler.setFormatter(formatter)
42
-
43
  # Add handler to logger
44
  logger.addHandler(console_handler)
45
-
46
  return logger
47
 
48
 
49
  def sanitize_input(text: str) -> str:
50
  """
51
  Sanitize and clean input text for processing
52
-
53
  Args:
54
  text: Raw input text
55
-
56
  Returns:
57
  Cleaned and sanitized text
58
  """
59
  if not text:
60
  return ""
61
-
62
  # Convert to string if not already
63
  text = str(text)
64
-
65
  # HTML escape to prevent injection
66
  text = html.escape(text)
67
-
68
  # Normalize unicode characters
69
- text = unicodedata.normalize('NFKC', text)
70
-
71
  # Remove excessive whitespace but preserve structure
72
- text = re.sub(r'\n\s*\n\s*\n', '\n\n', text) # Max 2 consecutive newlines
73
- text = re.sub(r'[ \t]+', ' ', text) # Multiple spaces/tabs to single space
74
-
75
  # Remove control characters except newlines and tabs
76
- text = ''.join(char for char in text if ord(char) >= 32 or char in '\n\t')
77
-
78
  # Trim leading/trailing whitespace
79
  text = text.strip()
80
-
81
  return text
82
 
83
 
84
  def extract_context_hints(text: str) -> Dict[str, any]:
85
  """
86
  Extract contextual hints from the input text to improve suggestions
87
-
88
  Args:
89
  text: Input text to analyze
90
-
91
  Returns:
92
  Dictionary containing context hints
93
  """
94
  hints = {
95
- 'length': len(text),
96
- 'word_count': len(text.split()),
97
- 'has_greeting': False,
98
- 'has_signature': False,
99
- 'has_code_markers': False,
100
- 'has_questions': False,
101
- 'tone': 'neutral',
102
- 'language_style': 'general'
103
  }
104
-
105
  text_lower = text.lower()
106
-
107
  # Check for email patterns
108
- email_greetings = ['dear', 'hello', 'hi', 'greetings', 'good morning', 'good afternoon']
109
- email_signatures = ['sincerely', 'best regards', 'thank you', 'yours truly', 'kind regards']
110
-
111
- hints['has_greeting'] = any(greeting in text_lower for greeting in email_greetings)
112
- hints['has_signature'] = any(signature in text_lower for signature in email_signatures)
113
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  # Check for code patterns
115
- code_markers = ['//', '/*', '*/', '#', 'def ', 'function', 'class ', 'import ', 'from ']
116
- hints['has_code_markers'] = any(marker in text_lower for marker in code_markers)
117
-
 
 
 
 
 
 
 
 
 
 
118
  # Check for questions
119
- hints['has_questions'] = '?' in text or any(q in text_lower for q in ['what', 'how', 'why', 'when', 'where', 'who'])
120
-
 
 
121
  # Determine tone
122
- formal_words = ['please', 'kindly', 'respectfully', 'sincerely', 'professional']
123
- casual_words = ['hey', 'yeah', 'cool', 'awesome', 'thanks']
124
-
125
  formal_count = sum(1 for word in formal_words if word in text_lower)
126
  casual_count = sum(1 for word in casual_words if word in text_lower)
127
-
128
  if formal_count > casual_count:
129
- hints['tone'] = 'formal'
130
  elif casual_count > formal_count:
131
- hints['tone'] = 'casual'
132
-
133
  # Determine language style
134
- if hints['has_code_markers']:
135
- hints['language_style'] = 'technical'
136
- elif hints['has_greeting'] or hints['has_signature']:
137
- hints['language_style'] = 'business'
138
- elif any(creative in text_lower for creative in ['once upon', 'story', 'character', 'plot']):
139
- hints['language_style'] = 'creative'
140
-
 
 
 
141
  return hints
142
 
143
 
144
  def validate_api_key(api_key: str, provider: str) -> bool:
145
  """
146
  Validate API key format for different providers
147
-
148
  Args:
149
  api_key: The API key to validate
150
  provider: The provider name (openai, anthropic)
151
-
152
  Returns:
153
  True if the key format is valid, False otherwise
154
  """
155
  if not api_key or not isinstance(api_key, str):
156
  return False
157
-
158
  api_key = api_key.strip()
159
-
160
- if provider.lower() == 'openai':
161
  # OpenAI keys start with 'sk-' and are typically 51 characters
162
- return api_key.startswith('sk-') and len(api_key) >= 40
163
- elif provider.lower() == 'anthropic':
164
- # Anthropic keys start with 'sk-ant-'
165
- return api_key.startswith('sk-ant-') and len(api_key) >= 40
166
-
167
  return False
168
 
169
 
170
  def truncate_text(text: str, max_length: int, preserve_words: bool = True) -> str:
171
  """
172
  Truncate text to a maximum length while optionally preserving word boundaries
173
-
174
  Args:
175
  text: Text to truncate
176
  max_length: Maximum allowed length
177
  preserve_words: Whether to preserve word boundaries
178
-
179
  Returns:
180
  Truncated text
181
  """
182
  if len(text) <= max_length:
183
  return text
184
-
185
  if not preserve_words:
186
  return text[:max_length].rstrip() + "..."
187
-
188
  # Find the last space before the max_length
189
  truncated = text[:max_length]
190
- last_space = truncated.rfind(' ')
191
-
192
  if last_space > max_length * 0.8: # Only use word boundary if it's not too far back
193
  return text[:last_space].rstrip() + "..."
194
  else:
195
  return text[:max_length].rstrip() + "..."
196
 
197
 
198
- def format_suggestions_for_display(suggestions: List[str], max_display_length: int = 100) -> List[Dict[str, str]]:
 
 
199
  """
200
  Format suggestions for display in the UI
201
-
202
  Args:
203
  suggestions: List of suggestion strings
204
  max_display_length: Maximum length for display
205
-
206
  Returns:
207
  List of formatted suggestion dictionaries
208
  """
209
  formatted = []
210
-
211
  for i, suggestion in enumerate(suggestions, 1):
212
  # Clean the suggestion
213
  clean_suggestion = sanitize_input(suggestion)
214
-
215
  # Create display version (truncated if needed)
216
  display_text = truncate_text(clean_suggestion, max_display_length)
217
-
218
- formatted.append({
219
- 'id': i,
220
- 'text': clean_suggestion,
221
- 'display_text': display_text,
222
- 'length': len(clean_suggestion),
223
- 'word_count': len(clean_suggestion.split())
224
- })
225
-
 
 
226
  return formatted
227
 
228
 
229
  def calculate_text_similarity(text1: str, text2: str) -> float:
230
  """
231
  Calculate similarity between two texts using simple word overlap
232
-
233
  Args:
234
  text1: First text
235
  text2: Second text
236
-
237
  Returns:
238
  Similarity score between 0 and 1
239
  """
240
  if not text1 or not text2:
241
  return 0.0
242
-
243
  # Convert to lowercase and split into words
244
  words1 = set(text1.lower().split())
245
  words2 = set(text2.lower().split())
246
-
247
  # Calculate Jaccard similarity
248
  intersection = len(words1.intersection(words2))
249
  union = len(words1.union(words2))
250
-
251
  return intersection / union if union > 0 else 0.0
252
 
253
 
254
  def get_text_stats(text: str) -> Dict[str, int]:
255
  """
256
  Get basic statistics about the text
257
-
258
  Args:
259
  text: Text to analyze
260
-
261
  Returns:
262
  Dictionary with text statistics
263
  """
264
  if not text:
265
- return {'characters': 0, 'words': 0, 'sentences': 0, 'paragraphs': 0}
266
-
267
  # Count characters (excluding whitespace)
268
- char_count = len(text.replace(' ', '').replace('\n', '').replace('\t', ''))
269
-
270
  # Count words
271
  word_count = len(text.split())
272
-
273
  # Count sentences (rough estimate)
274
- sentence_count = len(re.findall(r'[.!?]+', text))
275
-
276
  # Count paragraphs
277
- paragraph_count = len([p for p in text.split('\n\n') if p.strip()])
278
-
279
  return {
280
- 'characters': char_count,
281
- 'words': word_count,
282
- 'sentences': max(1, sentence_count), # At least 1 sentence
283
- 'paragraphs': max(1, paragraph_count) # At least 1 paragraph
284
  }
 
3
  Provides common functionality for text processing, logging, and validation
4
  """
5
 
6
+ import html
7
  import logging
8
  import re
9
  import sys
 
 
10
  import unicodedata
11
+ from typing import Dict, List, Optional, Tuple
12
 
13
 
14
  def setup_logging(level: str = "INFO") -> logging.Logger:
15
  """
16
  Set up logging configuration for the application
17
+
18
  Args:
19
  level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
20
+
21
  Returns:
22
  Configured logger instance
23
  """
24
  # Create logger
25
  logger = logging.getLogger("smart_autocomplete")
26
  logger.setLevel(getattr(logging, level.upper()))
27
+
28
  # Remove existing handlers to avoid duplicates
29
  for handler in logger.handlers[:]:
30
  logger.removeHandler(handler)
31
+
32
  # Create console handler with formatting
33
  console_handler = logging.StreamHandler(sys.stdout)
34
  console_handler.setLevel(getattr(logging, level.upper()))
35
+
36
  # Create formatter
37
  formatter = logging.Formatter(
38
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
39
+ datefmt="%Y-%m-%d %H:%M:%S",
40
  )
41
  console_handler.setFormatter(formatter)
42
+
43
  # Add handler to logger
44
  logger.addHandler(console_handler)
45
+
46
  return logger
47
 
48
 
49
  def sanitize_input(text: str) -> str:
50
  """
51
  Sanitize and clean input text for processing
52
+
53
  Args:
54
  text: Raw input text
55
+
56
  Returns:
57
  Cleaned and sanitized text
58
  """
59
  if not text:
60
  return ""
61
+
62
  # Convert to string if not already
63
  text = str(text)
64
+
65
  # HTML escape to prevent injection
66
  text = html.escape(text)
67
+
68
  # Normalize unicode characters
69
+ text = unicodedata.normalize("NFKC", text)
70
+
71
  # Remove excessive whitespace but preserve structure
72
+ text = re.sub(r"\n\s*\n\s*\n", "\n\n", text) # Max 2 consecutive newlines
73
+ text = re.sub(r"[ \t]+", " ", text) # Multiple spaces/tabs to single space
74
+
75
  # Remove control characters except newlines and tabs
76
+ text = "".join(char for char in text if ord(char) >= 32 or char in "\n\t")
77
+
78
  # Trim leading/trailing whitespace
79
  text = text.strip()
80
+
81
  return text
82
 
83
 
84
  def extract_context_hints(text: str) -> Dict[str, any]:
85
  """
86
  Extract contextual hints from the input text to improve suggestions
87
+
88
  Args:
89
  text: Input text to analyze
90
+
91
  Returns:
92
  Dictionary containing context hints
93
  """
94
  hints = {
95
+ "length": len(text),
96
+ "word_count": len(text.split()),
97
+ "has_greeting": False,
98
+ "has_signature": False,
99
+ "has_code_markers": False,
100
+ "has_questions": False,
101
+ "tone": "neutral",
102
+ "language_style": "linkedin",
103
  }
104
+
105
  text_lower = text.lower()
106
+
107
  # Check for email patterns
108
+ email_greetings = [
109
+ "dear",
110
+ "hello",
111
+ "hi",
112
+ "greetings",
113
+ "good morning",
114
+ "good afternoon",
115
+ ]
116
+ email_signatures = [
117
+ "sincerely",
118
+ "best regards",
119
+ "thank you",
120
+ "yours truly",
121
+ "kind regards",
122
+ ]
123
+
124
+ hints["has_greeting"] = any(greeting in text_lower for greeting in email_greetings)
125
+ hints["has_signature"] = any(
126
+ signature in text_lower for signature in email_signatures
127
+ )
128
+
129
  # Check for code patterns
130
+ code_markers = [
131
+ "//",
132
+ "/*",
133
+ "*/",
134
+ "#",
135
+ "def ",
136
+ "function",
137
+ "class ",
138
+ "import ",
139
+ "from ",
140
+ ]
141
+ hints["has_code_markers"] = any(marker in text_lower for marker in code_markers)
142
+
143
  # Check for questions
144
+ hints["has_questions"] = "?" in text or any(
145
+ q in text_lower for q in ["what", "how", "why", "when", "where", "who"]
146
+ )
147
+
148
  # Determine tone
149
+ formal_words = ["please", "kindly", "respectfully", "sincerely", "professional"]
150
+ casual_words = ["hey", "yeah", "cool", "awesome", "thanks"]
151
+
152
  formal_count = sum(1 for word in formal_words if word in text_lower)
153
  casual_count = sum(1 for word in casual_words if word in text_lower)
154
+
155
  if formal_count > casual_count:
156
+ hints["tone"] = "formal"
157
  elif casual_count > formal_count:
158
+ hints["tone"] = "casual"
159
+
160
  # Determine language style
161
+ if hints["has_code_markers"]:
162
+ hints["language_style"] = "technical"
163
+ elif hints["has_greeting"] or hints["has_signature"]:
164
+ hints["language_style"] = "business"
165
+ elif any(
166
+ creative in text_lower
167
+ for creative in ["once upon", "story", "character", "plot"]
168
+ ):
169
+ hints["language_style"] = "creative"
170
+
171
  return hints
172
 
173
 
174
  def validate_api_key(api_key: str, provider: str) -> bool:
175
  """
176
  Validate API key format for different providers
177
+
178
  Args:
179
  api_key: The API key to validate
180
  provider: The provider name (openai, anthropic)
181
+
182
  Returns:
183
  True if the key format is valid, False otherwise
184
  """
185
  if not api_key or not isinstance(api_key, str):
186
  return False
187
+
188
  api_key = api_key.strip()
189
+
190
+ if provider.lower() == "openai":
191
  # OpenAI keys start with 'sk-' and are typically 51 characters
192
+ return api_key.startswith("sk-") and len(api_key) >= 40
193
+ elif provider.lower() == "anthropic":
194
+ # Anthropic keys start with 'sk-ant-'
195
+ return api_key.startswith("sk-ant-") and len(api_key) >= 40
196
+
197
  return False
198
 
199
 
200
  def truncate_text(text: str, max_length: int, preserve_words: bool = True) -> str:
201
  """
202
  Truncate text to a maximum length while optionally preserving word boundaries
203
+
204
  Args:
205
  text: Text to truncate
206
  max_length: Maximum allowed length
207
  preserve_words: Whether to preserve word boundaries
208
+
209
  Returns:
210
  Truncated text
211
  """
212
  if len(text) <= max_length:
213
  return text
214
+
215
  if not preserve_words:
216
  return text[:max_length].rstrip() + "..."
217
+
218
  # Find the last space before the max_length
219
  truncated = text[:max_length]
220
+ last_space = truncated.rfind(" ")
221
+
222
  if last_space > max_length * 0.8: # Only use word boundary if it's not too far back
223
  return text[:last_space].rstrip() + "..."
224
  else:
225
  return text[:max_length].rstrip() + "..."
226
 
227
 
228
def format_suggestions_for_display(
    suggestions: List[str], max_display_length: int = 100
) -> List[Dict[str, object]]:
    """
    Format suggestions for display in the UI

    Args:
        suggestions: List of suggestion strings; None is treated as empty
        max_display_length: Maximum length for display

    Returns:
        List of formatted suggestion dictionaries, in input order. Each has
        "id" (1-based position), "text" (the suggestion after
        sanitize_input), "display_text" (the text passed through
        truncate_text), "length" and "word_count" (both measured on the
        sanitized text)
    """
    formatted = []

    # "or []" lets callers pass None when no suggestions were produced
    for position, suggestion in enumerate(suggestions or [], 1):
        # Clean the suggestion
        clean_suggestion = sanitize_input(suggestion)

        formatted.append(
            {
                "id": position,
                "text": clean_suggestion,
                # Display version (truncated if needed)
                "display_text": truncate_text(clean_suggestion, max_display_length),
                "length": len(clean_suggestion),
                "word_count": len(clean_suggestion.split()),
            }
        )

    return formatted
261
 
262
 
263
  def calculate_text_similarity(text1: str, text2: str) -> float:
264
  """
265
  Calculate similarity between two texts using simple word overlap
266
+
267
  Args:
268
  text1: First text
269
  text2: Second text
270
+
271
  Returns:
272
  Similarity score between 0 and 1
273
  """
274
  if not text1 or not text2:
275
  return 0.0
276
+
277
  # Convert to lowercase and split into words
278
  words1 = set(text1.lower().split())
279
  words2 = set(text2.lower().split())
280
+
281
  # Calculate Jaccard similarity
282
  intersection = len(words1.intersection(words2))
283
  union = len(words1.union(words2))
284
+
285
  return intersection / union if union > 0 else 0.0
286
 
287
 
288
  def get_text_stats(text: str) -> Dict[str, int]:
289
  """
290
  Get basic statistics about the text
291
+
292
  Args:
293
  text: Text to analyze
294
+
295
  Returns:
296
  Dictionary with text statistics
297
  """
298
  if not text:
299
+ return {"characters": 0, "words": 0, "sentences": 0, "paragraphs": 0}
300
+
301
  # Count characters (excluding whitespace)
302
+ char_count = len(text.replace(" ", "").replace("\n", "").replace("\t", ""))
303
+
304
  # Count words
305
  word_count = len(text.split())
306
+
307
  # Count sentences (rough estimate)
308
+ sentence_count = len(re.findall(r"[.!?]+", text))
309
+
310
  # Count paragraphs
311
+ paragraph_count = len([p for p in text.split("\n\n") if p.strip()])
312
+
313
  return {
314
+ "characters": char_count,
315
+ "words": word_count,
316
+ "sentences": max(1, sentence_count), # At least 1 sentence
317
+ "paragraphs": max(1, paragraph_count), # At least 1 paragraph
318
  }