lu-ny committed on
Commit
4076058
·
1 Parent(s): 73e58e0

Update app.py

Browse files

updated space; port from colab

Files changed (1) hide show
  1. app.py +33 -128
app.py CHANGED
@@ -16,6 +16,9 @@ book_genres = ["Adventure", "Romance","Mystery", "Science Fiction","Fantasy","Th
16
  book_themes = ["Love and Relationships","Friendship","Family","Coming of Age","Identity and Self-discovery","Adventure and Exploration","Mystery and Intrigue","Science and Technology","Fantasy Worlds","Historical Events","War and Conflict","Survival","Good vs. Evil","Justice and Morality","Revenge","Betrayal","Hope and Resilience","Isolation and Loneliness","Social Justice","Environmental Conservation","Political Corruption","Human Rights","Dystopia","Utopia","Alien Encounters","Time Travel","Art and Creativity","Death and Mortality","Cultural Identity","Personal Growth","Addiction","Education and Knowledge","Freedom and Liberation","Equality and Inequality","Society and Class","Legacy and Inheritance","Religion and Spirituality","Grief and Loss","Ambition","Transformation","Humor and Satire","Survival of the Fittest","Dreams and Aspirations","Change and Adaptation","Forgiveness","Nature and the Environment","Exploration of the Unknown","Conflict Resolution","Fate and Destiny","Artificial Intelligence","Cybersecurity","Space Exploration","Parallel Universes","Economic Struggles","Social Media and Technology","Innovation and Invention","Psychological Thrills","Philosophical Contemplation","Ancient Mythology","Modern Mythology","Epic Journeys","The Power of Imagination","Unrequited Love","Secrets and Hidden Truths","Warriors and Heroes","Surviving Adversity","Dreams and Nightmares","Rivalry and Competition","Alien Worlds","Conspiracy","Apocalyptic Scenarios","Conformity vs. Individuality","Legacy and Heritage","Nature vs. Nurture","Moral Dilemmas","Adventure and Discovery","Journey of Self-Discovery","Unlikely Friendships","Struggle for Power","Exploration of Fear","The Supernatural","Cultural Clashes","Identity Crisis","The Quest for Knowledge","The Human Condition","Hidden Agendas","Escapism","The Pursuit of Happiness","Redemption","Rebellion","Feminism and Gender Issues","Exploration of Dreams","Innocence vs. Experience","Chaos and Order","Exploration of Evil"]
17
  writing_tones = ["Formal","Informal","Humorous","Serious","Sarcastic","Satirical","Melancholic","Optimistic","Pessimistic","Cynical","Hopeful","Lighthearted","Dark","Gothic","Whimsical","Mysterious","Eerie","Solemn","Playful","Thoughtful","Reflective","Ironic","Sensual","Nostalgic","Surreal","Dreamy","Awe-Inspiring","Introspective","Confessional","Dramatic","Exuberant","Melodramatic","Hypnotic","Inspirational","Tongue-in-Cheek","Witty","Calm","Passionate","Detached","Frightening","Intense","Calm","Suspenseful","Brave","Desperate","Eloquent","Vivid","Casual","Whispering","Eloquent","Bitter","Tragic","Pensive","Frenzied","Melodious","Resolute","Soothing","Brisk","Lyrical","Objective","Factual","Contemplative","Sardonic","Sympathetic","Objective","Sincere","Wistful","Stoic","Empathetic","Matter-of-fact","Sentimental","Sharp","Understated","Exaggerated","Casual","Bombastic","Poetic","Charming","Apologetic","Defensive","Confrontational","Inquisitive","Candid","Reverent","Matter-of-fact","Amusing","Enthusiastic","Questioning","Reproachful","Hopeless","Despondent","Wry","Sulking","Serene","Detached","Confident","Steadfast","Foolish","Impassioned","Indignant","Self-Deprecating","Wandering","Inspiring","Bewildered"]
18
 
 
 
 
19
  ######################################
20
  ########## Embeddings Class ##########
21
  ######################################
@@ -23,7 +26,7 @@ class EmbeddingGenerator:
23
  def __init__(self, model_id):
24
  self.tokenizer = AutoTokenizer.from_pretrained(model_id)
25
  self.model = AutoModel.from_pretrained(model_id)
26
-
27
  def calculate_cosine_similarity(self, embedding1, embedding2):
28
  return cosine_similarity(embedding1, embedding2)[0][0]
29
 
@@ -67,56 +70,56 @@ class EmbeddingGenerator:
67
  selected_string = ', '.join(selected_values)
68
  return selected_string
69
 
70
- # testing different embeddings models that can fit in colab,
71
- # need something smallish but also one that can create good semantic word embeddings for cosine similarity to work well
72
- #model_id = "sentence-transformers/all-MiniLM-L6-v2"
73
- #model_id = "BAAI/bge-small-en-v1.5"
74
- # idk if this will work with CPUs, will either be too slow or too big
75
- model_id = 'roberta-base'
76
-
77
  #instantiate our class
78
  embedding_generator = EmbeddingGenerator(model_id)
79
-
80
- #generate embeddings
81
  genre_embeddings = embedding_generator.get_embeddings(book_genres)
82
  theme_embeddings = embedding_generator.get_embeddings(book_themes)
83
  tone_embeddings = embedding_generator.get_embeddings(writing_tones)
84
 
85
  # Clear memory
86
  del embedding_generator
87
- # torch.cuda.empty_cache()
88
 
 
 
 
89
  # helper function to format the prompt appropriately.
90
  # For this creative writing tool, the user doesn't design the prompt itself
91
  #but rather genres, tones, & themes of a book to include
92
  def format_prompt(genres, tones, themes):
93
- #reinstantiate our embeddings class so we can compare the embeddings
94
- embedding_generator = EmbeddingGenerator("roberta-base")
95
- # pick 2-5 random ones if user leaves the field blank
96
- # lower threshold is to avoid selecting synonyms while upper threshold is to avoid antonyms
97
  if not genres:
98
- genres = embedding_generator.select_values_with_medium_similarity(genre_embeddings, random.randint(3, 5), 0.01, 0.7) # Adjust thresholds as needed
99
- if not tones:
100
- tones = embedding_generator.select_values_with_medium_similarity(tone_embeddings, random.randint(3, 5), 0.01, 0.7) # Adjust thresholds as needed
101
  if not themes:
102
- themes = embedding_generator.select_values_with_medium_similarity(theme_embeddings, random.randint(3, 5), 0.01, 0.7) # Adjust thresholds as needed
103
 
104
  # we won't need our embeddings generator after this step
105
  del embedding_generator
106
-
107
- #Alpaca format since we can't use mixtral on free CPU settings
108
- prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
 
109
  #prompt we are using for now
110
  user_prompt = f"Write a novel title and a detailed, creative summary/synopsis for a book that falls under the following \n genres: {genres}, \n themes: {themes}, \n and tones: {tones}."
111
  # create our prompt according to Mistral's guidelines
112
- prompt += f"\n### Instruction:\n{user_prompt}\n"
113
  prompt += f"\n### Response:\n"
 
114
  return prompt
115
 
 
 
 
116
  def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max_length=512, context_length=1024):
117
- # Get our API key and define our inference api endpoint model
118
- API_KEY = os.environ["API_KEY"]
119
- API_ENDPOINT = 'https://huggingface.co/HuggingFaceH4/zephyr-7b-beta'
120
  prompt = format_prompt(genres, tones, themes)
121
 
122
  # Generate a new random seed for each request
@@ -134,6 +137,7 @@ def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max
134
  "use_cache": False
135
  }
136
  }
 
137
  headers = {"Authorization": f"Bearer {API_KEY}"}
138
 
139
  try:
@@ -151,6 +155,9 @@ def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max
151
  except Exception as e:
152
  return f"Error: {str(e)} - An error occurred while generating text."
153
 
 
 
 
154
  def launch_interface():
155
  iface = gr.Interface(
156
  fn=generate_novel_title_and_summary,
@@ -169,107 +176,5 @@ def launch_interface():
169
 
170
  iface.launch(debug=True)
171
 
172
-
173
-
174
-
175
- ''' #setting this aside for now, going back to original deployment bc I know it works and I dont wanna play around with gradio rn
176
-
177
- # we could try something larger, I need to check the models
178
- # using zephyr for now because its pretty quick
179
- client = InferenceClient(
180
- 'HuggingFaceH4/zephyr-7b-beta'
181
- # "v1olet/v1olet_marcoroni-go-bruins-merge-7B"
182
- )
183
-
184
- # main function
185
- def generate(genres, themes, tones, system_prompt, temperature=1.25, max_new_tokens=512, top_p=0.95, repetition_penalty=1.15,):
186
- # check the temperature value, should not be too low, and make sure the values are floats
187
- temperature = float(temperature)
188
- if temperature < 1e-2:
189
- temperature = 1e-2
190
- top_p = float(top_p)
191
-
192
- generate_kwargs = dict(
193
- temperature=temperature,
194
- max_new_tokens=max_new_tokens,
195
- top_p=top_p,
196
- repetition_penalty=repetition_penalty,
197
- do_sample=True,
198
- # lets choose a random seed
199
- seed=random.randint(1, 1000000)
200
- )
201
-
202
- formatted_prompt = format_prompt(f"{system_prompt}, '' ", genres, tones, themes)
203
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False, load_in_4bit=True, use_flash_attention_2=True)
204
- output = ""
205
-
206
- for response in stream:
207
- output += response.token.text
208
- yield output
209
- return output
210
-
211
- additional_inputs=[
212
- gr.Textbox(
213
- label="System Prompt",
214
- max_lines=1,
215
- interactive=True,
216
- ),
217
- gr.Slider(
218
- label="Temperature",
219
- value=0.9,
220
- minimum=0.0,
221
- maximum=1.0,
222
- step=0.05,
223
- interactive=True,
224
- info="Higher values produce more diverse outputs but can cause loss of coherence",
225
- ),
226
- gr.Slider(
227
- label="Max new tokens",
228
- value=256,
229
- minimum=0,
230
- maximum=1048,
231
- step=64,
232
- interactive=True,
233
- info="The maximum numbers of new tokens (approx. wordcount) generated",
234
- ),
235
- gr.Slider(
236
- label="Top-p (nucleus sampling)",
237
- value=0.90,
238
- minimum=0.0,
239
- maximum=1,
240
- step=0.05,
241
- interactive=True,
242
- info="Higher values sample more low-probability tokens",
243
- ),
244
- gr.Slider(
245
- label="Repetition penalty",
246
- value=1.2,
247
- minimum=1.0,
248
- maximum=2.0,
249
- step=0.05,
250
- interactive=True,
251
- info="Penalize repeated tokens, encourages creativity",
252
- )
253
- ]
254
-
255
- def launch_interface():
256
- iface = gr.Interface(
257
- fn=generate,
258
- inputs=[
259
- gr.Textbox("", label="Book Genres (comma-separated, or leave blank!)"),
260
- gr.Textbox("", label="Book Themes (comma-separated, or leave blank!)"),
261
- gr.Textbox("", label="Writing Tone (comma-separated, or leave blank!)"),
262
- ],
263
- #additional_inputs=additional_inputs,
264
- outputs="text",
265
- live=False,
266
- title="Novel Title and Summary Generator",
267
- description='A fun creative writing tool, designed for when I have writer\'s block. Use it to practice building worlds, characters, scenes, etc. Write chapter 1, or a plot outline.' ,
268
- theme='ParityError/Interstellar')
269
- #,additional_inputs=additional_inputs)
270
-
271
- iface.queue().launch(debug=True)
272
-
273
- '''
274
  if __name__=="__main__":
275
  launch_interface()
 
16
  book_themes = ["Love and Relationships","Friendship","Family","Coming of Age","Identity and Self-discovery","Adventure and Exploration","Mystery and Intrigue","Science and Technology","Fantasy Worlds","Historical Events","War and Conflict","Survival","Good vs. Evil","Justice and Morality","Revenge","Betrayal","Hope and Resilience","Isolation and Loneliness","Social Justice","Environmental Conservation","Political Corruption","Human Rights","Dystopia","Utopia","Alien Encounters","Time Travel","Art and Creativity","Death and Mortality","Cultural Identity","Personal Growth","Addiction","Education and Knowledge","Freedom and Liberation","Equality and Inequality","Society and Class","Legacy and Inheritance","Religion and Spirituality","Grief and Loss","Ambition","Transformation","Humor and Satire","Survival of the Fittest","Dreams and Aspirations","Change and Adaptation","Forgiveness","Nature and the Environment","Exploration of the Unknown","Conflict Resolution","Fate and Destiny","Artificial Intelligence","Cybersecurity","Space Exploration","Parallel Universes","Economic Struggles","Social Media and Technology","Innovation and Invention","Psychological Thrills","Philosophical Contemplation","Ancient Mythology","Modern Mythology","Epic Journeys","The Power of Imagination","Unrequited Love","Secrets and Hidden Truths","Warriors and Heroes","Surviving Adversity","Dreams and Nightmares","Rivalry and Competition","Alien Worlds","Conspiracy","Apocalyptic Scenarios","Conformity vs. Individuality","Legacy and Heritage","Nature vs. Nurture","Moral Dilemmas","Adventure and Discovery","Journey of Self-Discovery","Unlikely Friendships","Struggle for Power","Exploration of Fear","The Supernatural","Cultural Clashes","Identity Crisis","The Quest for Knowledge","The Human Condition","Hidden Agendas","Escapism","The Pursuit of Happiness","Redemption","Rebellion","Feminism and Gender Issues","Exploration of Dreams","Innocence vs. Experience","Chaos and Order","Exploration of Evil"]
17
  writing_tones = ["Formal","Informal","Humorous","Serious","Sarcastic","Satirical","Melancholic","Optimistic","Pessimistic","Cynical","Hopeful","Lighthearted","Dark","Gothic","Whimsical","Mysterious","Eerie","Solemn","Playful","Thoughtful","Reflective","Ironic","Sensual","Nostalgic","Surreal","Dreamy","Awe-Inspiring","Introspective","Confessional","Dramatic","Exuberant","Melodramatic","Hypnotic","Inspirational","Tongue-in-Cheek","Witty","Calm","Passionate","Detached","Frightening","Intense","Calm","Suspenseful","Brave","Desperate","Eloquent","Vivid","Casual","Whispering","Eloquent","Bitter","Tragic","Pensive","Frenzied","Melodious","Resolute","Soothing","Brisk","Lyrical","Objective","Factual","Contemplative","Sardonic","Sympathetic","Objective","Sincere","Wistful","Stoic","Empathetic","Matter-of-fact","Sentimental","Sharp","Understated","Exaggerated","Casual","Bombastic","Poetic","Charming","Apologetic","Defensive","Confrontational","Inquisitive","Candid","Reverent","Matter-of-fact","Amusing","Enthusiastic","Questioning","Reproachful","Hopeless","Despondent","Wry","Sulking","Serene","Detached","Confident","Steadfast","Foolish","Impassioned","Indignant","Self-Deprecating","Wandering","Inspiring","Bewildered"]
18
 
19
+ API_ENDPOINT = 'https://api-inference.huggingface.co/HuggingFaceH4/zephyr-7b-beta'
20
+ API_KEY = os.getenv('API_KEY')
21
+
22
  ######################################
23
  ########## Embeddings Class ##########
24
  ######################################
 
26
  def __init__(self, model_id):
27
  self.tokenizer = AutoTokenizer.from_pretrained(model_id)
28
  self.model = AutoModel.from_pretrained(model_id)
29
+
30
  def calculate_cosine_similarity(self, embedding1, embedding2):
31
  return cosine_similarity(embedding1, embedding2)[0][0]
32
 
 
70
  selected_string = ', '.join(selected_values)
71
  return selected_string
72
 
73
+ ###########################################
74
+ ########## CREATE OUR EMBEDDINGS ##########
75
+ ###########################################
76
+ model_id = 'bert-base-uncased'
 
 
 
77
  #instantiate our class
78
  embedding_generator = EmbeddingGenerator(model_id)
 
 
79
  genre_embeddings = embedding_generator.get_embeddings(book_genres)
80
  theme_embeddings = embedding_generator.get_embeddings(book_themes)
81
  tone_embeddings = embedding_generator.get_embeddings(writing_tones)
82
 
83
  # Clear memory
84
  del embedding_generator
85
+ #torch.cuda.empty_cache()
86
 
87
+ ###########################################
88
+ ############ PROMPT FORMATTING ############
89
+ ###########################################
90
  # helper function to format the prompt appropriately.
91
  # For this creative writing tool, the user doesn't design the prompt itself
92
  #but rather genres, tones, & themes of a book to include
93
  def format_prompt(genres, tones, themes):
94
+ #reinstantiate our embeddings class so we can compare the embeddings
95
+ embedding_generator = EmbeddingGenerator("bert-base-uncased")
96
+ # pick random ones if user leaves it blank but make sure they aren't opposites
 
97
  if not genres:
98
+ genres = embedding_generator.select_values_with_medium_similarity(genre_embeddings, random.randint(3, 5), 0.6, 0.75) # Adjust thresholds as needed
99
+ if not tones:
100
+ tones = embedding_generator.select_values_with_medium_similarity(tone_embeddings, random.randint(3, 5), 0.9, 0.96) # Adjust thresholds as needed
101
  if not themes:
102
+ themes = embedding_generator.select_values_with_medium_similarity(theme_embeddings, random.randint(3, 5), 0.45, 0.8) # Adjust thresholds as needed
103
 
104
  # we won't need our embeddings generator after this step
105
  del embedding_generator
106
+ torch.cuda.empty_cache()
107
+
108
+ #Alpaca format
109
+ #prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
110
  #prompt we are using for now
111
  user_prompt = f"Write a novel title and a detailed, creative summary/synopsis for a book that falls under the following \n genres: {genres}, \n themes: {themes}, \n and tones: {tones}."
112
  # create our prompt according to Mistral's guidelines
113
+ prompt = f"\n### Instruction:\n{user_prompt}\n"
114
  prompt += f"\n### Response:\n"
115
+
116
  return prompt
117
 
118
+ ###########################################
119
+ ############## MAIN FUNCTION ##############
120
+ ###########################################
121
  def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max_length=512, context_length=1024):
122
+
 
 
123
  prompt = format_prompt(genres, tones, themes)
124
 
125
  # Generate a new random seed for each request
 
137
  "use_cache": False
138
  }
139
  }
140
+ # You can get an Inference API for FREE
141
  headers = {"Authorization": f"Bearer {API_KEY}"}
142
 
143
  try:
 
155
  except Exception as e:
156
  return f"Error: {str(e)} - An error occurred while generating text."
157
 
158
+ #############################################
159
+ ########## GRADIO INTERFACE LAUNCH ##########
160
+ #############################################
161
  def launch_interface():
162
  iface = gr.Interface(
163
  fn=generate_novel_title_and_summary,
 
176
 
177
  iface.launch(debug=True)
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  if __name__=="__main__":
180
  launch_interface()