Spaces:

lu-ny
/

Novel_Title_Summary_Gen

Sleeping

App Files Files Community

lu-ny committed on Dec 15, 2023

Commit

4076058

1 Parent(s): 73e58e0

Update app.py

Browse files

updated space; port from colab

Files changed (1) hide show

app.py +33 -128

app.py CHANGED Viewed

@@ -16,6 +16,9 @@ book_genres = ["Adventure", "Romance","Mystery", "Science Fiction","Fantasy","Th
 book_themes = ["Love and Relationships","Friendship","Family","Coming of Age","Identity and Self-discovery","Adventure and Exploration","Mystery and Intrigue","Science and Technology","Fantasy Worlds","Historical Events","War and Conflict","Survival","Good vs. Evil","Justice and Morality","Revenge","Betrayal","Hope and Resilience","Isolation and Loneliness","Social Justice","Environmental Conservation","Political Corruption","Human Rights","Dystopia","Utopia","Alien Encounters","Time Travel","Art and Creativity","Death and Mortality","Cultural Identity","Personal Growth","Addiction","Education and Knowledge","Freedom and Liberation","Equality and Inequality","Society and Class","Legacy and Inheritance","Religion and Spirituality","Grief and Loss","Ambition","Transformation","Humor and Satire","Survival of the Fittest","Dreams and Aspirations","Change and Adaptation","Forgiveness","Nature and the Environment","Exploration of the Unknown","Conflict Resolution","Fate and Destiny","Artificial Intelligence","Cybersecurity","Space Exploration","Parallel Universes","Economic Struggles","Social Media and Technology","Innovation and Invention","Psychological Thrills","Philosophical Contemplation","Ancient Mythology","Modern Mythology","Epic Journeys","The Power of Imagination","Unrequited Love","Secrets and Hidden Truths","Warriors and Heroes","Surviving Adversity","Dreams and Nightmares","Rivalry and Competition","Alien Worlds","Conspiracy","Apocalyptic Scenarios","Conformity vs. Individuality","Legacy and Heritage","Nature vs. Nurture","Moral Dilemmas","Adventure and Discovery","Journey of Self-Discovery","Unlikely Friendships","Struggle for Power","Exploration of Fear","The Supernatural","Cultural Clashes","Identity Crisis","The Quest for Knowledge","The Human Condition","Hidden Agendas","Escapism","The Pursuit of Happiness","Redemption","Rebellion","Feminism and Gender Issues","Exploration of Dreams","Innocence vs. Experience","Chaos and Order","Exploration of Evil"]
 writing_tones = ["Formal","Informal","Humorous","Serious","Sarcastic","Satirical","Melancholic","Optimistic","Pessimistic","Cynical","Hopeful","Lighthearted","Dark","Gothic","Whimsical","Mysterious","Eerie","Solemn","Playful","Thoughtful","Reflective","Ironic","Sensual","Nostalgic","Surreal","Dreamy","Awe-Inspiring","Introspective","Confessional","Dramatic","Exuberant","Melodramatic","Hypnotic","Inspirational","Tongue-in-Cheek","Witty","Calm","Passionate","Detached","Frightening","Intense","Calm","Suspenseful","Brave","Desperate","Eloquent","Vivid","Casual","Whispering","Eloquent","Bitter","Tragic","Pensive","Frenzied","Melodious","Resolute","Soothing","Brisk","Lyrical","Objective","Factual","Contemplative","Sardonic","Sympathetic","Objective","Sincere","Wistful","Stoic","Empathetic","Matter-of-fact","Sentimental","Sharp","Understated","Exaggerated","Casual","Bombastic","Poetic","Charming","Apologetic","Defensive","Confrontational","Inquisitive","Candid","Reverent","Matter-of-fact","Amusing","Enthusiastic","Questioning","Reproachful","Hopeless","Despondent","Wry","Sulking","Serene","Detached","Confident","Steadfast","Foolish","Impassioned","Indignant","Self-Deprecating","Wandering","Inspiring","Bewildered"]
 ######################################
 ########## Embeddings Class ##########
 ######################################
@@ -23,7 +26,7 @@ class EmbeddingGenerator:
     def __init__(self, model_id):
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model = AutoModel.from_pretrained(model_id)
     def calculate_cosine_similarity(self, embedding1, embedding2):
         return cosine_similarity(embedding1, embedding2)[0][0]
@@ -67,56 +70,56 @@ class EmbeddingGenerator:
         selected_string = ', '.join(selected_values)
         return selected_string
-# testing different embeddings models that can fit in colab,
-# need something smallish but also one that can create good semantic word embeddings for cosine similarity to work well
-#model_id = "sentence-transformers/all-MiniLM-L6-v2"
-#model_id = "BAAI/bge-small-en-v1.5"
-# idk if this will work with CPUs, will either be too slow or too big
-model_id = 'roberta-base'
 #instantiate our class
 embedding_generator = EmbeddingGenerator(model_id)
-#generate embeddings
 genre_embeddings = embedding_generator.get_embeddings(book_genres)
 theme_embeddings = embedding_generator.get_embeddings(book_themes)
 tone_embeddings = embedding_generator.get_embeddings(writing_tones)
 # Clear memory
 del embedding_generator
-# torch.cuda.empty_cache()
 # Helper function that builds the prompt sent to the model.
 # For this creative writing tool, the user doesn't write the prompt itself;
 # they only supply the genres, tones, and themes the book should include.
 def format_prompt(genres, tones, themes):
-    #reinstantiate our embeddings class so we can compare the embeddings
-    embedding_generator = EmbeddingGenerator("roberta-base")
-    # pick 2-5 random ones if user leaves the field blank
-    # lower threshold is to avoid selecting synonyms while upper threshold is to avoid antonyms
     if not genres:
-        genres = embedding_generator.select_values_with_medium_similarity(genre_embeddings, random.randint(3, 5), 0.01, 0.7)  # Adjust thresholds as needed
-    if not tones:
-        tones = embedding_generator.select_values_with_medium_similarity(tone_embeddings, random.randint(3, 5), 0.01, 0.7)  # Adjust thresholds as needed
     if not themes:
-        themes = embedding_generator.select_values_with_medium_similarity(theme_embeddings, random.randint(3, 5), 0.01, 0.7)  # Adjust thresholds as needed
     # we won't need our embeddings generator after this step
     del embedding_generator
-    #Alpaca format since we can't use mixtral on free CPU settings
-    prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
     #prompt we are using for now
     user_prompt = f"Write a novel title and a detailed, creative summary/synopsis for a book that falls under the following \n genres: {genres}, \n themes: {themes}, \n and tones: {tones}."
     # build the prompt in the Alpaca-style "### Instruction" / "### Response" layout
-    prompt += f"\n### Instruction:\n{user_prompt}\n"
     prompt += f"\n### Response:\n"
     return prompt
 def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max_length=512, context_length=1024):
-    # Get our API key and define our inference api endpoint model
-    API_KEY = os.environ["API_KEY"]
-    API_ENDPOINT = 'https://huggingface.co/HuggingFaceH4/zephyr-7b-beta'
     prompt = format_prompt(genres, tones, themes)
     # Generate a new random seed for each request
@@ -134,6 +137,7 @@ def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max
             "use_cache": False
         }
     }
     headers = {"Authorization": f"Bearer {API_KEY}"}
     try:
@@ -151,6 +155,9 @@ def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max
     except Exception as e:
         return f"Error: {str(e)} - An error occurred while generating text."
 def launch_interface():
   iface = gr.Interface(
       fn=generate_novel_title_and_summary,
@@ -169,107 +176,5 @@ def launch_interface():
   iface.launch(debug=True)
-''' #setting this aside for now, going back to original deployment bc I know it works and I dont wanna play around with gradio rn
-# we could try something larger, I need to check the models
-# using zephyr for now because its pretty quick
-client = InferenceClient(
-    'HuggingFaceH4/zephyr-7b-beta'
-#    "v1olet/v1olet_marcoroni-go-bruins-merge-7B"
-)
-# main function
-def generate(genres, themes, tones, system_prompt, temperature=1.25, max_new_tokens=512, top_p=0.95, repetition_penalty=1.15,):
-    # check the temperature value, should not be too low, and make sure the values are floats
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        # lets choose a random seed
-        seed=random.randint(1, 1000000)
-    )
-    formatted_prompt = format_prompt(f"{system_prompt}, '' ", genres, tones, themes)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False, load_in_4bit=True, use_flash_attention_2=True)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
-    return output
-additional_inputs=[
-    gr.Textbox(
-        label="System Prompt",
-        max_lines=1,
-        interactive=True,
-    ),
-    gr.Slider(
-        label="Temperature",
-        value=0.9,
-        minimum=0.0,
-        maximum=1.0,
-        step=0.05,
-        interactive=True,
-        info="Higher values produce more diverse outputs but can cause loss of coherence",
-    ),
-    gr.Slider(
-        label="Max new tokens",
-        value=256,
-        minimum=0,
-        maximum=1048,
-        step=64,
-        interactive=True,
-        info="The maximum numbers of new tokens (approx. wordcount) generated",
-    ),
-    gr.Slider(
-        label="Top-p (nucleus sampling)",
-        value=0.90,
-        minimum=0.0,
-        maximum=1,
-        step=0.05,
-        interactive=True,
-        info="Higher values sample more low-probability tokens",
-    ),
-    gr.Slider(
-        label="Repetition penalty",
-        value=1.2,
-        minimum=1.0,
-        maximum=2.0,
-        step=0.05,
-        interactive=True,
-        info="Penalize repeated tokens, encourages creativity",
-    )
-]
-def launch_interface():
-    iface = gr.Interface(
-    fn=generate,
-    inputs=[
-      gr.Textbox("", label="Book Genres (comma-separated, or leave blank!)"),
-      gr.Textbox("", label="Book Themes (comma-separated, or leave blank!)"),
-      gr.Textbox("", label="Writing Tone (comma-separated, or leave blank!)"),
-      ],
-    #additional_inputs=additional_inputs,
-    outputs="text",
-    live=False,
-    title="Novel Title and Summary Generator",
-    description='A fun creative writing tool, designed for when I have writer\'s block. Use it to practice building worlds, characters, scenes, etc. Write chapter 1, or a plot outline.' ,
-    theme='ParityError/Interstellar')
-    #,additional_inputs=additional_inputs)
-    iface.queue().launch(debug=True)
-'''
 if __name__=="__main__":
     launch_interface()

 book_themes = ["Love and Relationships","Friendship","Family","Coming of Age","Identity and Self-discovery","Adventure and Exploration","Mystery and Intrigue","Science and Technology","Fantasy Worlds","Historical Events","War and Conflict","Survival","Good vs. Evil","Justice and Morality","Revenge","Betrayal","Hope and Resilience","Isolation and Loneliness","Social Justice","Environmental Conservation","Political Corruption","Human Rights","Dystopia","Utopia","Alien Encounters","Time Travel","Art and Creativity","Death and Mortality","Cultural Identity","Personal Growth","Addiction","Education and Knowledge","Freedom and Liberation","Equality and Inequality","Society and Class","Legacy and Inheritance","Religion and Spirituality","Grief and Loss","Ambition","Transformation","Humor and Satire","Survival of the Fittest","Dreams and Aspirations","Change and Adaptation","Forgiveness","Nature and the Environment","Exploration of the Unknown","Conflict Resolution","Fate and Destiny","Artificial Intelligence","Cybersecurity","Space Exploration","Parallel Universes","Economic Struggles","Social Media and Technology","Innovation and Invention","Psychological Thrills","Philosophical Contemplation","Ancient Mythology","Modern Mythology","Epic Journeys","The Power of Imagination","Unrequited Love","Secrets and Hidden Truths","Warriors and Heroes","Surviving Adversity","Dreams and Nightmares","Rivalry and Competition","Alien Worlds","Conspiracy","Apocalyptic Scenarios","Conformity vs. Individuality","Legacy and Heritage","Nature vs. Nurture","Moral Dilemmas","Adventure and Discovery","Journey of Self-Discovery","Unlikely Friendships","Struggle for Power","Exploration of Fear","The Supernatural","Cultural Clashes","Identity Crisis","The Quest for Knowledge","The Human Condition","Hidden Agendas","Escapism","The Pursuit of Happiness","Redemption","Rebellion","Feminism and Gender Issues","Exploration of Dreams","Innocence vs. Experience","Chaos and Order","Exploration of Evil"]
 writing_tones = ["Formal","Informal","Humorous","Serious","Sarcastic","Satirical","Melancholic","Optimistic","Pessimistic","Cynical","Hopeful","Lighthearted","Dark","Gothic","Whimsical","Mysterious","Eerie","Solemn","Playful","Thoughtful","Reflective","Ironic","Sensual","Nostalgic","Surreal","Dreamy","Awe-Inspiring","Introspective","Confessional","Dramatic","Exuberant","Melodramatic","Hypnotic","Inspirational","Tongue-in-Cheek","Witty","Calm","Passionate","Detached","Frightening","Intense","Calm","Suspenseful","Brave","Desperate","Eloquent","Vivid","Casual","Whispering","Eloquent","Bitter","Tragic","Pensive","Frenzied","Melodious","Resolute","Soothing","Brisk","Lyrical","Objective","Factual","Contemplative","Sardonic","Sympathetic","Objective","Sincere","Wistful","Stoic","Empathetic","Matter-of-fact","Sentimental","Sharp","Understated","Exaggerated","Casual","Bombastic","Poetic","Charming","Apologetic","Defensive","Confrontational","Inquisitive","Candid","Reverent","Matter-of-fact","Amusing","Enthusiastic","Questioning","Reproachful","Hopeless","Despondent","Wry","Sulking","Serene","Detached","Confident","Steadfast","Foolish","Impassioned","Indignant","Self-Deprecating","Wandering","Inspiring","Bewildered"]
+API_ENDPOINT = 'https://api-inference.huggingface.co/HuggingFaceH4/zephyr-7b-beta'
+API_KEY = os.getenv('API_KEY')
 ######################################
 ########## Embeddings Class ##########
 ######################################
     def __init__(self, model_id):
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model = AutoModel.from_pretrained(model_id)
     def calculate_cosine_similarity(self, embedding1, embedding2):
         return cosine_similarity(embedding1, embedding2)[0][0]
         selected_string = ', '.join(selected_values)
         return selected_string
+###########################################
+########## CREATE OUR EMBEDDINGS ##########
+###########################################
+model_id = 'bert-base-uncased'
 #instantiate our class
 embedding_generator = EmbeddingGenerator(model_id)
 genre_embeddings = embedding_generator.get_embeddings(book_genres)
 theme_embeddings = embedding_generator.get_embeddings(book_themes)
 tone_embeddings = embedding_generator.get_embeddings(writing_tones)
 # Clear memory
 del embedding_generator
+#torch.cuda.empty_cache()
+###########################################
+############ PROMPT FORMATTING ############
+###########################################
 # Helper function that builds the prompt sent to the model.
 # For this creative writing tool, the user doesn't write the prompt itself;
 # they only supply the genres, tones, and themes the book should include.
 def format_prompt(genres, tones, themes):
+    #reinstantiate our embeddings class so we can compare the embeddings
+    embedding_generator = EmbeddingGenerator("bert-base-uncased")
+    # if the user leaves a field blank, pick random values for it, using similarity thresholds so the picks aren't opposites
     if not genres:
+        genres = embedding_generator.select_values_with_medium_similarity(genre_embeddings, random.randint(3, 5), 0.6, 0.75)  # Adjust thresholds as needed
+    if not tones:
+        tones = embedding_generator.select_values_with_medium_similarity(tone_embeddings, random.randint(3, 5), 0.9, 0.96)  # Adjust thresholds as needed
     if not themes:
+        themes = embedding_generator.select_values_with_medium_similarity(theme_embeddings, random.randint(3, 5), 0.45, 0.8)  # Adjust thresholds as needed
     # we won't need our embeddings generator after this step
     del embedding_generator
+    torch.cuda.empty_cache()
+    #Alpaca format
+    #prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
     #prompt we are using for now
     user_prompt = f"Write a novel title and a detailed, creative summary/synopsis for a book that falls under the following \n genres: {genres}, \n themes: {themes}, \n and tones: {tones}."
     # build the prompt in the Alpaca-style "### Instruction" / "### Response" layout
+    prompt = f"\n### Instruction:\n{user_prompt}\n"
     prompt += f"\n### Response:\n"
     return prompt
+###########################################
+############## MAIN FUNCTION ##############
+###########################################
 def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max_length=512, context_length=1024):
     prompt = format_prompt(genres, tones, themes)
     # Generate a new random seed for each request
             "use_cache": False
         }
     }
+    # You can get an Inference API key for free; it is sent as the bearer token below
     headers = {"Authorization": f"Bearer {API_KEY}"}
     try:
     except Exception as e:
         return f"Error: {str(e)} - An error occurred while generating text."
+#############################################
+########## GRADIO INTERFACE LAUNCH ##########
+#############################################
 def launch_interface():
   iface = gr.Interface(
       fn=generate_novel_title_and_summary,
   iface.launch(debug=True)
 if __name__=="__main__":
     launch_interface()