# imports
import gradio as gr
from huggingface_hub import InferenceClient  # currently unused; the API call below goes through `requests` directly
import random
# torch, transformers, and sklearn are used to embed the word lists below and pick
# random values that are not antithetical to each other
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import os
import requests

# lists for random generation
# TODO: move these lists into a separate file (CSV, JSON, or TXT) and read it in
# (a hypothetical loader sketch follows the lists below), then build the embeddings
# separately so they don't have to be recomputed every time the Space restarts
# (a caching sketch follows the embeddings block further down)
book_genres = ["Adventure", "Romance","Mystery", "Science Fiction","Fantasy","Thriller","Horror","Historical Fiction","Biography","Autobiography","Self-Help","Non-Fiction","Science","Cooking","Travel","Dystopian","Young Adult","Children's","Poetry","Classic","Graphic Novel","Humor","Crime","Western","Memoir","Religion","Psychology","Philosophy","Business","Finance","Parenting","Health","Fitness","Art","Music","Sports","Politics","Education","Technology","Science Fiction Fantasy","Steampunk","Drama","Historical Non-Fiction","Biographical Fiction","Mythology","Anthology","Short Stories","Essays","Fairy Tales","Magic Realism","True Crime","Satire","Romantic Suspense","Paranormal","Urban Fantasy","War","Epic Fantasy","Contemporary Fiction","Legal Thriller","Espionage","Post-Apocalyptic","Time Travel","Cultural","Medical","Environmental","Artificial Intelligence","Cyberpunk","Space Opera","Alternate History","Historical Romance","Science Fiction Romance","Young Adult Fantasy","Adventure Fantasy","Superhero","Graphic Memoir","Travel Memoir","Political Thriller","Economic","Psychological Thriller","Nature","True Adventure","Historical Mystery","Social Science","Science Biography","Space Exploration","Pop Culture","Art History","Culinary","Nature Writing","Family Drama","Classic Literature","Cultural History","Political Science","Economics","Essays and Criticism","Art Criticism","Criminal Justice","Historical Biography","Personal Development","Cookbook","Fashion","Crafts and Hobbies","Memoir","Essays","Graphic Non-Fiction", "Fantasy Romance"]
book_themes = ["Love and Relationships","Friendship","Family","Coming of Age","Identity and Self-discovery","Adventure and Exploration","Mystery and Intrigue","Science and Technology","Fantasy Worlds","Historical Events","War and Conflict","Survival","Good vs. Evil","Justice and Morality","Revenge","Betrayal","Hope and Resilience","Isolation and Loneliness","Social Justice","Environmental Conservation","Political Corruption","Human Rights","Dystopia","Utopia","Alien Encounters","Time Travel","Art and Creativity","Death and Mortality","Cultural Identity","Personal Growth","Addiction","Education and Knowledge","Freedom and Liberation","Equality and Inequality","Society and Class","Legacy and Inheritance","Religion and Spirituality","Grief and Loss","Ambition","Transformation","Humor and Satire","Survival of the Fittest","Dreams and Aspirations","Change and Adaptation","Forgiveness","Nature and the Environment","Exploration of the Unknown","Conflict Resolution","Fate and Destiny","Artificial Intelligence","Cybersecurity","Space Exploration","Parallel Universes","Economic Struggles","Social Media and Technology","Innovation and Invention","Psychological Thrills","Philosophical Contemplation","Ancient Mythology","Modern Mythology","Epic Journeys","The Power of Imagination","Unrequited Love","Secrets and Hidden Truths","Warriors and Heroes","Surviving Adversity","Dreams and Nightmares","Rivalry and Competition","Alien Worlds","Conspiracy","Apocalyptic Scenarios","Conformity vs. Individuality","Legacy and Heritage","Nature vs. Nurture","Moral Dilemmas","Adventure and Discovery","Journey of Self-Discovery","Unlikely Friendships","Struggle for Power","Exploration of Fear","The Supernatural","Cultural Clashes","Identity Crisis","The Quest for Knowledge","The Human Condition","Hidden Agendas","Escapism","The Pursuit of Happiness","Redemption","Rebellion","Feminism and Gender Issues","Exploration of Dreams","Innocence vs. Experience","Chaos and Order","Exploration of Evil"]
writing_tones = ["Formal","Informal","Humorous","Serious","Sarcastic","Satirical","Melancholic","Optimistic","Pessimistic","Cynical","Hopeful","Lighthearted","Dark","Gothic","Whimsical","Mysterious","Eerie","Solemn","Playful","Thoughtful","Reflective","Ironic","Sensual","Nostalgic","Surreal","Dreamy","Awe-Inspiring","Introspective","Confessional","Dramatic","Exuberant","Melodramatic","Hypnotic","Inspirational","Tongue-in-Cheek","Witty","Calm","Passionate","Detached","Frightening","Intense","Calm","Suspenseful","Brave","Desperate","Eloquent","Vivid","Casual","Whispering","Eloquent","Bitter","Tragic","Pensive","Frenzied","Melodious","Resolute","Soothing","Brisk","Lyrical","Objective","Factual","Contemplative","Sardonic","Sympathetic","Objective","Sincere","Wistful","Stoic","Empathetic","Matter-of-fact","Sentimental","Sharp","Understated","Exaggerated","Casual","Bombastic","Poetic","Charming","Apologetic","Defensive","Confrontational","Inquisitive","Candid","Reverent","Matter-of-fact","Amusing","Enthusiastic","Questioning","Reproachful","Hopeless","Despondent","Wry","Sulking","Serene","Detached","Confident","Steadfast","Foolish","Impassioned","Indignant","Self-Deprecating","Wandering","Inspiring","Bewildered"]
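
# Hedged sketch: loading the lists from a file instead, per the TODO above.
# The filename "word_lists.json" and its structure ({"genres": [...], "themes": [...],
# "tones": [...]}) are assumptions for illustration, not part of the current app.
# The function is defined but never called, so the hard-coded lists above remain
# the source of truth for now.
import json

def load_word_lists(path="word_lists.json"):
    """Hypothetical loader: returns (genres, themes, tones) from a JSON file."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data["genres"], data["themes"], data["tones"]

# Example usage (commented out until such a file exists):
# book_genres, book_themes, writing_tones = load_word_lists()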

API_ENDPOINT = 'https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta'
API_KEY = os.getenv('API_KEY')

######################################
########## Embeddings Class ##########
######################################
class EmbeddingGenerator:
    """Embeds short strings with a BERT-style encoder and compares them by cosine similarity."""

    def __init__(self, model_id):
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModel.from_pretrained(model_id)

    def calculate_cosine_similarity(self, embedding1, embedding2):
        # the (1, hidden_size) tensors are produced under torch.no_grad(), so sklearn
        # can convert them to NumPy arrays directly
        return cosine_similarity(embedding1, embedding2)[0][0]

    def get_embeddings(self, text_items):
        embeddings = []
        for item in text_items:
            inputs = self.tokenizer(item, return_tensors="pt", padding=True, truncation=True)
            with torch.no_grad():
                outputs = self.model(**inputs)
                pooled_output = outputs['pooler_output']
            embeddings.append((pooled_output, item))  # Store the embedding along with the original string
        return embeddings

    def select_values_with_medium_similarity(self, embeddings, num_values_to_select, min_similarity, max_similarity):
        selected_values = []
        selected_indices = set()

        # Randomly select an initial embedding
        initial_index = random.randint(0, len(embeddings) - 1)
        initial_embedding, initial_item = embeddings[initial_index]
        selected_values.append(initial_item)
        selected_indices.add(initial_index)

        while len(selected_values) < num_values_to_select:
            # Filter embeddings that are within the desired range
            candidate_indices = [
                i for i, (embedding, _) in enumerate(embeddings)
                if i not in selected_indices and min_similarity < self.calculate_cosine_similarity(embedding, initial_embedding) < max_similarity
            ]

            if candidate_indices:
                # Randomly select an embedding from the filtered candidates
                index_to_select = random.choice(candidate_indices)
                selected_embedding, selected_item = embeddings[index_to_select]
                selected_values.append(selected_item)
                selected_indices.add(index_to_select)
            else:
                break

        # Concatenate the selected values into a single string
        selected_string = ', '.join(selected_values)
        return selected_string
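
# Hedged sketch: mean pooling as an alternative sentence embedding.
# BERT's `pooler_output` comes from the [CLS] head, which is not specifically trained
# for semantic similarity; mean pooling over the token embeddings (masking out padding)
# is a commonly suggested alternative. This variant of `get_embeddings`' inner step is
# shown for reference only and is not wired into the class.
def mean_pooled_embedding(tokenizer, model, text):
    """Return a (1, hidden_size) mean-pooled embedding for a single string."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    token_embeddings = outputs.last_hidden_state            # (1, seq_len, hidden)
    mask = inputs["attention_mask"].unsqueeze(-1).float()   # (1, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)           # (1, hidden)
    counts = mask.sum(dim=1).clamp(min=1e-9)                # avoid division by zero
    return summed / counts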

###########################################
########## CREATE OUR EMBEDDINGS ##########
###########################################
model_id = 'bert-base-uncased'
#instantiate our class
embedding_generator = EmbeddingGenerator(model_id)
genre_embeddings = embedding_generator.get_embeddings(book_genres)
theme_embeddings = embedding_generator.get_embeddings(book_themes)
tone_embeddings = embedding_generator.get_embeddings(writing_tones)

# Clear memory
del embedding_generator
#torch.cuda.empty_cache()
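
# Hedged sketch: caching the embeddings between Space restarts, per the TODO near the
# top of the file. The embeddings could be computed once, saved to disk, and reloaded
# on startup instead of being rebuilt every time. The cache filename "embeddings.pt"
# is an assumption for illustration; the helpers are defined but not called, so the
# current behaviour is unchanged.
def save_embeddings(path="embeddings.pt"):
    torch.save(
        {"genres": genre_embeddings, "themes": theme_embeddings, "tones": tone_embeddings},
        path,
    )

def load_embeddings(path="embeddings.pt"):
    cache = torch.load(path)
    return cache["genres"], cache["themes"], cache["tones"]

# Example usage (commented out until a cache file has been written once):
# genre_embeddings, theme_embeddings, tone_embeddings = load_embeddings()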

###########################################
############ PROMPT FORMATTING ############
###########################################
# Helper function to format the prompt appropriately.
# For this creative writing tool, the user doesn't design the prompt itself,
# but rather supplies genres, tones, & themes for the book to include.
def format_prompt(genres, tones, themes):
    #reinstantiate our embeddings class so we can compare the embeddings
    embedding_generator = EmbeddingGenerator("bert-base-uncased")
    # pick random ones if user leaves it blank but make sure they aren't opposites
    if not genres:
        genres = embedding_generator.select_values_with_medium_similarity(genre_embeddings, random.randint(3, 5), 0.6, 0.75)  # Adjust thresholds as needed
    if not tones:
        tones = embedding_generator.select_values_with_medium_similarity(tone_embeddings, random.randint(3, 5), 0.9, 0.96)  # Adjust thresholds as needed
    if not themes:
        themes = embedding_generator.select_values_with_medium_similarity(theme_embeddings, random.randint(3, 5), 0.45, 0.8)  # Adjust thresholds as needed

    # we won't need our embeddings generator after this step
    del embedding_generator
    torch.cuda.empty_cache()

    #Alpaca format
    #prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
    #prompt we are using for now
    user_prompt = f"Write a novel title and a detailed, creative summary/synopsis for a book that falls under the following \n genres: {genres}, \n themes: {themes}, \n and tones: {tones}."
    # plain-text prompt; zephyr-7b-beta is a chat model, so its chat template could be
    # applied instead (see the sketch after this function)
    prompt = f"{user_prompt}\n"

    return prompt
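
# Hedged sketch: applying the zephyr chat template. The target model
# (HuggingFaceH4/zephyr-7b-beta) is a chat model, so its tokenizer's chat template
# could be applied instead of the plain-text prompt above. This assumes a recent
# transformers version that supports `apply_chat_template`; the system message is
# purely illustrative, and the function is not called anywhere.
def format_prompt_for_zephyr(user_prompt):
    tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
    messages = [
        {"role": "system", "content": "You are a creative writing assistant."},
        {"role": "user", "content": user_prompt},
    ]
    # add_generation_prompt=True appends the assistant turn marker so the model continues from there
    return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)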

###########################################
############## MAIN FUNCTION ##############
###########################################
def generate_novel_title_and_summary(genres, tones, themes, temperature=1.5, max_length=512, context_length=1024):
    # NOTE: max_length and context_length are currently unused; generation length
    # is controlled by max_new_tokens in the request payload below

    prompt = format_prompt(genres, tones, themes)

    # Generate a new random seed for each request and seed PyTorch with it.
    # NOTE: this only affects local torch operations; the seed is not sent to the
    # remote Inference API, so it does not make the remote generation reproducible.
    random_seed = random.randint(1, 1000000)
    torch.manual_seed(random_seed)

    # Prepare the payload for the Hugging Face Inference API:
    # generation settings belong under "parameters", caching behaviour under "options"
    data = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "do_sample": True,
            "max_new_tokens": 250,
            "min_new_tokens": 100,      # may be ignored by some backends
            "top_p": 0.95,              # was "max_p", which is not a recognised parameter
            "repetition_penalty": 1.15,
        },
        "options": {
            "use_cache": False,
        },
    }
    # You can get an Inference API token for free from your Hugging Face account settings
    headers = {"Authorization": f"Bearer {API_KEY}"}

    try:
        # Make the API request
        response = requests.post(API_ENDPOINT, json=data, headers=headers)

        if response.status_code == 200:
            result = response.json()
            generated_text = result[0].get("generated_text", "")
            return generated_text

        else:
            return f"Error: {response.status_code} - Unable to generate text."

    except Exception as e:
        return f"Error: {str(e)} - An error occurred while generating text."

#############################################
########## GRADIO INTERFACE LAUNCH ##########
#############################################
def launch_interface():
    iface = gr.Interface(
        fn=generate_novel_title_and_summary,
        inputs=[
            gr.Textbox("", label="Book Genres (comma-separated, or leave blank!)"),
            gr.Textbox("", label="Book Themes (comma-separated, or leave blank!)"),
            gr.Textbox("", label="Writing Tone (comma-separated, or leave blank!)"),
            gr.Slider(0.1, 10.0, 1.3, label="Temperature (Creativity)"),
        ],
        outputs="text",
        live=False,
        title="Novel Title and Summary Generator",
        description="A fun creative writing tool, designed for when I have writer's block. Use it to practice building worlds, characters, scenes, etc. Write chapter 1, or a plot outline.",
        theme='ParityError/Interstellar',
    )

    iface.launch(share=True)

if __name__=="__main__":
    launch_interface()