Spaces:

barunsaha
/

slide-deck-ai

Running

App Files Community

barunsaha committed on Apr 6, 2024

Commit

9c0dccd

1 Parent(s): 4f6ea2c

Reorganize files and set logging format globally

Browse files

Files changed (5) hide show

app.py +28 -55
global_config.py +8 -0
helpers/__init__.py +0 -0
llm_helper.py → helpers/llm_helper.py +35 -15
pptx_helper.py → helpers/pptx_helper.py +12 -14

app.py CHANGED Viewed

@@ -7,8 +7,7 @@ import json5
 import metaphor_python as metaphor
 import streamlit as st
-import llm_helper
-import pptx_helper
 from global_config import GlobalConfig
@@ -16,10 +15,7 @@ APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r', encoding='utf-8'
 GB_CONVERTER = 2 ** 30
-logging.basicConfig(
-    level=GlobalConfig.LOG_LEVEL,
-    format='%(asctime)s - %(message)s',
-)
 @st.cache_data
@@ -27,11 +23,11 @@ def get_contents_wrapper(text: str) -> str:
     """
     Fetch and cache the slide deck contents on a topic by calling an external API.
-    :param text: The presentation topic
-    :return: The slide deck contents or outline in JSON format
     """
-    logging.info('LLM call because of cache miss...')
     return llm_helper.generate_slides_content(text).strip()
@@ -40,7 +36,7 @@ def get_metaphor_client_wrapper() -> metaphor.Metaphor:
     """
     Create a Metaphor client for semantic Web search.
-    :return: Metaphor instance
     """
     return metaphor.Metaphor(api_key=GlobalConfig.METAPHOR_API_KEY)
@@ -51,8 +47,8 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
     """
     Fetch and cache the Web search results on a given topic.
-    :param text: The topic
-    :return: A list of (title, link) tuples
     """
     results = []
@@ -68,28 +64,6 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
     return results
-# def get_disk_used_percentage() -> float:
-#     """
-#     Compute the disk usage.
-#
-#     :return: Percentage of the disk space currently used
-#     """
-#
-#     total, used, free = shutil.disk_usage(__file__)
-#     total = total // GB_CONVERTER
-#     used = used // GB_CONVERTER
-#     free = free // GB_CONVERTER
-#     used_perc = 100.0 * used / total
-#
-#     logging.debug(f'Total: {total} GB\n'
-#                   f'Used: {used} GB\n'
-#                   f'Free: {free} GB')
-#
-#     logging.debug('\n'.join(os.listdir()))
-#
-#     return used_perc
 def build_ui():
     """
     Display the input elements for content generation. Only covers the first step.
@@ -160,24 +134,23 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
     """
     Process the inputs to generate the slides.
-    :param topic: The presentation topic based on which contents are to be generated
-    :param pptx_template: The PowerPoint template name to be used
-    :param progress_bar: Progress bar from the page
-    :return:
     """
     topic_length = len(topic)
-    logging.debug('Input length:: topic: %s', topic_length)
     if topic_length >= 10:
-        logging.debug('Topic: %s', topic)
         target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
         try:
             # Step 1: Generate the contents in JSON format using an LLM
             json_str = process_slides_contents(topic[:target_length], progress_bar)
-            logging.debug('Truncated topic: %s', topic[:target_length])
-            logging.debug('Length of JSON: %d', len(json_str))
             # Step 2: Generate the slide deck based on the template specified
             if len(json_str) > 0:
@@ -210,15 +183,15 @@ def process_slides_contents(text: str, progress_bar: st.progress) -> str:
     """
     Convert given text into structured data and display. Update the UI.
-    :param text: The topic description for the presentation
-    :param progress_bar: Progress bar for this step
-    :return: The contents as a JSON-formatted string
     """
     json_str = ''
     try:
-        logging.info('Calling LLM for content generation on the topic: %s', text)
         json_str = get_contents_wrapper(text)
     except Exception as ex:
         st.error(
@@ -239,10 +212,10 @@ def generate_slide_deck(json_str: str, pptx_template: str, progress_bar) -> List
     """
     Create a slide deck.
-    :param json_str: The contents in JSON format
-    :param pptx_template: The PPTX template name
-    :param progress_bar: Progress bar
-    :return: A list of all slide headers and the title
     """
     progress_text = 'Creating the slide deck...give it a moment'
@@ -257,7 +230,7 @@ def generate_slide_deck(json_str: str, pptx_template: str, progress_bar) -> List
     temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
     path = pathlib.Path(temp.name)
-    logging.info('Creating PPTX file...')
     all_headers = pptx_helper.generate_powerpoint_presentation(
         json_str,
         slides_template=pptx_template,
@@ -279,7 +252,7 @@ def show_bonus_stuff(ppt_headers: List[str]):
     """
     # Use the presentation title and the slide headers to find relevant info online
-    logging.info('Calling Metaphor search...')
     ppt_text = ' '.join(ppt_headers)
     search_results = get_web_search_results_wrapper(ppt_text)
     md_text_items = []
@@ -290,11 +263,11 @@ def show_bonus_stuff(ppt_headers: List[str]):
     with st.expander('Related Web references'):
         st.markdown('\n\n'.join(md_text_items))
-    logging.info('Done!')
     # # Avoid image generation. It costs time and an API call, so just limit to the text generation.
     # with st.expander('AI-generated image on the presentation topic'):
-    #     logging.info('Calling SDXL for image generation...')
     #     # img_empty.write('')
     #     # img_text.write(APP_TEXT['image_info'])
     #     image = get_ai_image_wrapper(ppt_text)
@@ -303,7 +276,7 @@ def show_bonus_stuff(ppt_headers: List[str]):
     #         image = base64.b64decode(image)
     #         st.image(image, caption=ppt_text)
     #         st.info('Tip: Right-click on the image to save it.', icon="💡️")
-    #         logging.info('Image added')
 def main():

 import metaphor_python as metaphor
 import streamlit as st
+from helpers import llm_helper, pptx_helper
 from global_config import GlobalConfig
 GB_CONVERTER = 2 ** 30
+logger = logging.getLogger(__name__)
 @st.cache_data
     """
     Fetch and cache the slide deck contents on a topic by calling an external API.
+    :param text: The presentation topic.
+    :return: The slide deck contents or outline in JSON format.
     """
+    logger.info('LLM call because of cache miss...')
     return llm_helper.generate_slides_content(text).strip()
     """
     Create a Metaphor client for semantic Web search.
+    :return: Metaphor instance.
     """
     return metaphor.Metaphor(api_key=GlobalConfig.METAPHOR_API_KEY)
     """
     Fetch and cache the Web search results on a given topic.
+    :param text: The topic.
+    :return: A list of (title, link) tuples.
     """
     results = []
     return results
 def build_ui():
     """
     Display the input elements for content generation. Only covers the first step.
     """
     Process the inputs to generate the slides.
+    :param topic: The presentation topic based on which contents are to be generated.
+    :param pptx_template: The PowerPoint template name to be used.
+    :param progress_bar: Progress bar from the page.
     """
     topic_length = len(topic)
+    logger.debug('Input length:: topic: %s', topic_length)
     if topic_length >= 10:
+        logger.debug('Topic: %s', topic)
         target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
         try:
             # Step 1: Generate the contents in JSON format using an LLM
             json_str = process_slides_contents(topic[:target_length], progress_bar)
+            logger.debug('Truncated topic: %s', topic[:target_length])
+            logger.debug('Length of JSON: %d', len(json_str))
             # Step 2: Generate the slide deck based on the template specified
             if len(json_str) > 0:
     """
     Convert given text into structured data and display. Update the UI.
+    :param text: The topic description for the presentation.
+    :param progress_bar: Progress bar for this step.
+    :return: The contents as a JSON-formatted string.
     """
     json_str = ''
     try:
+        logger.info('Calling LLM for content generation on the topic: %s', text)
         json_str = get_contents_wrapper(text)
     except Exception as ex:
         st.error(
     """
     Create a slide deck.
+    :param json_str: The contents in JSON format.
+    :param pptx_template: The PPTX template name.
+    :param progress_bar: Progress bar.
+    :return: A list of all slide headers and the title.
     """
     progress_text = 'Creating the slide deck...give it a moment'
     temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
     path = pathlib.Path(temp.name)
+    logger.info('Creating PPTX file...')
     all_headers = pptx_helper.generate_powerpoint_presentation(
         json_str,
         slides_template=pptx_template,
     """
     # Use the presentation title and the slide headers to find relevant info online
+    logger.info('Calling Metaphor search...')
     ppt_text = ' '.join(ppt_headers)
     search_results = get_web_search_results_wrapper(ppt_text)
     md_text_items = []
     with st.expander('Related Web references'):
         st.markdown('\n\n'.join(md_text_items))
+    logger.info('Done!')
     # # Avoid image generation. It costs time and an API call, so just limit to the text generation.
     # with st.expander('AI-generated image on the presentation topic'):
+    #     logger.info('Calling SDXL for image generation...')
     #     # img_empty.write('')
     #     # img_text.write(APP_TEXT['image_info'])
     #     image = get_ai_image_wrapper(ppt_text)
     #         image = base64.b64decode(image)
     #         st.image(image, caption=ppt_text)
     #         st.info('Tip: Right-click on the image to save it.', icon="💡️")
+    #         logger.info('Image added')
 def main():

global_config.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 from dataclasses import dataclass
@@ -38,3 +39,10 @@ class GlobalConfig:
             'caption': 'Marvel in a monochrome dream'
         }
     }

+import logging
 import os
 from dataclasses import dataclass
             'caption': 'Marvel in a monochrome dream'
         }
     }
+logging.basicConfig(
+    level=GlobalConfig.LOG_LEVEL,
+    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)

helpers/__init__.py ADDED Viewed

File without changes

llm_helper.py → helpers/llm_helper.py RENAMED Viewed

@@ -1,5 +1,7 @@
 import logging
 import requests
 from global_config import GlobalConfig
@@ -7,27 +9,45 @@ from global_config import GlobalConfig
 HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
 HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
-logging.basicConfig(
-    level=GlobalConfig.LOG_LEVEL,
-    format='%(asctime)s - %(message)s',
-)
-# llm = None
-def hf_api_query(payload: dict):
     """
     Invoke HF inference end-point API.
-    :param payload: The prompt for the LLM and related parameters
-    :return: The output from the LLM
     """
     try:
         response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
         result = response.json()
     except requests.exceptions.Timeout as te:
-        logging.error('*** Error: hf_api_query timeout! %s', str(te))
         result = {}
     return result
@@ -37,8 +57,8 @@ def generate_slides_content(topic: str) -> str:
     """
     Generate the outline/contents of slides for a presentation on a given topic.
-    :param topic: Topic on which slides are to be generated
-    :return: The content in JSON format
     """
     with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
@@ -46,8 +66,8 @@ def generate_slides_content(topic: str) -> str:
         template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
     output = hf_api_query({
-        "inputs": template_txt,
-        "parameters": {
             'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
             'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
             'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
@@ -56,7 +76,7 @@ def generate_slides_content(topic: str) -> str:
             'return_full_text': False,
             # "repetition_penalty": 0.0001
         },
-        "options": {
             'wait_for_model': True,
             'use_cache': True
         }
@@ -70,7 +90,7 @@ def generate_slides_content(topic: str) -> str:
         # logging.debug(f'{json_end_idx=}')
         output = output[:json_end_idx]
-    logging.debug('generate_slides_content: output: %s', output)
     return output

 import logging
 import requests
+from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
+from langchain_core.language_models import LLM
 from global_config import GlobalConfig
 HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
 HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
+logger = logging.getLogger(__name__)
+def get_hf_endpoint() -> LLM:
+    """
+    Get an LLM via the HuggingFaceEndpoint.
+    :return: The LLM.
+    """
+    logger.debug('Getting LLM via HF endpoint')
+    return HuggingFaceEndpoint(
+        repo_id=GlobalConfig.HF_LLM_MODEL_NAME,
+        max_new_tokens=GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+        top_k=40,
+        top_p=0.95,
+        temperature=GlobalConfig.LLM_MODEL_TEMPERATURE,
+        repetition_penalty=1.03,
+        streaming=True,
+        huggingfacehub_api_token=GlobalConfig.HUGGINGFACEHUB_API_TOKEN,
+        return_full_text=False,
+        stop_sequences=['</s>'],
+    )
+def hf_api_query(payload: dict) -> dict:
     """
     Invoke HF inference end-point API.
+    :param payload: The prompt for the LLM and related parameters.
+    :return: The output from the LLM.
     """
     try:
         response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
         result = response.json()
     except requests.exceptions.Timeout as te:
+        logger.error('*** Error: hf_api_query timeout! %s', str(te))
         result = {}
     return result
     """
     Generate the outline/contents of slides for a presentation on a given topic.
+    :param topic: Topic on which slides are to be generated.
+    :return: The content in JSON format.
     """
     with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
         template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
     output = hf_api_query({
+        'inputs': template_txt,
+        'parameters': {
             'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
             'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
             'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
             'return_full_text': False,
             # "repetition_penalty": 0.0001
         },
+        'options': {
             'wait_for_model': True,
             'use_cache': True
         }
         # logging.debug(f'{json_end_idx=}')
         output = output[:json_end_idx]
+    logger.debug('generate_slides_content: output: %s', output)
     return output

pptx_helper.py → helpers/pptx_helper.py RENAMED Viewed

@@ -2,6 +2,7 @@ import logging
 import pathlib
 import re
 import tempfile
 from typing import List, Tuple
 import json5
@@ -28,17 +29,14 @@ SAMPLE_JSON_FOR_PPTX = '''
 }
 '''
-logging.basicConfig(
-    level=GlobalConfig.LOG_LEVEL,
-    format='%(asctime)s - %(message)s',
-)
 def remove_slide_number_from_heading(header: str) -> str:
     """
     Remove the slide number from a given slide header.
-    :param header: The header of a slide
     """
     if PATTERN.match(header):
@@ -56,16 +54,16 @@ def generate_powerpoint_presentation(
     """
     Create and save a PowerPoint presentation file containing the content in JSON format.
-    :param structured_data: The presentation contents as "JSON" (may contain trailing commas)
-    :param slides_template: The PPTX template to use
-    :param output_file_path: The path of the PPTX file to save as
-    :return A list of presentation title and slides headers
     """
     # The structured "JSON" might contain trailing commas, so using json5
     parsed_data = json5.loads(structured_data)
-    logging.debug(
         "*** Using PPTX template: %s",
         GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
     )
@@ -77,7 +75,7 @@ def generate_powerpoint_presentation(
     title = slide.shapes.title
     subtitle = slide.placeholders[1]
     title.text = parsed_data['title']
-    logging.debug('Presentation title is: %s', title.text)
     subtitle.text = 'by Myself and SlideDeck AI :)'
     all_headers = [title.text, ]
@@ -125,9 +123,9 @@ def get_flat_list_of_contents(items: list, level: int) -> List[Tuple]:
     Flatten a (hierarchical) list of bullet points to a single list containing each item and
     its level.
-    :param items: A bullet point (string or list)
-    :param level: The current level of hierarchy
-    :return: A list of (bullet item text, hierarchical level) tuples
     """
     flat_list = []

 import pathlib
 import re
 import tempfile
 from typing import List, Tuple
 import json5
 }
 '''
+logger = logging.getLogger(__name__)
 def remove_slide_number_from_heading(header: str) -> str:
     """
     Remove the slide number from a given slide header.
+    :param header: The header of a slide.
     """
     if PATTERN.match(header):
     """
     Create and save a PowerPoint presentation file containing the content in JSON format.
+    :param structured_data: The presentation contents as "JSON" (may contain trailing commas).
+    :param slides_template: The PPTX template to use.
+    :param output_file_path: The path of the PPTX file to save as.
+    :return A list of presentation title and slides headers.
     """
     # The structured "JSON" might contain trailing commas, so using json5
     parsed_data = json5.loads(structured_data)
+    logger.debug(
         "*** Using PPTX template: %s",
         GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
     )
     title = slide.shapes.title
     subtitle = slide.placeholders[1]
     title.text = parsed_data['title']
+    logger.debug('Presentation title is: %s', title.text)
     subtitle.text = 'by Myself and SlideDeck AI :)'
     all_headers = [title.text, ]
     Flatten a (hierarchical) list of bullet points to a single list containing each item and
     its level.
+    :param items: A bullet point (string or list).
+    :param level: The current level of hierarchy.
+    :return: A list of (bullet item text, hierarchical level) tuples.
     """
     flat_list = []