ArticleChatbot

Sleeping

App Files Files Community

Luca Foppiano committed on Oct 26, 2023

Commit

0b28b48

•

1 Parent(s): 6551eca

Setuptools build, PyPI release, GitHub Actions improvements (#5)

Browse files

Files changed (10) hide show

.github/workflows/ci-build.yml +5 -14
.github/workflows/ci-release.yml +80 -0
CHANGELOG.md +18 -0
README.md +16 -0
document_qa_engine.py → document_qa/document_qa_engine.py +0 -0
grobid_processors.py → document_qa/grobid_processors.py +0 -0
pyproject.toml +35 -0
pytest.ini +2 -0
requirements.txt +14 -15
streamlit_app.py +2 -2

.github/workflows/ci-build.yml CHANGED Viewed

@@ -14,9 +14,12 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python 3.9
-      uses: actions/setup-python@v2
       with:
         python-version: "3.9"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -30,16 +33,4 @@ jobs:
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 #    - name: Test with pytest
 #      run: |
-#        pytest
-  docker-build-documentqa:
-    needs: [build]
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: Build the Docker image
-        run: docker build . --file Dockerfile --tag lfoppiano/documentqa:develop
-      - name: Cleanup older than 24h images and containers
-        run: docker system prune --filter "until=24h" --force

     steps:
     - uses: actions/checkout@v2
     - name: Set up Python 3.9
+      uses: actions/setup-python@v4
       with:
         python-version: "3.9"
+        cache: 'pip'
+    - name: Cleanup more disk space
+      run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 #    - name: Test with pytest
 #      run: |
+#        pytest

.github/workflows/ci-release.yml ADDED Viewed

	@@ -0,0 +1,80 @@

+name: Build release
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - 'v*'
+concurrency:
+  group: docker
+  cancel-in-progress: true
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+          cache: 'pip'
+      - name: Cleanup more disk space
+        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --upgrade flake8 pytest pycodestyle
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Lint with flake8
+        run: |
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    #    - name: Test with pytest
+    #      run: |
+    #        pytest
+      - name: Build and Publish to PyPI
+        uses: conchylicultor/pypi-build-publish@v1
+        with:
+          pypi-token: ${{ secrets.PYPI_API_TOKEN }}
+  docker-build:
+    needs: [build]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set tags
+        id: set_tags
+        run: |
+          DOCKER_IMAGE=lfoppiano/document-insights-qa
+          VERSION=""
+          if [[ $GITHUB_REF == refs/tags/v* ]]; then
+            VERSION=${GITHUB_REF#refs/tags/v}
+          fi
+          if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+            TAGS="${VERSION}"
+          else
+            TAGS="latest"
+          fi
+          echo "TAGS=${TAGS}"
+          echo ::set-output name=tags::${TAGS}
+      - name: Create more disk space
+        run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+      - uses: actions/checkout@v2
+      - name: Build and push
+        id: docker_build
+        uses: mr-smithers-excellent/docker-build-push@v5
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          image: lfoppiano/document-insights-qa
+          registry: docker.io
+          pushImage: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.set_tags.outputs.tags }}
+      - name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}

CHANGELOG.md ADDED Viewed

	@@ -0,0 +1,18 @@

+# Changelog
+All notable changes to this project will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+## [0.0.1] – 2023-05-13
+### Added
++ Kick off application
++ Support for GPT-3.5
++ Support for Mistral + SentenceTransformer
++ Streamlit application
++ Docker image
++ PyPI package
+<!-- markdownlint-disable-file MD024 MD033 -->

README.md CHANGED Viewed

@@ -43,6 +43,22 @@ Allow to change the number of embedding chunks that are considered for respondin
 By default, the mode is set to LLM (Language Model) which enables question/answering. You can directly ask questions related to the document content, and the system will answer the question using content from the document.
 If you switch the mode to "Embedding," the system will return specific chunks from the document that are semantically related to your query. This mode helps to test why sometimes the answers are not satisfying or incomplete.
 ## Acknowledgement

 By default, the mode is set to LLM (Language Model) which enables question/answering. You can directly ask questions related to the document content, and the system will answer the question using content from the document.
 If you switch the mode to "Embedding," the system will return specific chunks from the document that are semantically related to your query. This mode helps to test why sometimes the answers are not satisfying or incomplete.
+## Development notes
+To release a new version:
+- `bump-my-version bump patch`
+- `git push --tags`
+To use docker:
+- `docker run lfoppiano/document-insights-qa:latest`
+To install the library from PyPI:
+- `pip install document-qa-engine`
 ## Acknowledgement

document_qa_engine.py → document_qa/document_qa_engine.py RENAMED Viewed

File without changes

grobid_processors.py → document_qa/grobid_processors.py RENAMED Viewed

File without changes

pyproject.toml ADDED Viewed

	@@ -0,0 +1,35 @@

+[build-system]
+requires = ["setuptools", "setuptools-scm"]
+build-backend = "setuptools.build_meta"
+[tool.bumpversion]
+current_version = "0.0.1"
+commit = "true"
+tag = "true"
+tag_name = "v{new_version}"
+#[[tool.bumpversion.files]]
+#filename = "version.txt"
+#search = "{current_version}"
+#replace = "{new_version}"
+[project]
+name = "document-qa-engine"
+license = { file = "LICENSE" }
+authors = [
+    { name = "Luca Foppiano", email = "[email protected]" },
+]
+maintainers = [
+    { name = "Luca Foppiano", email = "[email protected]" }
+]
+description = "Scientific Document Insight Q/A"
+readme = "README.md"
+dynamic = ['version']
+[tool.setuptools_scm]
+[project.urls]
+Homepage = "https://document-insights.streamlit.app"
+Repository = "https://github.com/lfoppiano/document-qa"
+Changelog = "https://github.com/lfoppiano/document-qa/blob/main/CHANGELOG.md"

pytest.ini ADDED Viewed

	@@ -0,0 +1,2 @@


+[pytest]
+testpaths = tests

requirements.txt CHANGED Viewed

@@ -1,15 +1,13 @@
 # Grobid
-grobid-quantities-client
-grobid-client-python
-grobid_tei_xml
 # Utils
 tqdm
-textdistance[extras]
-pyyaml
-dateparser
 pytest
-streamlit
 lxml
 Beautifulsoup4
 python-dotenv
@@ -17,11 +15,12 @@ watchdog
 # LLM
 chromadb==0.3.25
-tiktoken
-openai
-langchain==0.0.244
-promptlayer
-typing-inspect==0.8.0
-typing_extensions==4.5.0
-pydantic==1.10.8
-sentence_transformers

 # Grobid
+grobid-quantities-client==0.4.0
+grobid-client-python==0.0.5
+grobid_tei_xml==0.1.3
 # Utils
 tqdm
+pyyaml==6.0
 pytest
+streamlit==1.27.1
 lxml
 Beautifulsoup4
 python-dotenv
 # LLM
 chromadb==0.3.25
+tiktoken==0.4.0
+openai==0.27.7
+langchain==0.0.314
+promptlayer==0.2.4
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+pydantic==2.4.2
+sentence_transformers==2.2.2
+bump-my-version

streamlit_app.py CHANGED Viewed

@@ -13,8 +13,8 @@ import streamlit as st
 from langchain.chat_models import PromptLayerChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
-from document_qa_engine import DocumentQAEngine
-from grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
 from grobid_client_generic import GrobidClientGeneric
 if 'rqa' not in st.session_state:

 from langchain.chat_models import PromptLayerChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
+from document_qa.document_qa_engine import DocumentQAEngine
+from document_qa.grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
 from grobid_client_generic import GrobidClientGeneric
 if 'rqa' not in st.session_state: