aiflows
/

VectorStoreFlowModule

Model card Files Files and versions Community

nbaldwin committed on Nov 24, 2023

Commit

c415e05

1 Parent(s): 46d0705

demo + readme

Browse files

Files changed (10) hide show

.gitignore +443 -1
ChromaDBFlow.py +58 -2
ChromaDBFlow.yaml +3 -3
README.md +219 -15
VectorStoreFlow.py +71 -0
VectorStoreFlow.yaml +3 -3
demo.yaml +85 -0
pip_requirements.py +0 -1
pip_requirements.txt +3 -0
run.py +108 -0

.gitignore CHANGED Viewed

	@@ -1 +1,443 @@
1	- __pycache__/*

+# Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
+### C++ ###
+# Prerequisites
+*.d
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+# Precompiled Headers
+*.gch
+*.pch
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+# Fortran module files
+*.mod
+*.smod
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+# Executables
+*.exe
+*.out
+*.app
+### Java ###
+# Compiled class file
+*.class
+# Log file
+*.log
+# BlueJ files
+*.ctxt
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+# Package Files #
+*.jar
+*.war
+*.nar
+*.ear
+*.zip
+*.tar.gz
+*.rar
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+replay_pid*
+### Linux ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+### PyCharm ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+# AWS User-specific
+.idea/**/aws.xml
+# Generated files
+.idea/**/contentModel.xml
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+# CMake
+cmake-build-*/
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+# File-based project format
+*.iws
+# IntelliJ
+out/
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+# JIRA plugin
+atlassian-ide-plugin.xml
+# Cursive Clojure plugin
+.idea/replstate.xml
+# SonarLint plugin
+.idea/sonarlint/
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+# Editor-based Rest Client
+.idea/httpRequests
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+# Sonarlint plugin
+# https://plugins.jetbrains.com/plugin/7973-sonarlint
+.idea/**/sonarlint/
+# SonarQube Plugin
+# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
+.idea/**/sonarIssues.xml
+# Markdown Navigator plugin
+# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator-enh.xml
+.idea/**/markdown-navigator/
+# Cache file creation bug
+# See https://youtrack.jetbrains.com/issue/JBR-2257
+.idea/$CACHE_FILE$
+# CodeStream plugin
+# https://plugins.jetbrains.com/plugin/12206-codestream
+.idea/codestream.xml
+# Azure Toolkit for IntelliJ plugin
+# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
+.idea/**/azureSettings.xml
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+# Local History for Visual Studio Code
+.history/
+# Built Visual Studio Code Extensions
+*.vsix
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+# Dump file
+*.stackdump
+# Folder config file
+[Dd]esktop.ini
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+# Windows shortcuts
+*.lnk
+# End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
+.*
+flow_modules/

ChromaDBFlow.py CHANGED Viewed

@@ -12,7 +12,33 @@ from flows.base_flows import AtomicFlow
 import hydra
 class ChromaDBFlow(AtomicFlow):
     def __init__(self, backend,**kwargs):
         super().__init__(**kwargs)
         self.client = ChromaClient()
@@ -21,6 +47,13 @@ class ChromaDBFlow(AtomicFlow):
     @classmethod
     def _set_up_backend(cls, config):
         kwargs = {}
         kwargs["backend"] = \
@@ -30,6 +63,13 @@ class ChromaDBFlow(AtomicFlow):
     @classmethod
     def instantiate_from_config(cls, config):
         flow_config = deepcopy(config)
         kwargs = {"flow_config": flow_config}
@@ -41,13 +81,29 @@ class ChromaDBFlow(AtomicFlow):
         return cls(**kwargs)
     def get_input_keys(self) -> List[str]:
         return self.flow_config["input_keys"]
     def get_output_keys(self) -> List[str]:
         return self.flow_config["output_keys"]
     def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
         api_information = self.backend.get_key()
         if api_information.backend_used == "openai":

 import hydra
 class ChromaDBFlow(AtomicFlow):
+    """ A flow that uses the ChromaDB model to write and read memories stored in a database
+    *Configuration Parameters*:
+    - `name` (str): The name of the flow. Default: "chroma_db"
+    - `description` (str): A description of the flow. This description is used to generate the help message of the flow.
+    Default: "ChromaDB is a document store that uses vector embeddings to store and retrieve documents."
+    - `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
+    default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend), except for the following parameters, whose default values are overwritten:
+        - `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
+        - `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
+    - `n_results` (int): The number of results to retrieve when reading from the database. Default: 5
+    - Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
+    *Input Interface*:
+    - `operation` (str): The operation to perform. It can be "write" or "read".
+    - `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
+    *Output Interface*:
+    - `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
+    :param backend: The backend of the flow (used to retrieve the API key)
+    :type backend: LiteLLMBackend
+    :param \**kwargs: Additional arguments to pass to the flow.
+    """
     def __init__(self, backend,**kwargs):
         super().__init__(**kwargs)
         self.client = ChromaClient()
     @classmethod
     def _set_up_backend(cls, config):
+        """ This instantiates the backend of the flow from a configuration file.
+        :param config: The configuration of the backend.
+        :type config: Dict[str, Any]
+        :return: The backend of the flow.
+        :rtype: Dict[str, LiteLLMBackend]
+        """
         kwargs = {}
         kwargs["backend"] = \
     @classmethod
     def instantiate_from_config(cls, config):
+        """ This method instantiates the flow from a configuration file
+        :param config: The configuration of the flow.
+        :type config: Dict[str, Any]
+        :return: The instantiated flow.
+        :rtype: ChromaDBFlow
+        """
         flow_config = deepcopy(config)
         kwargs = {"flow_config": flow_config}
         return cls(**kwargs)
     def get_input_keys(self) -> List[str]:
+        """ This method returns the input keys of the flow.
+        :return: The input keys of the flow.
+        :rtype: List[str]
+        """
         return self.flow_config["input_keys"]
     def get_output_keys(self) -> List[str]:
+        """ This method returns the output keys of the flow.
+        :return: The output keys of the flow.
+        :rtype: List[str]
+        """
         return self.flow_config["output_keys"]
     def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        """ This method runs the ChromaDBFlow. It either writes or reads memories from the database.
+        :param input_data: The input data of the flow.
+        :type input_data: Dict[str, Any]
+        :return: The output data of the flow.
+        :rtype: Dict[str, Any]
+        """
         api_information = self.backend.get_key()
         if api_information.backend_used == "openai":

ChromaDBFlow.yaml CHANGED Viewed

@@ -4,11 +4,11 @@ description: ChromaDB is a document store that uses vector embeddings to store a
 backend:
   _target_: flows.backends.llm_lite.LiteLLMBackend
   api_infos: ???
-input_keys:
   - operation
   - content
-output_keys:
   - retrieved
 n_results: 5 # number of results to retrieve when query

 backend:
   _target_: flows.backends.llm_lite.LiteLLMBackend
   api_infos: ???
+  model_name: "" #Not used in current implementation
+input_interface:
   - operation
   - content
+output_interface:
   - retrieved
 n_results: 5 # number of results to retrieve when query

README.md CHANGED Viewed

@@ -1,25 +1,229 @@
----
-license: mit
----
-## Description
-ToDo
-&lt; Flow description &gt;
-## Configuration parameters
-&lt; Name 1 &gt; (&lt; Type 1 &gt;): &lt; Description 1 &gt;. Required parameter.
-&lt; Name 2 &gt; (&lt; Type 2 &gt;): &lt; Description 2 &gt;. Default value is: &lt; value 2 &gt;
-## Input interface
-&lt; Name 1 &gt; (&lt; Type 1 &gt;): &lt; Description 1 &gt;.
-(Note that the interface might depend on the state of the Flow.)
-## Output interface
-&lt; Name 1 &gt; (&lt; Type 1 &gt;): &lt; Description 1 &gt;.
-(Note that the interface might depend on the state of the Flow.)

+# Table of Contents
+* [run](#run)
+* [ChromaDBFlow](#ChromaDBFlow)
+  * [ChromaDBFlow](#ChromaDBFlow.ChromaDBFlow)
+    * [instantiate\_from\_config](#ChromaDBFlow.ChromaDBFlow.instantiate_from_config)
+    * [get\_input\_keys](#ChromaDBFlow.ChromaDBFlow.get_input_keys)
+    * [get\_output\_keys](#ChromaDBFlow.ChromaDBFlow.get_output_keys)
+    * [run](#ChromaDBFlow.ChromaDBFlow.run)
+* [VectorStoreFlow](#VectorStoreFlow)
+  * [VectorStoreFlow](#VectorStoreFlow.VectorStoreFlow)
+    * [instantiate\_from\_config](#VectorStoreFlow.VectorStoreFlow.instantiate_from_config)
+    * [package\_documents](#VectorStoreFlow.VectorStoreFlow.package_documents)
+    * [run](#VectorStoreFlow.VectorStoreFlow.run)
+* [\_\_init\_\_](#__init__)
+<a id="run"></a>
+# run
+<a id="ChromaDBFlow"></a>
+# ChromaDBFlow
+<a id="ChromaDBFlow.ChromaDBFlow"></a>
+## ChromaDBFlow Objects
+```python
+class ChromaDBFlow(AtomicFlow)
+```
+A flow that uses the ChromaDB model to write and read memories stored in a database
+*Configuration Parameters*:
+- `name` (str): The name of the flow. Default: "chroma_db"
+- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
+Default: "ChromaDB is a document store that uses vector embeddings to store and retrieve documents."
+- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
+default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend), except for the following parameters, whose default values are overwritten:
+    - `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
+    - `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
+- `n_results` (int): The number of results to retrieve when reading from the database. Default: 5
+- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
+*Input Interface*:
+- `operation` (str): The operation to perform. It can be "write" or "read".
+- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
+*Output Interface*:
+- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
+**Arguments**:
+- `backend` (`LiteLLMBackend`): The backend of the flow (used to retrieve the API key)
+- `\**kwargs`: Additional arguments to pass to the flow.
+<a id="ChromaDBFlow.ChromaDBFlow.instantiate_from_config"></a>
+#### instantiate\_from\_config
+```python
+@classmethod
+def instantiate_from_config(cls, config)
+```
+This method instantiates the flow from a configuration file
+**Arguments**:
+- `config` (`Dict[str, Any]`): The configuration of the flow.
+**Returns**:
+`ChromaDBFlow`: The instantiated flow.
+<a id="ChromaDBFlow.ChromaDBFlow.get_input_keys"></a>
+#### get\_input\_keys
+```python
+def get_input_keys() -> List[str]
+```
+This method returns the input keys of the flow.
+**Returns**:
+`List[str]`: The input keys of the flow.
+<a id="ChromaDBFlow.ChromaDBFlow.get_output_keys"></a>
+#### get\_output\_keys
+```python
+def get_output_keys() -> List[str]
+```
+This method returns the output keys of the flow.
+**Returns**:
+`List[str]`: The output keys of the flow.
+<a id="ChromaDBFlow.ChromaDBFlow.run"></a>
+#### run
+```python
+def run(input_data: Dict[str, Any]) -> Dict[str, Any]
+```
+This method runs the ChromaDBFlow. It either writes or reads memories from the database.
+**Arguments**:
+- `input_data` (`Dict[str, Any]`): The input data of the flow.
+**Returns**:
+`Dict[str, Any]`: The output data of the flow.
+<a id="VectorStoreFlow"></a>
+# VectorStoreFlow
+<a id="VectorStoreFlow.VectorStoreFlow"></a>
+## VectorStoreFlow Objects
+```python
+class VectorStoreFlow(AtomicFlow)
+```
+A flow that uses the VectorStore model to write and read memories stored in a database (see VectorStoreFlow.yaml for the default configuration)
+*Configuration Parameters*:
+- `name` (str): The name of the flow. Default: "VecotrStoreFlow"
+- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
+Default: "VectorStoreFlow"
+- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
+default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend), except for the following parameters, whose default values are overwritten:
+    - `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
+    - `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
+- `type` (str): The type of the vector store. It can be "chroma" or "faiss". Default: "chroma"
+- `embedding_size` (int): The size of the embeddings (only for faiss). Default: 1536
+- `retriever_config` (Dict[str, Any]): The configuration of the retriever. Default: empty dictionary
+- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
+*Input Interface*:
+- `operation` (str): The operation to perform. It can be "write" or "read".
+- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
+*Output Interface*:
+- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
+**Arguments**:
+- `backend` (`LiteLLMBackend`): The backend of the flow (used to retrieve the API key)
+- `vector_db` (`VectorStoreRetriever`): The vector store retriever
+- `type` (`str`): The type of the vector store
+- `\**kwargs`: Additional arguments to pass to the flow. See `flows.base_flows.AtomicFlow` for more details.
+<a id="VectorStoreFlow.VectorStoreFlow.instantiate_from_config"></a>
+#### instantiate\_from\_config
+```python
+@classmethod
+def instantiate_from_config(cls, config: Dict[str, Any])
+```
+This method instantiates the flow from a configuration file
+**Arguments**:
+- `config` (`Dict[str, Any]`): The configuration of the flow.
+**Returns**:
+`VectorStoreFlow`: The instantiated flow.
+<a id="VectorStoreFlow.VectorStoreFlow.package_documents"></a>
+#### package\_documents
+```python
+@staticmethod
+def package_documents(documents: List[str]) -> List[Document]
+```
+This method packages the documents in a list of Documents.
+**Arguments**:
+- `documents` (`List[str]`): The documents to package.
+**Returns**:
+`List[Document]`: The packaged documents.
+<a id="VectorStoreFlow.VectorStoreFlow.run"></a>
+#### run
+```python
+def run(input_data: Dict[str, Any]) -> Dict[str, Any]
+```
+This method runs the flow. It either writes or reads memories from the database.
+**Arguments**:
+- `input_data` (`Dict[str, Any]`): The input data of the flow.
+**Returns**:
+`Dict[str, Any]`: The output data of the flow.
+<a id="__init__"></a>
+# \_\_init\_\_

VectorStoreFlow.py CHANGED Viewed

@@ -14,6 +14,39 @@ import hydra
 class VectorStoreFlow(AtomicFlow):
     REQUIRED_KEYS_CONFIG = ["type"]
     vector_db: VectorStoreRetriever
@@ -21,10 +54,18 @@ class VectorStoreFlow(AtomicFlow):
     def __init__(self, backend,vector_db, **kwargs):
         super().__init__(**kwargs)
         self.vector_db = vector_db
     @classmethod
     def _set_up_backend(cls, config):
         kwargs = {}
         kwargs["backend"] = \
@@ -35,6 +76,15 @@ class VectorStoreFlow(AtomicFlow):
     @classmethod
     def _set_up_retriever(cls, api_information,config: Dict[str, Any]) -> Dict[str, Any]:
         embeddings = OpenAIEmbeddings(openai_api_key=api_information.api_key)
         kwargs = {}
@@ -60,6 +110,13 @@ class VectorStoreFlow(AtomicFlow):
     @classmethod
     def instantiate_from_config(cls, config: Dict[str, Any]):
         flow_config = deepcopy(config)
         kwargs = {"flow_config": flow_config}
@@ -74,10 +131,24 @@ class VectorStoreFlow(AtomicFlow):
     @staticmethod
     def package_documents(documents: List[str]) -> List[Document]:
         # TODO(yeeef): support metadata
         return [Document(page_content=doc, metadata={"": ""}) for doc in documents]
     def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
         response = {}
         operation = input_data["operation"]

 class VectorStoreFlow(AtomicFlow):
+    """ A flow that uses the VectorStore model to write and read memories stored in a database (see VectorStoreFlow.yaml for the default configuration)
+    *Configuration Parameters*:
+    - `name` (str): The name of the flow. Default: "VecotrStoreFlow"
+    - `description` (str): A description of the flow. This description is used to generate the help message of the flow.
+    Default: "VectorStoreFlow"
+    - `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
+    default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend), except for the following parameters, whose default values are overwritten:
+        - `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
+        - `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
+    - `type` (str): The type of the vector store. It can be "chroma" or "faiss". Default: "chroma"
+    - `embedding_size` (int): The size of the embeddings (only for faiss). Default: 1536
+    - `retriever_config` (Dict[str, Any]): The configuration of the retriever. Default: empty dictionary
+    - Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
+    *Input Interface*:
+    - `operation` (str): The operation to perform. It can be "write" or "read".
+    - `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
+    *Output Interface*:
+    - `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
+    :param backend: The backend of the flow (used to retrieve the API key)
+    :type backend: LiteLLMBackend
+    :param vector_db: The vector store retriever
+    :type vector_db: VectorStoreRetriever
+    :param type: The type of the vector store
+    :type type: str
+    :param \**kwargs: Additional arguments to pass to the flow. See :class:`flows.base_flows.AtomicFlow` for more details.
+    """
     REQUIRED_KEYS_CONFIG = ["type"]
     vector_db: VectorStoreRetriever
     def __init__(self, backend,vector_db, **kwargs):
         super().__init__(**kwargs)
         self.vector_db = vector_db
+        self.backend = backend
     @classmethod
     def _set_up_backend(cls, config):
+        """ This instantiates the backend of the flow from a configuration file.
+        :param config: The configuration of the backend.
+        :type config: Dict[str, Any]
+        :return: The backend of the flow.
+        :rtype: Dict[str, LiteLLMBackend]
+        """
         kwargs = {}
         kwargs["backend"] = \
     @classmethod
     def _set_up_retriever(cls, api_information,config: Dict[str, Any]) -> Dict[str, Any]:
+        """ This method sets up the vector store retriever.
+        :param config: The configuration of the vector store retriever.
+        :type config: Dict[str, Any]
+        :param api_information: The api information of the vector store retriever.
+        :type api_information: ApiInfo
+        :return: The vector store retriever.
+        :rtype: Dict[str, VectorStoreRetriever]
+        """
         embeddings = OpenAIEmbeddings(openai_api_key=api_information.api_key)
         kwargs = {}
     @classmethod
     def instantiate_from_config(cls, config: Dict[str, Any]):
+        """ This method instantiates the flow from a configuration file
+        :param config: The configuration of the flow.
+        :type config: Dict[str, Any]
+        :return: The instantiated flow.
+        :rtype: VectorStoreFlow
+        """
         flow_config = deepcopy(config)
         kwargs = {"flow_config": flow_config}
     @staticmethod
     def package_documents(documents: List[str]) -> List[Document]:
+        """ This method packages the documents in a list of Documents.
+        :param documents: The documents to package.
+        :type documents: List[str]
+        :return: The packaged documents.
+        :rtype: List[Document]
+        """
         # TODO(yeeef): support metadata
         return [Document(page_content=doc, metadata={"": ""}) for doc in documents]
     def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        """ This method runs the flow. It either writes or reads memories from the database.
+        :param input_data: The input data of the flow.
+        :type input_data: Dict[str, Any]
+        :return: The output data of the flow.
+        :rtype: Dict[str, Any]
+        """
         response = {}
         operation = input_data["operation"]

VectorStoreFlow.yaml CHANGED Viewed

@@ -3,7 +3,8 @@ description: "VectorStoreFlow"
 backend:
   _target_: flows.backends.llm_lite.LiteLLMBackend
-  api_infos: ?
 input_keys:
   - "operation" # read or write
@@ -13,7 +14,6 @@ output_keys:
   - "retrieved"
 type: "chroma"
-api_keys:
-  openai: "YOUR_OPENAI_API_KEY"

 backend:
   _target_: flows.backends.llm_lite.LiteLLMBackend
+  api_infos: ???
+  model_name: "" #Not used in current implementation
 input_keys:
   - "operation" # read or write
   - "retrieved"
 type: "chroma"

demo.yaml ADDED Viewed

	@@ -0,0 +1,85 @@

+chroma_demo_flow:
+  input_interface:
+    - "operation"
+    - "content"
+  output_interface:
+    - "retrieved"
+  _target_: flows.base_flows.SequentialFlow.instantiate_from_default_config
+  name: "demoChromaDBFlow"
+  description: "An example flow of how to read and write in a ChromaDBFlowModule."
+  subflows_config:
+    chroma_db:
+      input_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["operation","content"]
+      _target_: aiflows.VectorStoreFlowModule.ChromaDBFlow.instantiate_from_default_config
+      backend:
+        _target_: flows.backends.llm_lite.LiteLLMBackend
+        api_infos: ???
+        model_name: "" #Not used in current implementation
+      n_results: 1 # number of results to retrieve when query
+  topology:
+    - goal: Write content to the ChromaDB
+      input_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["operation","content"]
+      flow: chroma_db
+      output_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_set:
+            operation: "read"
+        keys_to_rename:
+            retrieved: content
+        keys_to_select: ["operation","content"]
+    - goal: Read content from the ChromaDB
+      input_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["operation","content"]
+      flow: chroma_db
+      output_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["retrieved"]
+vector_store_demo_flow:
+  input_interface:
+      - "operation"
+      - "content"
+  output_interface:
+    - "retrieved"
+  name: "demoVectorStoreFlow"
+  description: "An example flow of how to read and write in a VectorStoreFlowModule."
+  _target_: flows.base_flows.SequentialFlow.instantiate_from_default_config
+  subflows_config:
+    vs_db:
+      _target_: aiflows.VectorStoreFlowModule.VectorStoreFlow.instantiate_from_default_config
+      backend:
+        _target_: flows.backends.llm_lite.LiteLLMBackend
+        api_infos: ???
+        model_name: "" #Not used in current implementation
+  topology:
+    - goal: Write content to the VectorStore
+      input_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["operation","content"]
+      flow: vs_db
+      output_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_set:
+            operation: "read"
+        keys_to_rename:
+            retrieved: content
+        keys_to_select: ["operation","content"]
+    - goal: Read content from the VectorStore
+      input_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["operation","content"]
+      flow: vs_db
+      output_interface:
+        _target_: flows.interfaces.KeyInterface
+        keys_to_select: ["retrieved"]

pip_requirements.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # ToDo

pip_requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+langchain==0.0.336
+chromadb==0.3.29
+faiss-cpu==1.7.4

run.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+import hydra
+import flows
+from flows.flow_launchers import FlowLauncher
+from flows.utils.general_helpers import read_yaml_file
+from flows.backends.api_info import ApiInfo
+from flows import logging
+from flows.flow_cache import CACHING_PARAMETERS, clear_cache
+CACHING_PARAMETERS.do_caching = False  # Set to True to enable caching
+# clear_cache() # Uncomment this line to clear the cache
+logging.set_verbosity_debug()
+dependencies = [
+    {"url": "aiflows/VectorStoreFlowModule", "revision": os.getcwd()},
+]
+from flows import flow_verse
+flow_verse.sync_dependencies(dependencies)
+if __name__ == "__main__":
+    # OpenAI backend
+    api_information = [ApiInfo(backend_used="openai",
+                              api_key = os.getenv("OPENAI_API_KEY"))]
+    # Azure backend
+    # api_information = [ApiInfo(backend_used = "azure",
+    #                            api_base = os.getenv("AZURE_API_BASE"),
+    #                            api_key = os.getenv("AZURE_OPENAI_KEY"),
+    #                            api_version =  os.getenv("AZURE_API_VERSION") )]
+    root_dir = "."
+    cfg_path = os.path.join(root_dir, "demo.yaml")
+    cfg = read_yaml_file(cfg_path)
+    cfg["vector_store_demo_flow"]["subflows_config"]["vs_db"]["backend"]["api_infos"] = api_information
+    cfg["chroma_demo_flow"]["subflows_config"]["chroma_db"]["backend"]["api_infos"] = api_information
+    # ~~~ Get the data ~~~
+    # This can be a list of samples
+    data = {"id": 0, "operation": "write", "content": "demo of writing"}  # Add your data here
+    # ~~~ Run inference ~~~
+    path_to_output_file = None
+    # path_to_output_file = "output.jsonl"  # Uncomment this line to save the output to disk
+    #### CHROMA DEMO ####
+    ### DUMMY DEMO OF WRITING "demo of writing" AND READING "" (Nothing) ###
+    print("DEMO: ChromaDBFlow")
+    flow_with_interfaces_chroma = {
+        "flow": hydra.utils.instantiate(cfg['chroma_demo_flow'], _recursive_=False, _convert_="partial"),
+        "input_interface": (
+            None
+            if getattr(cfg, "input_interface", None) is None
+            else hydra.utils.instantiate(cfg['input_interface'], _recursive_=False)
+        ),
+        "output_interface": (
+            None
+            if getattr(cfg, "output_interface", None) is None
+            else hydra.utils.instantiate(cfg['output_interface'], _recursive_=False)
+        ),
+    }
+    _, outputs = FlowLauncher.launch(
+        flow_with_interfaces=flow_with_interfaces_chroma,
+        data=data,
+        path_to_output_file=path_to_output_file,
+    )
+    # ~~~ Print the output ~~~
+    flow_output_data = outputs[0]
+    print(flow_output_data)
+    #### END CHROMA DEMO ####
+    #### VECTOR STORE DEMO ####
+    print("DEMO: VECTOR STORE DEMO")
+    flow_with_interfaces_vstore = {
+        "flow": hydra.utils.instantiate(cfg['vector_store_demo_flow'], _recursive_=False, _convert_="partial"),
+        "input_interface": (
+            None
+            if getattr(cfg, "input_interface", None) is None
+            else hydra.utils.instantiate(cfg['input_interface'], _recursive_=False)
+        ),
+        "output_interface": (
+            None
+            if getattr(cfg, "output_interface", None) is None
+            else hydra.utils.instantiate(cfg['output_interface'], _recursive_=False)
+        ),
+    }
+    _, outputs = FlowLauncher.launch(
+        flow_with_interfaces=flow_with_interfaces_vstore,
+        data=data,
+        path_to_output_file=path_to_output_file,
+    )
+    # ~~~ Print the output ~~~
+    flow_output_data = outputs[0]
+    print(flow_output_data)