demo + readme
Browse files- .gitignore +443 -1
- ChromaDBFlow.py +58 -2
- ChromaDBFlow.yaml +3 -3
- README.md +219 -15
- VectorStoreFlow.py +71 -0
- VectorStoreFlow.yaml +3 -3
- demo.yaml +85 -0
- pip_requirements.py +0 -1
- pip_requirements.txt +3 -0
- run.py +108 -0
.gitignore
CHANGED
@@ -1 +1,443 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
2 |
+
# Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
3 |
+
|
4 |
+
### C++ ###
|
5 |
+
# Prerequisites
|
6 |
+
*.d
|
7 |
+
|
8 |
+
# Compiled Object files
|
9 |
+
*.slo
|
10 |
+
*.lo
|
11 |
+
*.o
|
12 |
+
*.obj
|
13 |
+
|
14 |
+
# Precompiled Headers
|
15 |
+
*.gch
|
16 |
+
*.pch
|
17 |
+
|
18 |
+
# Compiled Dynamic libraries
|
19 |
+
*.so
|
20 |
+
*.dylib
|
21 |
+
*.dll
|
22 |
+
|
23 |
+
# Fortran module files
|
24 |
+
*.mod
|
25 |
+
*.smod
|
26 |
+
|
27 |
+
# Compiled Static libraries
|
28 |
+
*.lai
|
29 |
+
*.la
|
30 |
+
*.a
|
31 |
+
*.lib
|
32 |
+
|
33 |
+
# Executables
|
34 |
+
*.exe
|
35 |
+
*.out
|
36 |
+
*.app
|
37 |
+
|
38 |
+
### Java ###
|
39 |
+
# Compiled class file
|
40 |
+
*.class
|
41 |
+
|
42 |
+
# Log file
|
43 |
+
*.log
|
44 |
+
|
45 |
+
# BlueJ files
|
46 |
+
*.ctxt
|
47 |
+
|
48 |
+
# Mobile Tools for Java (J2ME)
|
49 |
+
.mtj.tmp/
|
50 |
+
|
51 |
+
# Package Files #
|
52 |
+
*.jar
|
53 |
+
*.war
|
54 |
+
*.nar
|
55 |
+
*.ear
|
56 |
+
*.zip
|
57 |
+
*.tar.gz
|
58 |
+
*.rar
|
59 |
+
|
60 |
+
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
61 |
+
hs_err_pid*
|
62 |
+
replay_pid*
|
63 |
+
|
64 |
+
### Linux ###
|
65 |
+
*~
|
66 |
+
|
67 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
68 |
+
.fuse_hidden*
|
69 |
+
|
70 |
+
# KDE directory preferences
|
71 |
+
.directory
|
72 |
+
|
73 |
+
# Linux trash folder which might appear on any partition or disk
|
74 |
+
.Trash-*
|
75 |
+
|
76 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
77 |
+
.nfs*
|
78 |
+
|
79 |
+
### macOS ###
|
80 |
+
# General
|
81 |
+
.DS_Store
|
82 |
+
.AppleDouble
|
83 |
+
.LSOverride
|
84 |
+
|
85 |
+
# Icon must end with two \r
|
86 |
+
Icon
|
87 |
+
|
88 |
+
|
89 |
+
# Thumbnails
|
90 |
+
._*
|
91 |
+
|
92 |
+
# Files that might appear in the root of a volume
|
93 |
+
.DocumentRevisions-V100
|
94 |
+
.fseventsd
|
95 |
+
.Spotlight-V100
|
96 |
+
.TemporaryItems
|
97 |
+
.Trashes
|
98 |
+
.VolumeIcon.icns
|
99 |
+
.com.apple.timemachine.donotpresent
|
100 |
+
|
101 |
+
# Directories potentially created on remote AFP share
|
102 |
+
.AppleDB
|
103 |
+
.AppleDesktop
|
104 |
+
Network Trash Folder
|
105 |
+
Temporary Items
|
106 |
+
.apdisk
|
107 |
+
|
108 |
+
### macOS Patch ###
|
109 |
+
# iCloud generated files
|
110 |
+
*.icloud
|
111 |
+
|
112 |
+
### PyCharm ###
|
113 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
114 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
115 |
+
|
116 |
+
# User-specific stuff
|
117 |
+
.idea/**/workspace.xml
|
118 |
+
.idea/**/tasks.xml
|
119 |
+
.idea/**/usage.statistics.xml
|
120 |
+
.idea/**/dictionaries
|
121 |
+
.idea/**/shelf
|
122 |
+
|
123 |
+
# AWS User-specific
|
124 |
+
.idea/**/aws.xml
|
125 |
+
|
126 |
+
# Generated files
|
127 |
+
.idea/**/contentModel.xml
|
128 |
+
|
129 |
+
# Sensitive or high-churn files
|
130 |
+
.idea/**/dataSources/
|
131 |
+
.idea/**/dataSources.ids
|
132 |
+
.idea/**/dataSources.local.xml
|
133 |
+
.idea/**/sqlDataSources.xml
|
134 |
+
.idea/**/dynamic.xml
|
135 |
+
.idea/**/uiDesigner.xml
|
136 |
+
.idea/**/dbnavigator.xml
|
137 |
+
|
138 |
+
# Gradle
|
139 |
+
.idea/**/gradle.xml
|
140 |
+
.idea/**/libraries
|
141 |
+
|
142 |
+
# Gradle and Maven with auto-import
|
143 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
144 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
145 |
+
# auto-import.
|
146 |
+
# .idea/artifacts
|
147 |
+
# .idea/compiler.xml
|
148 |
+
# .idea/jarRepositories.xml
|
149 |
+
# .idea/modules.xml
|
150 |
+
# .idea/*.iml
|
151 |
+
# .idea/modules
|
152 |
+
# *.iml
|
153 |
+
# *.ipr
|
154 |
+
|
155 |
+
# CMake
|
156 |
+
cmake-build-*/
|
157 |
+
|
158 |
+
# Mongo Explorer plugin
|
159 |
+
.idea/**/mongoSettings.xml
|
160 |
+
|
161 |
+
# File-based project format
|
162 |
+
*.iws
|
163 |
+
|
164 |
+
# IntelliJ
|
165 |
+
out/
|
166 |
+
|
167 |
+
# mpeltonen/sbt-idea plugin
|
168 |
+
.idea_modules/
|
169 |
+
|
170 |
+
# JIRA plugin
|
171 |
+
atlassian-ide-plugin.xml
|
172 |
+
|
173 |
+
# Cursive Clojure plugin
|
174 |
+
.idea/replstate.xml
|
175 |
+
|
176 |
+
# SonarLint plugin
|
177 |
+
.idea/sonarlint/
|
178 |
+
|
179 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
180 |
+
com_crashlytics_export_strings.xml
|
181 |
+
crashlytics.properties
|
182 |
+
crashlytics-build.properties
|
183 |
+
fabric.properties
|
184 |
+
|
185 |
+
# Editor-based Rest Client
|
186 |
+
.idea/httpRequests
|
187 |
+
|
188 |
+
# Android studio 3.1+ serialized cache file
|
189 |
+
.idea/caches/build_file_checksums.ser
|
190 |
+
|
191 |
+
### PyCharm Patch ###
|
192 |
+
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
193 |
+
|
194 |
+
# *.iml
|
195 |
+
# modules.xml
|
196 |
+
# .idea/misc.xml
|
197 |
+
# *.ipr
|
198 |
+
|
199 |
+
# Sonarlint plugin
|
200 |
+
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
201 |
+
.idea/**/sonarlint/
|
202 |
+
|
203 |
+
# SonarQube Plugin
|
204 |
+
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
205 |
+
.idea/**/sonarIssues.xml
|
206 |
+
|
207 |
+
# Markdown Navigator plugin
|
208 |
+
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
209 |
+
.idea/**/markdown-navigator.xml
|
210 |
+
.idea/**/markdown-navigator-enh.xml
|
211 |
+
.idea/**/markdown-navigator/
|
212 |
+
|
213 |
+
# Cache file creation bug
|
214 |
+
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
215 |
+
.idea/$CACHE_FILE$
|
216 |
+
|
217 |
+
# CodeStream plugin
|
218 |
+
# https://plugins.jetbrains.com/plugin/12206-codestream
|
219 |
+
.idea/codestream.xml
|
220 |
+
|
221 |
+
# Azure Toolkit for IntelliJ plugin
|
222 |
+
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
223 |
+
.idea/**/azureSettings.xml
|
224 |
+
|
225 |
+
### Python ###
|
226 |
+
# Byte-compiled / optimized / DLL files
|
227 |
+
__pycache__/
|
228 |
+
*.py[cod]
|
229 |
+
*$py.class
|
230 |
+
|
231 |
+
# C extensions
|
232 |
+
|
233 |
+
# Distribution / packaging
|
234 |
+
.Python
|
235 |
+
build/
|
236 |
+
develop-eggs/
|
237 |
+
dist/
|
238 |
+
downloads/
|
239 |
+
eggs/
|
240 |
+
.eggs/
|
241 |
+
lib/
|
242 |
+
lib64/
|
243 |
+
parts/
|
244 |
+
sdist/
|
245 |
+
var/
|
246 |
+
wheels/
|
247 |
+
share/python-wheels/
|
248 |
+
*.egg-info/
|
249 |
+
.installed.cfg
|
250 |
+
*.egg
|
251 |
+
MANIFEST
|
252 |
+
|
253 |
+
# PyInstaller
|
254 |
+
# Usually these files are written by a python script from a template
|
255 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
256 |
+
*.manifest
|
257 |
+
*.spec
|
258 |
+
|
259 |
+
# Installer logs
|
260 |
+
pip-log.txt
|
261 |
+
pip-delete-this-directory.txt
|
262 |
+
|
263 |
+
# Unit test / coverage reports
|
264 |
+
htmlcov/
|
265 |
+
.tox/
|
266 |
+
.nox/
|
267 |
+
.coverage
|
268 |
+
.coverage.*
|
269 |
+
.cache
|
270 |
+
nosetests.xml
|
271 |
+
coverage.xml
|
272 |
+
*.cover
|
273 |
+
*.py,cover
|
274 |
+
.hypothesis/
|
275 |
+
.pytest_cache/
|
276 |
+
cover/
|
277 |
+
|
278 |
+
# Translations
|
279 |
+
*.mo
|
280 |
+
*.pot
|
281 |
+
|
282 |
+
# Django stuff:
|
283 |
+
local_settings.py
|
284 |
+
db.sqlite3
|
285 |
+
db.sqlite3-journal
|
286 |
+
|
287 |
+
# Flask stuff:
|
288 |
+
instance/
|
289 |
+
.webassets-cache
|
290 |
+
|
291 |
+
# Scrapy stuff:
|
292 |
+
.scrapy
|
293 |
+
|
294 |
+
# Sphinx documentation
|
295 |
+
docs/_build/
|
296 |
+
|
297 |
+
# PyBuilder
|
298 |
+
.pybuilder/
|
299 |
+
target/
|
300 |
+
|
301 |
+
# Jupyter Notebook
|
302 |
+
.ipynb_checkpoints
|
303 |
+
|
304 |
+
# IPython
|
305 |
+
profile_default/
|
306 |
+
ipython_config.py
|
307 |
+
|
308 |
+
# pyenv
|
309 |
+
# For a library or package, you might want to ignore these files since the code is
|
310 |
+
# intended to run in multiple environments; otherwise, check them in:
|
311 |
+
# .python-version
|
312 |
+
|
313 |
+
# pipenv
|
314 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
315 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
316 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
317 |
+
# install all needed dependencies.
|
318 |
+
#Pipfile.lock
|
319 |
+
|
320 |
+
# poetry
|
321 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
322 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
323 |
+
# commonly ignored for libraries.
|
324 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
325 |
+
#poetry.lock
|
326 |
+
|
327 |
+
# pdm
|
328 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
329 |
+
#pdm.lock
|
330 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
331 |
+
# in version control.
|
332 |
+
# https://pdm.fming.dev/#use-with-ide
|
333 |
+
.pdm.toml
|
334 |
+
|
335 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
336 |
+
__pypackages__/
|
337 |
+
|
338 |
+
# Celery stuff
|
339 |
+
celerybeat-schedule
|
340 |
+
celerybeat.pid
|
341 |
+
|
342 |
+
# SageMath parsed files
|
343 |
+
*.sage.py
|
344 |
+
|
345 |
+
# Environments
|
346 |
+
.env
|
347 |
+
.venv
|
348 |
+
env/
|
349 |
+
venv/
|
350 |
+
ENV/
|
351 |
+
env.bak/
|
352 |
+
venv.bak/
|
353 |
+
|
354 |
+
# Spyder project settings
|
355 |
+
.spyderproject
|
356 |
+
.spyproject
|
357 |
+
|
358 |
+
# Rope project settings
|
359 |
+
.ropeproject
|
360 |
+
|
361 |
+
# mkdocs documentation
|
362 |
+
/site
|
363 |
+
|
364 |
+
# mypy
|
365 |
+
.mypy_cache/
|
366 |
+
.dmypy.json
|
367 |
+
dmypy.json
|
368 |
+
|
369 |
+
# Pyre type checker
|
370 |
+
.pyre/
|
371 |
+
|
372 |
+
# pytype static type analyzer
|
373 |
+
.pytype/
|
374 |
+
|
375 |
+
# Cython debug symbols
|
376 |
+
cython_debug/
|
377 |
+
|
378 |
+
# PyCharm
|
379 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
380 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
381 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
382 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
383 |
+
#.idea/
|
384 |
+
|
385 |
+
### Python Patch ###
|
386 |
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
387 |
+
poetry.toml
|
388 |
+
|
389 |
+
# ruff
|
390 |
+
.ruff_cache/
|
391 |
+
|
392 |
+
# LSP config files
|
393 |
+
pyrightconfig.json
|
394 |
+
|
395 |
+
### VisualStudioCode ###
|
396 |
+
.vscode/*
|
397 |
+
!.vscode/settings.json
|
398 |
+
!.vscode/tasks.json
|
399 |
+
!.vscode/launch.json
|
400 |
+
!.vscode/extensions.json
|
401 |
+
!.vscode/*.code-snippets
|
402 |
+
|
403 |
+
# Local History for Visual Studio Code
|
404 |
+
.history/
|
405 |
+
|
406 |
+
# Built Visual Studio Code Extensions
|
407 |
+
*.vsix
|
408 |
+
|
409 |
+
### VisualStudioCode Patch ###
|
410 |
+
# Ignore all local history of files
|
411 |
+
.history
|
412 |
+
.ionide
|
413 |
+
|
414 |
+
### Windows ###
|
415 |
+
# Windows thumbnail cache files
|
416 |
+
Thumbs.db
|
417 |
+
Thumbs.db:encryptable
|
418 |
+
ehthumbs.db
|
419 |
+
ehthumbs_vista.db
|
420 |
+
|
421 |
+
# Dump file
|
422 |
+
*.stackdump
|
423 |
+
|
424 |
+
# Folder config file
|
425 |
+
[Dd]esktop.ini
|
426 |
+
|
427 |
+
# Recycle Bin used on file shares
|
428 |
+
$RECYCLE.BIN/
|
429 |
+
|
430 |
+
# Windows Installer files
|
431 |
+
*.cab
|
432 |
+
*.msi
|
433 |
+
*.msix
|
434 |
+
*.msm
|
435 |
+
*.msp
|
436 |
+
|
437 |
+
# Windows shortcuts
|
438 |
+
*.lnk
|
439 |
+
|
440 |
+
# End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
441 |
+
|
442 |
+
.*
|
443 |
+
flow_modules/
|
ChromaDBFlow.py
CHANGED
@@ -12,7 +12,33 @@ from flows.base_flows import AtomicFlow
|
|
12 |
import hydra
|
13 |
|
14 |
class ChromaDBFlow(AtomicFlow):
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def __init__(self, backend,**kwargs):
|
17 |
super().__init__(**kwargs)
|
18 |
self.client = ChromaClient()
|
@@ -21,6 +47,13 @@ class ChromaDBFlow(AtomicFlow):
|
|
21 |
|
22 |
@classmethod
|
23 |
def _set_up_backend(cls, config):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
kwargs = {}
|
25 |
|
26 |
kwargs["backend"] = \
|
@@ -30,6 +63,13 @@ class ChromaDBFlow(AtomicFlow):
|
|
30 |
|
31 |
@classmethod
|
32 |
def instantiate_from_config(cls, config):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
flow_config = deepcopy(config)
|
34 |
|
35 |
kwargs = {"flow_config": flow_config}
|
@@ -41,13 +81,29 @@ class ChromaDBFlow(AtomicFlow):
|
|
41 |
return cls(**kwargs)
|
42 |
|
43 |
def get_input_keys(self) -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
44 |
return self.flow_config["input_keys"]
|
45 |
|
46 |
def get_output_keys(self) -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
47 |
return self.flow_config["output_keys"]
|
48 |
|
49 |
def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
api_information = self.backend.get_key()
|
52 |
|
53 |
if api_information.backend_used == "openai":
|
|
|
12 |
import hydra
|
13 |
|
14 |
class ChromaDBFlow(AtomicFlow):
|
15 |
+
""" A flow that uses the ChromaDB model to write and read memories stored in a database
|
16 |
+
|
17 |
+
*Configuration Parameters*:
|
18 |
+
|
19 |
+
- `name` (str): The name of the flow. Default: "chroma_db"
|
20 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
|
21 |
+
Default: "ChromaDB is a document store that uses vector embeddings to store and retrieve documents."
|
22 |
+
- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
|
23 |
+
default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend). Except for the following parameter whose default value is overwritten:
|
24 |
+
- `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
|
25 |
+
- `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
|
26 |
+
- `n_results` (int): The number of results to retrieve when reading from the database. Default: 5
|
27 |
+
- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
|
28 |
+
|
29 |
+
*Input Interface*:
|
30 |
+
|
31 |
+
- `operation` (str): The operation to perform. It can be "write" or "read".
|
32 |
+
- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
|
33 |
+
|
34 |
+
*Output Interface*:
|
35 |
+
|
36 |
+
- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
|
37 |
+
|
38 |
+
:param backend: The backend of the flow (used to retrieve the API key)
|
39 |
+
:type backend: LiteLLMBackend
|
40 |
+
:param \**kwargs: Additional arguments to pass to the flow.
|
41 |
+
"""
|
42 |
def __init__(self, backend,**kwargs):
|
43 |
super().__init__(**kwargs)
|
44 |
self.client = ChromaClient()
|
|
|
47 |
|
48 |
@classmethod
|
49 |
def _set_up_backend(cls, config):
|
50 |
+
""" This instantiates the backend of the flow from a configuration file.
|
51 |
+
|
52 |
+
:param config: The configuration of the backend.
|
53 |
+
:type config: Dict[str, Any]
|
54 |
+
:return: The backend of the flow.
|
55 |
+
:rtype: Dict[str, LiteLLMBackend]
|
56 |
+
"""
|
57 |
kwargs = {}
|
58 |
|
59 |
kwargs["backend"] = \
|
|
|
63 |
|
64 |
@classmethod
|
65 |
def instantiate_from_config(cls, config):
|
66 |
+
""" This method instantiates the flow from a configuration file
|
67 |
+
|
68 |
+
:param config: The configuration of the flow.
|
69 |
+
:type config: Dict[str, Any]
|
70 |
+
:return: The instantiated flow.
|
71 |
+
:rtype: ChromaDBFlow
|
72 |
+
"""
|
73 |
flow_config = deepcopy(config)
|
74 |
|
75 |
kwargs = {"flow_config": flow_config}
|
|
|
81 |
return cls(**kwargs)
|
82 |
|
83 |
def get_input_keys(self) -> List[str]:
|
84 |
+
""" This method returns the input keys of the flow.
|
85 |
+
|
86 |
+
:return: The input keys of the flow.
|
87 |
+
:rtype: List[str]
|
88 |
+
"""
|
89 |
return self.flow_config["input_keys"]
|
90 |
|
91 |
def get_output_keys(self) -> List[str]:
|
92 |
+
""" This method returns the output keys of the flow.
|
93 |
+
|
94 |
+
:return: The output keys of the flow.
|
95 |
+
:rtype: List[str]
|
96 |
+
"""
|
97 |
return self.flow_config["output_keys"]
|
98 |
|
99 |
def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
100 |
+
""" This method runs the flow. It runs the ChromaDBFlow. It either writes or reads memories from the database.
|
101 |
+
|
102 |
+
:param input_data: The input data of the flow.
|
103 |
+
:type input_data: Dict[str, Any]
|
104 |
+
:return: The output data of the flow.
|
105 |
+
:rtype: Dict[str, Any]
|
106 |
+
"""
|
107 |
api_information = self.backend.get_key()
|
108 |
|
109 |
if api_information.backend_used == "openai":
|
ChromaDBFlow.yaml
CHANGED
@@ -4,11 +4,11 @@ description: ChromaDB is a document store that uses vector embeddings to store a
|
|
4 |
backend:
|
5 |
_target_: flows.backends.llm_lite.LiteLLMBackend
|
6 |
api_infos: ???
|
7 |
-
|
8 |
-
|
9 |
- operation
|
10 |
- content
|
11 |
-
|
12 |
- retrieved
|
13 |
|
14 |
n_results: 5 # number of results to retrieve when querying
|
|
|
4 |
backend:
|
5 |
_target_: flows.backends.llm_lite.LiteLLMBackend
|
6 |
api_infos: ???
|
7 |
+
model_name: "" #Not used in current implementation
|
8 |
+
input_interface:
|
9 |
- operation
|
10 |
- content
|
11 |
+
output_interface:
|
12 |
- retrieved
|
13 |
|
14 |
n_results: 5 # number of results to retrieve when querying
|
README.md
CHANGED
@@ -1,25 +1,229 @@
|
|
1 |
-
|
2 |
-
license: mit
|
3 |
-
---
|
4 |
-
## Description
|
5 |
-
ToDo
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
|
11 |
-
|
12 |
|
13 |
-
|
14 |
|
15 |
-
|
16 |
|
17 |
-
|
18 |
|
19 |
-
|
20 |
|
21 |
-
|
|
|
|
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
(Note that the interface might depend on the state of the Flow.)
|
|
|
1 |
+
# Table of Contents
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
* [run](#run)
|
4 |
+
* [ChromaDBFlow](#ChromaDBFlow)
|
5 |
+
* [ChromaDBFlow](#ChromaDBFlow.ChromaDBFlow)
|
6 |
+
* [instantiate\_from\_config](#ChromaDBFlow.ChromaDBFlow.instantiate_from_config)
|
7 |
+
* [get\_input\_keys](#ChromaDBFlow.ChromaDBFlow.get_input_keys)
|
8 |
+
* [get\_output\_keys](#ChromaDBFlow.ChromaDBFlow.get_output_keys)
|
9 |
+
* [run](#ChromaDBFlow.ChromaDBFlow.run)
|
10 |
+
* [VectorStoreFlow](#VectorStoreFlow)
|
11 |
+
* [VectorStoreFlow](#VectorStoreFlow.VectorStoreFlow)
|
12 |
+
* [instantiate\_from\_config](#VectorStoreFlow.VectorStoreFlow.instantiate_from_config)
|
13 |
+
* [package\_documents](#VectorStoreFlow.VectorStoreFlow.package_documents)
|
14 |
+
* [run](#VectorStoreFlow.VectorStoreFlow.run)
|
15 |
+
* [\_\_init\_\_](#__init__)
|
16 |
|
17 |
+
<a id="run"></a>
|
18 |
|
19 |
+
# run
|
20 |
|
21 |
+
<a id="ChromaDBFlow"></a>
|
22 |
|
23 |
+
# ChromaDBFlow
|
24 |
|
25 |
+
<a id="ChromaDBFlow.ChromaDBFlow"></a>
|
26 |
|
27 |
+
## ChromaDBFlow Objects
|
28 |
|
29 |
+
```python
|
30 |
+
class ChromaDBFlow(AtomicFlow)
|
31 |
+
```
|
32 |
|
33 |
+
A flow that uses the ChromaDB model to write and read memories stored in a database
|
34 |
+
|
35 |
+
*Configuration Parameters*:
|
36 |
+
|
37 |
+
- `name` (str): The name of the flow. Default: "chroma_db"
|
38 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
|
39 |
+
Default: "ChromaDB is a document store that uses vector embeddings to store and retrieve documents."
|
40 |
+
- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
|
41 |
+
default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend). Except for the following parameter whose default value is overwritten:
|
42 |
+
- `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
|
43 |
+
- `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
|
44 |
+
- `n_results` (int): The number of results to retrieve when reading from the database. Default: 5
|
45 |
+
- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
|
46 |
+
|
47 |
+
*Input Interface*:
|
48 |
+
|
49 |
+
- `operation` (str): The operation to perform. It can be "write" or "read".
|
50 |
+
- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
|
51 |
+
|
52 |
+
*Output Interface*:
|
53 |
+
|
54 |
+
- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
|
55 |
+
|
56 |
+
**Arguments**:
|
57 |
+
|
58 |
+
- `backend` (`LiteLLMBackend`): The backend of the flow (used to retrieve the API key)
|
59 |
+
- `**kwargs`: Additional arguments to pass to the flow.
|
60 |
+
|
61 |
+
<a id="ChromaDBFlow.ChromaDBFlow.instantiate_from_config"></a>
|
62 |
+
|
63 |
+
#### instantiate\_from\_config
|
64 |
+
|
65 |
+
```python
|
66 |
+
@classmethod
|
67 |
+
def instantiate_from_config(cls, config)
|
68 |
+
```
|
69 |
+
|
70 |
+
This method instantiates the flow from a configuration file
|
71 |
+
|
72 |
+
**Arguments**:
|
73 |
+
|
74 |
+
- `config` (`Dict[str, Any]`): The configuration of the flow.
|
75 |
+
|
76 |
+
**Returns**:
|
77 |
+
|
78 |
+
`ChromaDBFlow`: The instantiated flow.
|
79 |
+
|
80 |
+
<a id="ChromaDBFlow.ChromaDBFlow.get_input_keys"></a>
|
81 |
+
|
82 |
+
#### get\_input\_keys
|
83 |
+
|
84 |
+
```python
|
85 |
+
def get_input_keys() -> List[str]
|
86 |
+
```
|
87 |
+
|
88 |
+
This method returns the input keys of the flow.
|
89 |
+
|
90 |
+
**Returns**:
|
91 |
+
|
92 |
+
`List[str]`: The input keys of the flow.
|
93 |
+
|
94 |
+
<a id="ChromaDBFlow.ChromaDBFlow.get_output_keys"></a>
|
95 |
+
|
96 |
+
#### get\_output\_keys
|
97 |
+
|
98 |
+
```python
|
99 |
+
def get_output_keys() -> List[str]
|
100 |
+
```
|
101 |
+
|
102 |
+
This method returns the output keys of the flow.
|
103 |
+
|
104 |
+
**Returns**:
|
105 |
+
|
106 |
+
`List[str]`: The output keys of the flow.
|
107 |
+
|
108 |
+
<a id="ChromaDBFlow.ChromaDBFlow.run"></a>
|
109 |
+
|
110 |
+
#### run
|
111 |
+
|
112 |
+
```python
|
113 |
+
def run(input_data: Dict[str, Any]) -> Dict[str, Any]
|
114 |
+
```
|
115 |
+
|
116 |
+
This method runs the ChromaDBFlow. It either writes memories to the database or reads memories from it.
|
117 |
+
|
118 |
+
**Arguments**:
|
119 |
+
|
120 |
+
- `input_data` (`Dict[str, Any]`): The input data of the flow.
|
121 |
+
|
122 |
+
**Returns**:
|
123 |
+
|
124 |
+
`Dict[str, Any]`: The output data of the flow.
|
125 |
+
|
126 |
+
<a id="VectorStoreFlow"></a>
|
127 |
+
|
128 |
+
# VectorStoreFlow
|
129 |
+
|
130 |
+
<a id="VectorStoreFlow.VectorStoreFlow"></a>
|
131 |
+
|
132 |
+
## VectorStoreFlow Objects
|
133 |
+
|
134 |
+
```python
|
135 |
+
class VectorStoreFlow(AtomicFlow)
|
136 |
+
```
|
137 |
+
|
138 |
+
A flow that uses the VectorStore model to write and read memories stored in a database (see VectorStoreFlow.yaml for the default configuration)
|
139 |
+
|
140 |
+
*Configuration Parameters*:
|
141 |
+
|
142 |
+
- `name` (str): The name of the flow. Default: "VectorStoreFlow"
|
143 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
|
144 |
+
Default: "VectorStoreFlow"
|
145 |
+
- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
|
146 |
+
default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend). Except for the following parameter whose default value is overwritten:
|
147 |
+
- `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
|
148 |
+
- `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
|
149 |
+
- `type` (str): The type of the vector store. It can be "chroma" or "faiss". Default: "chroma"
|
150 |
+
- `embedding_size` (int): The size of the embeddings (only for faiss). Default: 1536
|
151 |
+
- `retriever_config` (Dict[str, Any]): The configuration of the retriever. Default: empty dictionary
|
152 |
+
- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
|
153 |
+
|
154 |
+
*Input Interface*:
|
155 |
+
|
156 |
+
- `operation` (str): The operation to perform. It can be "write" or "read".
|
157 |
+
- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
|
158 |
+
|
159 |
+
*Output Interface*:
|
160 |
+
|
161 |
+
- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
|
162 |
+
|
163 |
+
**Arguments**:
|
164 |
+
|
165 |
+
- `backend` (`LiteLLMBackend`): The backend of the flow (used to retrieve the API key)
|
166 |
+
- `vector_db` (`VectorStoreRetriever`): The vector store retriever
|
167 |
+
- `type` (`str`): The type of the vector store
|
168 |
+
- `**kwargs`: Additional arguments to pass to the flow. See `flows.base_flows.AtomicFlow` for more details.
|
169 |
+
|
170 |
+
<a id="VectorStoreFlow.VectorStoreFlow.instantiate_from_config"></a>
|
171 |
+
|
172 |
+
#### instantiate\_from\_config
|
173 |
+
|
174 |
+
```python
|
175 |
+
@classmethod
|
176 |
+
def instantiate_from_config(cls, config: Dict[str, Any])
|
177 |
+
```
|
178 |
+
|
179 |
+
This method instantiates the flow from a configuration file
|
180 |
+
|
181 |
+
**Arguments**:
|
182 |
+
|
183 |
+
- `config` (`Dict[str, Any]`): The configuration of the flow.
|
184 |
+
|
185 |
+
**Returns**:
|
186 |
+
|
187 |
+
`VectorStoreFlow`: The instantiated flow.
|
188 |
+
|
189 |
+
<a id="VectorStoreFlow.VectorStoreFlow.package_documents"></a>
|
190 |
+
|
191 |
+
#### package\_documents
|
192 |
+
|
193 |
+
```python
|
194 |
+
@staticmethod
|
195 |
+
def package_documents(documents: List[str]) -> List[Document]
|
196 |
+
```
|
197 |
+
|
198 |
+
This method packages the documents in a list of Documents.
|
199 |
+
|
200 |
+
**Arguments**:
|
201 |
+
|
202 |
+
- `documents` (`List[str]`): The documents to package.
|
203 |
+
|
204 |
+
**Returns**:
|
205 |
+
|
206 |
+
`List[Document]`: The packaged documents.
|
207 |
+
|
208 |
+
<a id="VectorStoreFlow.VectorStoreFlow.run"></a>
|
209 |
+
|
210 |
+
#### run
|
211 |
+
|
212 |
+
```python
|
213 |
+
def run(input_data: Dict[str, Any]) -> Dict[str, Any]
|
214 |
+
```
|
215 |
+
|
216 |
+
This method runs the flow. It either writes or reads memories from the database.
|
217 |
+
|
218 |
+
**Arguments**:
|
219 |
+
|
220 |
+
- `input_data` (`Dict[str, Any]`): The input data of the flow.
|
221 |
+
|
222 |
+
**Returns**:
|
223 |
+
|
224 |
+
`Dict[str, Any]`: The output data of the flow.
|
225 |
+
|
226 |
+
<a id="__init__"></a>
|
227 |
+
|
228 |
+
# \_\_init\_\_
|
229 |
|
|
VectorStoreFlow.py
CHANGED
@@ -14,6 +14,39 @@ import hydra
|
|
14 |
|
15 |
|
16 |
class VectorStoreFlow(AtomicFlow):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
REQUIRED_KEYS_CONFIG = ["type"]
|
18 |
|
19 |
vector_db: VectorStoreRetriever
|
@@ -21,10 +54,18 @@ class VectorStoreFlow(AtomicFlow):
|
|
21 |
def __init__(self, backend,vector_db, **kwargs):
|
22 |
super().__init__(**kwargs)
|
23 |
self.vector_db = vector_db
|
|
|
24 |
|
25 |
|
26 |
@classmethod
|
27 |
def _set_up_backend(cls, config):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
kwargs = {}
|
29 |
|
30 |
kwargs["backend"] = \
|
@@ -35,6 +76,15 @@ class VectorStoreFlow(AtomicFlow):
|
|
35 |
|
36 |
@classmethod
|
37 |
def _set_up_retriever(cls, api_information,config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
embeddings = OpenAIEmbeddings(openai_api_key=api_information.api_key)
|
40 |
kwargs = {}
|
@@ -60,6 +110,13 @@ class VectorStoreFlow(AtomicFlow):
|
|
60 |
|
61 |
@classmethod
|
62 |
def instantiate_from_config(cls, config: Dict[str, Any]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
flow_config = deepcopy(config)
|
64 |
|
65 |
kwargs = {"flow_config": flow_config}
|
@@ -74,10 +131,24 @@ class VectorStoreFlow(AtomicFlow):
|
|
74 |
|
75 |
@staticmethod
|
76 |
def package_documents(documents: List[str]) -> List[Document]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
# TODO(yeeef): support metadata
|
78 |
return [Document(page_content=doc, metadata={"": ""}) for doc in documents]
|
79 |
|
80 |
def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
response = {}
|
82 |
|
83 |
operation = input_data["operation"]
|
|
|
14 |
|
15 |
|
16 |
class VectorStoreFlow(AtomicFlow):
|
17 |
+
""" A flow that uses the VectorStore model to write and read memories stored in a database (see VectorStoreFlow.yaml for the default configuration)
|
18 |
+
|
19 |
+
*Configuration Parameters*:
|
20 |
+
|
21 |
+
- `name` (str): The name of the flow. Default: "VectorStoreFlow"
|
22 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow.
|
23 |
+
Default: "VectorStoreFlow"
|
24 |
+
- `backend` (Dict[str, Any]): The configuration of the backend which is used to fetch api keys. Default: LiteLLMBackend with the
|
25 |
+
default parameters of LiteLLMBackend (see flows.backends.LiteLLMBackend). Except for the following parameter whose default value is overwritten:
|
26 |
+
- `api_infos` (List[Dict[str, Any]]): The list of api infos. Default: No default value, this parameter is required.
|
27 |
+
- `model_name` (str): The name of the model. Default: "". In the current implementation, this parameter is not used.
|
28 |
+
- `type` (str): The type of the vector store. It can be "chroma" or "faiss". Default: "chroma"
|
29 |
+
- `embedding_size` (int): The size of the embeddings (only for faiss). Default: 1536
|
30 |
+
- `retriever_config` (Dict[str, Any]): The configuration of the retriever. Default: empty dictionary
|
31 |
+
- Other parameters are inherited from the default configuration of AtomicFlow (see AtomicFlow)
|
32 |
+
|
33 |
+
*Input Interface*:
|
34 |
+
|
35 |
+
- `operation` (str): The operation to perform. It can be "write" or "read".
|
36 |
+
- `content` (str or List[str]): The content to write or read. If operation is "write", it must be a string or a list of strings. If operation is "read", it must be a string.
|
37 |
+
|
38 |
+
*Output Interface*:
|
39 |
+
|
40 |
+
- `retrieved` (str or List[str]): The retrieved content. If operation is "write", it is an empty string. If operation is "read", it is a string or a list of strings.
|
41 |
+
|
42 |
+
:param backend: The backend of the flow (used to retrieve the API key)
|
43 |
+
:type backend: LiteLLMBackend
|
44 |
+
:param vector_db: The vector store retriever
|
45 |
+
:type vector_db: VectorStoreRetriever
|
46 |
+
:param type: The type of the vector store
|
47 |
+
:type type: str
|
48 |
+
:param \**kwargs: Additional arguments to pass to the flow. See :class:`flows.base_flows.AtomicFlow` for more details.
|
49 |
+
"""
|
50 |
REQUIRED_KEYS_CONFIG = ["type"]
|
51 |
|
52 |
vector_db: VectorStoreRetriever
|
|
|
54 |
def __init__(self, backend,vector_db, **kwargs):
|
55 |
super().__init__(**kwargs)
|
56 |
self.vector_db = vector_db
|
57 |
+
self.backend = backend
|
58 |
|
59 |
|
60 |
@classmethod
|
61 |
def _set_up_backend(cls, config):
|
62 |
+
""" This instantiates the backend of the flow from a configuration file.
|
63 |
+
|
64 |
+
:param config: The configuration of the backend.
|
65 |
+
:type config: Dict[str, Any]
|
66 |
+
:return: The backend of the flow.
|
67 |
+
:rtype: Dict[str, LiteLLMBackend]
|
68 |
+
"""
|
69 |
kwargs = {}
|
70 |
|
71 |
kwargs["backend"] = \
|
|
|
76 |
|
77 |
@classmethod
|
78 |
def _set_up_retriever(cls, api_information,config: Dict[str, Any]) -> Dict[str, Any]:
|
79 |
+
""" This method sets up the retriever of the vector store retriever.
|
80 |
+
|
81 |
+
:param config: The configuration of the vector store retriever.
|
82 |
+
:type config: Dict[str, Any]
|
83 |
+
:param api_information: The api information of the vector store retriever.
|
84 |
+
:type api_information: ApiInfo
|
85 |
+
:return: The vector store retriever.
|
86 |
+
:rtype: Dict[str, VectorStoreRetriever]
|
87 |
+
"""
|
88 |
|
89 |
embeddings = OpenAIEmbeddings(openai_api_key=api_information.api_key)
|
90 |
kwargs = {}
|
|
|
110 |
|
111 |
@classmethod
|
112 |
def instantiate_from_config(cls, config: Dict[str, Any]):
|
113 |
+
""" This method instantiates the flow from a configuration file
|
114 |
+
|
115 |
+
:param config: The configuration of the flow.
|
116 |
+
:type config: Dict[str, Any]
|
117 |
+
:return: The instantiated flow.
|
118 |
+
:rtype: VectorStoreFlow
|
119 |
+
"""
|
120 |
flow_config = deepcopy(config)
|
121 |
|
122 |
kwargs = {"flow_config": flow_config}
|
|
|
131 |
|
132 |
@staticmethod
|
133 |
def package_documents(documents: List[str]) -> List[Document]:
|
134 |
+
""" This method packages the documents in a list of Documents.
|
135 |
+
|
136 |
+
:param documents: The documents to package.
|
137 |
+
:type documents: List[str]
|
138 |
+
:return: The packaged documents.
|
139 |
+
:rtype: List[Document]
|
140 |
+
"""
|
141 |
# TODO(yeeef): support metadata
|
142 |
return [Document(page_content=doc, metadata={"": ""}) for doc in documents]
|
143 |
|
144 |
def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
145 |
+
""" This method runs the flow. It either writes or reads memories from the database.
|
146 |
+
|
147 |
+
:param input_data: The input data of the flow.
|
148 |
+
:type input_data: Dict[str, Any]
|
149 |
+
:return: The output data of the flow.
|
150 |
+
:rtype: Dict[str, Any]
|
151 |
+
"""
|
152 |
response = {}
|
153 |
|
154 |
operation = input_data["operation"]
|
VectorStoreFlow.yaml
CHANGED
@@ -3,7 +3,8 @@ description: "VectorStoreFlow"
|
|
3 |
|
4 |
backend:
|
5 |
_target_: flows.backends.llm_lite.LiteLLMBackend
|
6 |
-
api_infos:
|
|
|
7 |
|
8 |
input_keys:
|
9 |
- "operation" # read or write
|
@@ -13,7 +14,6 @@ output_keys:
|
|
13 |
- "retrieved"
|
14 |
|
15 |
type: "chroma"
|
16 |
-
|
17 |
-
openai: "YOUR_OPENAI_API_KEY"
|
18 |
|
19 |
|
|
|
3 |
|
4 |
backend:
|
5 |
_target_: flows.backends.llm_lite.LiteLLMBackend
|
6 |
+
api_infos: ???
|
7 |
+
model_name: "" #Not used in current implementation
|
8 |
|
9 |
input_keys:
|
10 |
- "operation" # read or write
|
|
|
14 |
- "retrieved"
|
15 |
|
16 |
type: "chroma"
|
17 |
+
|
|
|
18 |
|
19 |
|
demo.yaml
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
chroma_demo_flow:
|
2 |
+
input_interface:
|
3 |
+
- "operation"
|
4 |
+
- "content"
|
5 |
+
output_interface:
|
6 |
+
- "retrieved"
|
7 |
+
_target_: flows.base_flows.SequentialFlow.instantiate_from_default_config
|
8 |
+
name: "demoChromaDBFlow"
|
9 |
+
description: "An example flow of how to read and writed in a ChromaDBFlowModule."
|
10 |
+
subflows_config:
|
11 |
+
chroma_db:
|
12 |
+
input_interface:
|
13 |
+
_target_: flows.interfaces.KeyInterface
|
14 |
+
keys_to_select: ["operation","content"]
|
15 |
+
_target_: aiflows.VectorStoreFlowModule.ChromaDBFlow.instantiate_from_default_config
|
16 |
+
|
17 |
+
backend:
|
18 |
+
_target_: flows.backends.llm_lite.LiteLLMBackend
|
19 |
+
api_infos: ???
|
20 |
+
model_name: "" #Not used in current implementation
|
21 |
+
n_results: 1 # number of results to retrieve when querying
|
22 |
+
topology:
|
23 |
+
- goal: Write content to the ChromaDB
|
24 |
+
input_interface:
|
25 |
+
_target_: flows.interfaces.KeyInterface
|
26 |
+
keys_to_select: ["operation","content"]
|
27 |
+
flow: chroma_db
|
28 |
+
output_interface:
|
29 |
+
_target_: flows.interfaces.KeyInterface
|
30 |
+
keys_to_set:
|
31 |
+
operation: "read"
|
32 |
+
keys_to_rename:
|
33 |
+
retrieved: content
|
34 |
+
keys_to_select: ["operation","content"]
|
35 |
+
|
36 |
+
- goal: Read content from the ChromaDB
|
37 |
+
input_interface:
|
38 |
+
_target_: flows.interfaces.KeyInterface
|
39 |
+
keys_to_select: ["operation","content"]
|
40 |
+
flow: chroma_db
|
41 |
+
output_interface:
|
42 |
+
_target_: flows.interfaces.KeyInterface
|
43 |
+
keys_to_select: ["retrieved"]
|
44 |
+
|
45 |
+
vector_store_demo_flow:
|
46 |
+
input_interface:
|
47 |
+
- "operation"
|
48 |
+
- "content"
|
49 |
+
output_interface:
|
50 |
+
- "retrieved"
|
51 |
+
name: "demoVectorStoreFlow"
|
52 |
+
description: "An example flow of how to read and write in a VectorStoreFlowModule."
|
53 |
+
_target_: flows.base_flows.SequentialFlow.instantiate_from_default_config
|
54 |
+
subflows_config:
|
55 |
+
|
56 |
+
vs_db:
|
57 |
+
_target_: aiflows.VectorStoreFlowModule.VectorStoreFlow.instantiate_from_default_config
|
58 |
+
backend:
|
59 |
+
_target_: flows.backends.llm_lite.LiteLLMBackend
|
60 |
+
api_infos: ???
|
61 |
+
model_name: "" #Not used in current implementation
|
62 |
+
|
63 |
+
topology:
|
64 |
+
- goal: Write content to the VectorStore
|
65 |
+
input_interface:
|
66 |
+
_target_: flows.interfaces.KeyInterface
|
67 |
+
keys_to_select: ["operation","content"]
|
68 |
+
flow: vs_db
|
69 |
+
output_interface:
|
70 |
+
_target_: flows.interfaces.KeyInterface
|
71 |
+
keys_to_set:
|
72 |
+
operation: "read"
|
73 |
+
keys_to_rename:
|
74 |
+
retrieved: content
|
75 |
+
keys_to_select: ["operation","content"]
|
76 |
+
|
77 |
+
- goal: Read content from the VectorStore
|
78 |
+
input_interface:
|
79 |
+
_target_: flows.interfaces.KeyInterface
|
80 |
+
keys_to_select: ["operation","content"]
|
81 |
+
flow: vs_db
|
82 |
+
output_interface:
|
83 |
+
_target_: flows.interfaces.KeyInterface
|
84 |
+
keys_to_select: ["retrieved"]
|
85 |
+
|
pip_requirements.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
# ToDo
|
|
|
|
pip_requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.0.336
|
2 |
+
chromadb==0.3.29
|
3 |
+
faiss-cpu==1.7.4
|
run.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
|
4 |
+
import hydra
|
5 |
+
|
6 |
+
import flows
|
7 |
+
from flows.flow_launchers import FlowLauncher
|
8 |
+
from flows.utils.general_helpers import read_yaml_file
|
9 |
+
from flows.backends.api_info import ApiInfo
|
10 |
+
from flows import logging
|
11 |
+
from flows.flow_cache import CACHING_PARAMETERS, clear_cache
|
12 |
+
|
13 |
+
CACHING_PARAMETERS.do_caching = False # Set to True to enable caching
|
14 |
+
# clear_cache() # Uncomment this line to clear the cache
|
15 |
+
|
16 |
+
logging.set_verbosity_debug()
|
17 |
+
|
18 |
+
dependencies = [
|
19 |
+
{"url": "aiflows/VectorStoreFlowModule", "revision": os.getcwd()},
|
20 |
+
]
|
21 |
+
from flows import flow_verse
|
22 |
+
flow_verse.sync_dependencies(dependencies)
|
23 |
+
|
24 |
+
if __name__ == "__main__":
|
25 |
+
|
26 |
+
# OpenAI backend
|
27 |
+
api_information = [ApiInfo(backend_used="openai",
|
28 |
+
api_key = os.getenv("OPENAI_API_KEY"))]
|
29 |
+
# Azure backend
|
30 |
+
# api_information = ApiInfo(backend_used = "azure",
|
31 |
+
# api_base = os.getenv("AZURE_API_BASE"),
|
32 |
+
# api_key = os.getenv("AZURE_OPENAI_KEY"),
|
33 |
+
# api_version = os.getenv("AZURE_API_VERSION") )
|
34 |
+
|
35 |
+
root_dir = "."
|
36 |
+
cfg_path = os.path.join(root_dir, "demo.yaml")
|
37 |
+
cfg = read_yaml_file(cfg_path)
|
38 |
+
|
39 |
+
cfg["vector_store_demo_flow"]["subflows_config"]["vs_db"]["backend"]["api_infos"] = api_information
|
40 |
+
cfg["chroma_demo_flow"]["subflows_config"]["chroma_db"]["backend"]["api_infos"] = api_information
|
41 |
+
|
42 |
+
|
43 |
+
# ~~~ Get the data ~~~
|
44 |
+
# This can be a list of samples
|
45 |
+
data = {"id": 0, "operation": "write", "content": "demo of writing"} # Add your data here
|
46 |
+
|
47 |
+
# ~~~ Run inference ~~~
|
48 |
+
path_to_output_file = None
|
49 |
+
# path_to_output_file = "output.jsonl" # Uncomment this line to save the output to disk
|
50 |
+
|
51 |
+
#### CHROMA DEMO ####
|
52 |
+
### DUMMY DEMO OF WRITING "demo of writing" AND READING "" (Nothing)###
|
53 |
+
print("DEMO: ChromaDBFlow")
|
54 |
+
|
55 |
+
flow_with_interfaces_chroma = {
|
56 |
+
"flow": hydra.utils.instantiate(cfg['chroma_demo_flow'], _recursive_=False, _convert_="partial"),
|
57 |
+
"input_interface": (
|
58 |
+
None
|
59 |
+
if getattr(cfg, "input_interface", None) is None
|
60 |
+
else hydra.utils.instantiate(cfg['input_interface'], _recursive_=False)
|
61 |
+
),
|
62 |
+
"output_interface": (
|
63 |
+
None
|
64 |
+
if getattr(cfg, "output_interface", None) is None
|
65 |
+
else hydra.utils.instantiate(cfg['output_interface'], _recursive_=False)
|
66 |
+
),
|
67 |
+
}
|
68 |
+
|
69 |
+
_, outputs = FlowLauncher.launch(
|
70 |
+
flow_with_interfaces=flow_with_interfaces_chroma,
|
71 |
+
data=data,
|
72 |
+
path_to_output_file=path_to_output_file,
|
73 |
+
)
|
74 |
+
|
75 |
+
# ~~~ Print the output ~~~
|
76 |
+
flow_output_data = outputs[0]
|
77 |
+
print(flow_output_data)
|
78 |
+
|
79 |
+
#### END CHROMA DEMO ####
|
80 |
+
|
81 |
+
#### VECTOR STORE DEMO ####
|
82 |
+
|
83 |
+
print("DEMO: VECTOR STORE DEMO")
|
84 |
+
|
85 |
+
flow_with_interfaces_vstore = {
|
86 |
+
"flow": hydra.utils.instantiate(cfg['vector_store_demo_flow'], _recursive_=False, _convert_="partial"),
|
87 |
+
"input_interface": (
|
88 |
+
None
|
89 |
+
if getattr(cfg, "input_interface", None) is None
|
90 |
+
else hydra.utils.instantiate(cfg['input_interface'], _recursive_=False)
|
91 |
+
),
|
92 |
+
"output_interface": (
|
93 |
+
None
|
94 |
+
if getattr(cfg, "output_interface", None) is None
|
95 |
+
else hydra.utils.instantiate(cfg['output_interface'], _recursive_=False)
|
96 |
+
),
|
97 |
+
}
|
98 |
+
|
99 |
+
|
100 |
+
_, outputs = FlowLauncher.launch(
|
101 |
+
flow_with_interfaces=flow_with_interfaces_vstore,
|
102 |
+
data=data,
|
103 |
+
path_to_output_file=path_to_output_file,
|
104 |
+
)
|
105 |
+
|
106 |
+
# ~~~ Print the output ~~~
|
107 |
+
flow_output_data = outputs[0]
|
108 |
+
print(flow_output_data)
|