-
Notifications
You must be signed in to change notification settings - Fork 276
Expand file tree
/
Copy pathrequirements.txt
More file actions
91 lines (80 loc) · 1.17 KB
/
requirements.txt
File metadata and controls
91 lines (80 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
numpy<2.0.0
datasets
scipy
torch
torchvision
torchaudio
tqdm
transformers
math_verify
word2number
accelerate
rapidfuzz
colorlog
appdirs
datasketch
modelscope
addict
pytest
rich
docstring_parser
pydantic
nltk
colorama
gradio>5
json5
tiktoken
pyarrow==20.0.0 # pinned: newer versions are buggy on Python 3.10
cookiecutter # automatic template generation from the CLI
pypdf
# text2sql
func_timeout
sqlglot
pymysql
# general text
fasttext; python_version >= "3.13" # https://github.com/OpenDCAI/DataFlow/pull/470
fasttext-wheel; python_version < "3.13" # https://github.com/OpenDCAI/DataFlow/pull/470
langkit
openai
sentencepiece
datasketch
presidio_analyzer[transformers]
presidio_anonymizer
vendi-score==0.0.3
google-api-core
google-api-python-client
evaluate
contractions
symspellpy
simhash
# knowledge base cleaning
chonkie
trafilatura
lxml_html_clean
pymupdf
httpx[socks]
# dataflow agent
cloudpickle
fastapi
httpx
pandas
psutil
pyfiglet
pyyaml
requests
termcolor
uvicorn
sseclient-py
# speech
librosa
soundfile
# map visualization
# matplotlib - removed because it is no longer needed
# Google Vertex AI
google-cloud-aiplatform>=1.55
google-cloud-bigquery
google-genai
gcsfs
db-dtypes
google-cloud-bigquery-storage
distflow