-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyproject.toml
More file actions
117 lines (105 loc) · 3.26 KB
/
pyproject.toml
File metadata and controls
117 lines (105 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
[tool.poetry]
# Distribution identity.
name = "cardio-guidelines-graph"
version = "0.1.0"
description = "A project for knowledge graph construction from German cardiovascular guidelines"
license = "MIT"
authors = ["pwiesenbach <pwiesenbach@example.com>"]
# Project links and long description.
readme = "README.md"
repository = "https://github.com/dieterich-lab/CardioGuidelinesGraph"
# src layout: the importable package `cardio_graph_core` lives under src/.
packages = [
    { include = "cardio_graph_core", from = "src" },
]
# Trove classifiers, one per line.
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Healthcare Industry",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Medical Science Apps.",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
]
[tool.poetry.dependencies]
# Runtime requirements installed together with the package.
# Only the Python 3.11 series is accepted.
python = ">=3.11,<3.12"
# Core dependencies
neo4j = "^5.14.0"
python-dotenv = "^1.0.0"
click = "^8.1.7"
# LLM tooling. The langchain* and langsmith ranges are written out explicitly
# and each one is capped below the next minor series.
langchain = ">=0.1.0,<0.2.0"
langchain-core = ">=0.1.28,<0.2.0"
langchain-text-splitters = ">=0.0.1,<0.1.0"
langchain-community = ">=0.0.21,<0.1.0"
langsmith = ">=0.1.0,<0.2.0"
ollama = "^0.1.7"
# PDF processing
# NOTE(review): both pypdf2 and pypdf are listed; confirm that both are still
# imported before keeping the pair.
pypdf2 = "^3.0.1"
pypdf = "^3.15.1" # Used by parse_pdf_as_markdown.py
pymupdf = "^1.23.7" # Better PDF extraction with tables support
# NOTE(review): "*" accepts any release; consider an explicit range so fresh
# installs stay reproducible.
pymupdf4llm = "*" # Markdown conversion for LLM-focused workflows
pdfplumber = "^0.10.3" # PDF data extraction focused on tables
docling = "^0.4.0"
pdf2image = "^1.16.3"
pillow = "^10.2.0"
tqdm = "^4.66.1" # Progress bar utility
tabulate = "^0.9.0" # Table formatting for markdown
# Data processing and analysis
numpy = "^1.26.3"
pandas = "^2.1.4"
matplotlib = "^3.8.2"
networkx = "^3.2.1"
scikit-learn = "^1.3.2"
# BAML client dependencies
pydantic = "^2.5.3"
typing-extensions = "^4.9.0"
aiohttp = "^3.9.1"
requests = "^2.31.0"
# Jupyter notebooks support
# NOTE(review): notebook tooling is declared as a runtime dependency here, not
# in the dev group; confirm that is intended.
jupyter = "^1.0.0"
ipykernel = "^6.27.1"
# Other runtime dependencies (not Jupyter-related): BAML runtime, database
# drivers/ORM, RDF, NLP and search. baml-py and docling-ibm-models are pinned
# to exact versions.
baml-py = "0.217.0"
mysql-connector-python = "^9.4.0"
rdflib = "^7.0.0"
sqlalchemy = "^2.0.43"
pymysql = "^1.1.1"
spacy = "^3.7.0"
whoosh = "^2.7.4"
scispacy = "^0.5.4"
docling-ibm-models = {version = "0.2.0", extras = ["all"]}
psycopg2-binary = "^2.9.10"
[tool.poetry.group.dev.dependencies]
# Development-only tooling (formatting, linting, typing, tests, docs),
# kept in alphabetical order.
black = "^23.12.0"
flake8 = "^6.1.0"
isort = "^5.13.2"
mypy = "^1.8.0"
pre-commit = "^3.6.0"
pytest = "^7.4.3"
pytest-cov = "^4.1.0"
sphinx = "^7.2.6"
sphinx-rtd-theme = "^2.0.0"
[tool.poetry.scripts]
# Console entry points, written as `command = "module.path:callable"`.
# Every target lives in the `cardio_graph_core` package.
# PDF parsing (the only entry that targets a `cli` callable).
parse-pdfs = "cardio_graph_core.parsing.docling.parse_pdfs_with_docling:cli"
# Graph construction and Neo4j loading.
build-graph = "cardio_graph_core.extraction.guideline_graph_builder:main"
load-neo4j = "cardio_graph_core.neo4j.grounding_index_to_neo4j:main"
# SNOMED CT utilities.
snomed-explorer = "cardio_graph_core.snomedct.snomed_query:main"
generate-cardio-subset = "cardio_graph_core.snomedct.generate_cardio_ontology:main"
# NOTE(review): the next two names look experiment-specific (row10, table22);
# confirm they are meant to ship as installed commands.
test-row10 = "cardio_graph_core.test_runner:row10"
autotune-table22 = "cardio_graph_core.tuning.controller:main"
[build-system]
# PEP 517 build backend. The requirement carries an explicit lower bound, as
# in the Poetry documentation, so it is not left completely unconstrained.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
# Literal string, so the regex backslash needs no escaping.
include = '\.pyi?$'
# Same width as the isort line_length setting in this file.
line-length = 88
target-version = ["py311"]
[tool.isort]
# Import sorting uses the black-compatible profile so both tools agree.
profile = "black"
# Matches line-length in [tool.black].
line_length = 88
[tool.mypy]
# Type-check against the same Python series the project requires.
python_version = "3.11"
# Extra warnings.
warn_return_any = true
warn_unused_configs = true
# Strictness: function definitions must be fully annotated.
disallow_untyped_defs = true
disallow_incomplete_defs = true
[tool.pytest.ini_options]
# Test discovery: look under tests/ for files, functions and classes that
# match these patterns.
testpaths = ["tests"]
python_files = "test_*.py"
python_functions = "test_*"
python_classes = "Test*"
# Measure coverage by import name. The package is declared with
# `include = "cardio_graph_core", from = "src"`, so it is installed as
# `cardio_graph_core`; the dotted `src.` prefix is not the name it is
# installed under.
addopts = "--cov=cardio_graph_core"