-
Notifications
You must be signed in to change notification settings - Fork 7.7k
Expand file tree
/
Copy pathpyproject.toml
More file actions
66 lines (60 loc) · 1.89 KB
/
pyproject.toml
File metadata and controls
66 lines (60 loc) · 1.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Build configuration: the package is built with the Hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "markitdown-ocr"
# The version is not written here; it is declared dynamic and supplied at build time.
dynamic = ["version"]
description = "OCR plugin for MarkItDown - Extracts text from images in PDF, DOCX, PPTX, and XLSX via LLM Vision"
readme = "README.md"
requires-python = ">=3.10"
license = "MIT"
keywords = [
    "markitdown",
    "ocr",
    "pdf",
    "docx",
    "xlsx",
    "pptx",
    "llm",
    "vision",
]
authors = [{ name = "Contributors", email = "noreply@github.com" }]
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: Implementation :: CPython",
]
# Runtime requirements. Apart from markitdown itself and Pillow, these are the
# file-format libraries markitdown already uses.
dependencies = [
    "markitdown>=0.1.0",
    "pdfminer.six>=20251230",
    "pdfplumber>=0.11.9",
    "mammoth~=1.11.0",
    "python-docx",
    "python-pptx",
    "pandas",
    "openpyxl",
    "Pillow>=9.0.0",
]
# The llm_client is supplied by the caller (the same arrangement markitdown uses
# for image descriptions), so no LLM SDK is a hard requirement: install openai,
# or any OpenAI-compatible SDK, separately.
#
# Licensing: PyMuPDF (fitz) is AGPL-3.0 licensed and is therefore kept out of
# the core dependencies. Choose the [pymupdf] extra (or [all], which also pulls
# it in) only if you need fallback support for malformed PDFs and accept the
# AGPL terms.
[project.optional-dependencies]
llm = ["openai>=1.0.0"]
pymupdf = ["PyMuPDF>=1.24.0"]
# Union of the extras above; keep it in sync when either of them changes.
all = [
    "openai>=1.0.0",
    "PyMuPDF>=1.24.0",
]
# Project links; all of them point at the upstream microsoft/markitdown repository.
[project.urls]
Documentation = "https://github.com/microsoft/markitdown#readme"
Issues = "https://github.com/microsoft/markitdown/issues"
Source = "https://github.com/microsoft/markitdown"
# CRITICAL: plugin registration. MarkItDown discovers this plugin through the
# "markitdown.plugin" entry-point group; the entry maps the plugin name "ocr"
# to the markitdown_ocr package.
[project.entry-points."markitdown.plugin"]
ocr = "markitdown_ocr"

# Tool configuration comes after every [project.*] table.
# This file is the source of the version that [project] declares as dynamic.
[tool.hatch.version]
path = "src/markitdown_ocr/__about__.py"