-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Expand file tree
/
Copy pathmain.py
More file actions
36 lines (32 loc) · 1011 Bytes
/
main.py
File metadata and controls
36 lines (32 loc) · 1011 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from llama_cpp import Llama
from llama_cpp.llama_chat_format import GraniteDoclingChatHandler

# Multimodal projector (vision tower) for Granite Docling, fetched from the
# Hugging Face Hub; the glob pattern picks the Q8_0-quantized mmproj file.
handler = GraniteDoclingChatHandler.from_pretrained(
    repo_id="ggml-org/granite-docling-258M-GGUF",
    filename="mmproj*Q8_0*",
)

# Language model weights from the same repo, wired to the vision handler.
# n_gpu_layers=-1 offloads every layer to the GPU when one is available.
model = Llama.from_pretrained(
    repo_id="ggml-org/granite-docling-258M-GGUF",
    filename="granite*Q8_0*",
    chat_handler=handler,
    n_ctx=8192,
    n_gpu_layers=-1,
)

# Ask the model to transcribe a document page image into DocTags.
# special=True keeps the DocTags control tokens in the streamed output.
stream = model.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": "https://huggingface.co/spaces/ibm-granite/granite-docling-258m-demo/resolve/main/data/images/new_arxiv.png"},
                {"type": "text", "text": "Convert this page to docling."},
            ],
        }
    ],
    stream=True,
    special=True,
)

# Print tokens as they arrive; the first delta carries only the role,
# so emit text only when a "content" key is actually present.
for piece in stream:
    delta = piece["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
print()