File tree Expand file tree Collapse file tree
glasskube/trieve/clickhouse Expand file tree Collapse file tree Original file line number Diff line number Diff line change 4949 mountPath : /opt/user_functions/
5050 readOnly : true
5151 env :
52+ - name : CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS
53+ value : "true"
5254 - name : EMBEDDING_SERVER_URL
53- value : " " # todo
55+ value : "http://trieve-embedding-bgem3" # todo
5456 - name : CLICKHOUSE_ADMIN_PASSWORD
5557 value : "password" # todo
5658
Original file line number Diff line number Diff line change 1+ <functions >
<function>
    <!-- Executable UDF embed_p: takes one String argument and returns Array(Float32). -->
    <name>embed_p</name>
    <type>executable_pool</type>
    <pool_size>10</pool_size>
    <!-- The row count is sent ahead of each chunk; embed.py reads it as the
         first line of every batch before reading that many rows. -->
    <send_chunk_header>true</send_chunk_header>
    <format>TabSeparated</format>
    <return_type>Array(Float32)</return_type>
    <argument>
        <type>String</type>
    </argument>
    <!-- Script name only; the init script writes it to /opt/user_scripts/embed.py. -->
    <command>embed.py</command>
</function>
14+ </functions >
Load Diff This file was deleted.
Original file line number Diff line number Diff line change 11resources :
2- - scripts.yaml
3- - functions.yaml
4- - clickhouse.yaml
2+ - clickhouse.yaml
3+
4+ configMapGenerator :
5+ - name : trieve-clickhouse-functions
6+ files :
7+ - embedding_function.xml
8+ options :
9+ disableNameSuffixHash : true
10+
11+ - name : trieve-clickhouse-scripts
12+ files :
13+ - load_embed.sh
14+ options :
15+ disableNameSuffixHash : true
Original file line number Diff line number Diff line change 1+ #! /usr/bin/env bash
echo " STARTING TRIEVE CLICKHOUSE INIT SCRIPT"

# Install the Python runtime needed by the embed.py user-defined function.
# apt-get is used instead of apt because apt does not guarantee a stable
# command-line interface for non-interactive (scripted) use.
apt-get -qq update
apt-get -qq install -y python3 python3-pip

# Pinned Python dependencies, written to the current working directory.
cat > requirements.txt << EOL
tenacity==8.4.1
requests==2.32.3
EOL

# Invoke pip through python3 so the packages are installed for the same
# interpreter that was just installed, not whichever pip is first on PATH.
python3 -m pip install -r requirements.txt
13+
14+ cat > /opt/user_scripts/embed.py << EOL
15+ #!/usr/bin/python3
16+ import sys
17+ import requests
18+ import os
19+
20+ request_timeout = 3
21+
22+
def completion_with_backoff(model_input):
    """Request an embedding for model_input from the embedding server.

    The server base URL is read from the EMBEDDING_SERVER_URL environment
    variable on every call.

    Args:
        model_input: Text to embed. It is sent prefixed with "Search for ".

    Returns:
        The "embedding" value of the first item in the response "data" list.

    Raises:
        Exception: If the HTTP request fails, times out or returns an error
            status, or if the response does not have the expected shape.
    """
    url = os.getenv("EMBEDDING_SERVER_URL")
    parameters = {"model": "dense-embeddings", "input": f"Search for {model_input}"}
    headers = {
        "Content-Type": "application/json",
    }
    try:
        response = requests.post(
            f"{url}/embeddings?api-version=2023-05-15",
            headers=headers,
            json=parameters,
            # requests waits forever unless a timeout is given; without it a
            # stalled embedding server would hang this worker process.
            timeout=request_timeout,
        )
        response.raise_for_status()
        # Only the first embedding is needed, since a single input is sent.
        return response.json()["data"][0]["embedding"]
    except requests.exceptions.RequestException as e:
        raise Exception("Failed to send message to embedding server") from e
    except Exception as e:
        raise Exception("Failed to get text from embeddings") from e
41+
42+
def embed(text):
    """Return the embedding for text, or a sentinel string if none is available.

    Args:
        text: The input string for one row.

    Returns:
        "NULL" when text is empty, "ERROR" when the embedding lookup raises,
        otherwise the value returned by completion_with_backoff.
    """
    if text == "":
        return "NULL"
    try:
        return completion_with_backoff(text)
    except Exception:
        # Best-effort on purpose: one failing row must not abort the batch.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit still stop the process.
        return "ERROR"
51+
52+
# Main loop: each chunk on stdin starts with a header line holding the number
# of rows that follow; one output line is printed per row.
for chunk_header in sys.stdin:
    try:
        row_count = int(chunk_header)
        for _ in range(row_count):
            raw_line = sys.stdin.readline()
            print(embed(raw_line.strip()))
        # Flush once per chunk rather than once per row.
        sys.stdout.flush()
    except Exception as err:
        # Report the failure on stdout and carry on with the next header line.
        print(f"ERROR: {err}")
        sys.stdout.flush()
63+ EOL
64+
65+ chmod -v +x /opt/user_scripts/embed.py
66+
67+ echo " FINISHED TRIEVE CLICKHOUSE INIT SCRIPT"
Load Diff This file was deleted.
You can’t perform that action at this time.
0 commit comments