-
Notifications
You must be signed in to change notification settings - Fork 34
Expand file tree
/
Copy pathvalues.yaml
More file actions
687 lines (647 loc) · 17.3 KB
/
values.yaml
File metadata and controls
687 lines (647 loc) · 17.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
# We use global values scope to multiplex the docker-registry details to both mlrun and nuclio
global:
# External host/ip to reach the k8s node. This might take various values if k8s is run in a VM or a cloud env
externalHostAddress: localhost
registry: &userRegistry
url: mustprovide
secretName:
nuclio:
dashboard:
nodePort: 30050
infrastructure:
# Defines infra flavour, for instance: standalone, onprem, aws, azure, gcp...
kind: standalone
# Defines k8s flavour, for instance: eks, aks, gcp, kubespray, openshift, rancher, minikube...
provider: ~
inboundCidrs: ~
loadBalancerName: ~
aws:
bucketName: ~
s3NonAnonymous: false
domainNameCertificate: ~
# =============================================================================
# S3-compatible storage configuration
# These credentials are used by MLRun, Jupyter, and Kubeflow Pipelines
# to access the storage backend.
# =============================================================================
# storage.mode selects which backend credentials are injected into the 'storage-credentials' Secret.
# Options:
# s3 (default) - uses storage.s3.accessKey/secretKey/bucket with SeaweedFS endpoint
# azure-blob - uses storage.azure.* fields
storage:
mode: s3
s3:
# NOTE(review): default development credentials for the bundled SeaweedFS
# (matching seaweedfs.s3Service / admin secret below) — override these for
# any shared or production deployment; they are injected into the
# 'storage-credentials' Secret consumed by MLRun, Jupyter, and KFP.
accessKey: "seaweed"
secretKey: "seaweed123"
bucket: "mlrun"
azure:
containerName: ""
connectionString: ""
accountName: ""
accountKey: ""
sasToken: ""
clientSecret: ""
tenantId: ""
nuclio:
global:
registry: *userRegistry
# coupled with mlrun.nuclio.dashboardName template in mlrun chart
fullnameOverride: nuclio
controller:
enabled: true
dashboard:
enabled: true
# nodePort - taken from global.nuclio.dashboard.nodePort for re-usability
# k8s has deprecated docker support since v1.20
containerBuilderKind: kaniko
ingress:
enabled: false
securityContext:
runAsNonRoot: true
runAsUser: 1000
autoscaler:
enabled: false
dlx:
enabled: false
rbac:
create: true
# do not allow nuclio to listen on all namespaces
crdAccessMode: namespaced
crd:
create: true
platform:
logger:
sinks:
myStdoutLoggerSink:
kind: stdout
attributes:
encoding: console
timeFieldName: time
timeFieldEncoding: iso8601
system:
- level: debug
sink: myStdoutLoggerSink
functions:
- level: debug
sink: myStdoutLoggerSink
projectsLeader:
kind: mlrun
synchronizationInterval: 10m
apiAddress: http://mlrun-api-chief:8080/api
mlrun:
# set the type of filesystem to use: filesystem, s3
enabled: true
global:
registry: *userRegistry
defaultFunctionPodResources:
limits:
cpu: "2"
memory: "20Gi"
requests:
cpu: "25m"
memory: "1Mi"
storage: filesystem
# Storage auto-mount configuration for SeaweedFS S3-compatible storage
# This allows MLRun SDK to automatically mount storage without user-provided params
storageAutoMountType: secret_env
storageAutoMountParams: ~
secrets:
s3:
accessKey: ""
secretKey: ""
s3:
region: us-east-1
regionEndpoint: s3.us-east-1.amazonaws.com
bucket: mlrun
encrypt: false
secure: true
fullnameOverride: mlrun
nuclio:
mode: enabled
rbac:
create: true
v3io:
enabled: false
api:
image:
tag: 1.11.0-rc28
ingress:
enabled: false
annotations: {}
sidecars:
logCollector:
image:
tag: 1.11.0-rc28
enabled: true
fullnameOverride: mlrun-api
functionSpecServiceAccountDefault: ~
service:
type: NodePort
nodePort: 30070
volumes:
storageOverride:
persistentVolumeClaim:
claimName: mlrun-api-pvc
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
envFrom:
- configMapRef:
name: mlrun-common-env
- secretRef:
name: storage-credentials
optional: true
- configMapRef:
name: mlrun-pipelines-config
optional: true
- configMapRef:
name: mlrun-spark-config
optional: true
- configMapRef:
name: mlrun-override-env
optional: true
extraPersistentVolumeMounts: ~
# Set mlrun api workers count by setting the minReplicas value.
# This is recommended for production environments running at high scale.
# more information on https://github.com/v3io/helm-charts/blob/7add12015d51f7ce500a71cb40fbb658b2c9ff3d/stable/mlrun/values.yaml#L99-L102
# worker:
# minReplicas: 1
# Enable microservices is recommended for production environments running at high scale.
# more information on https://github.com/v3io/helm-charts/blob/7add12015d51f7ce500a71cb40fbb658b2c9ff3d/stable/mlrun/values.yaml#L8-L53
# microservices:
# enabled: false
# services:
# - name: alerts
ui:
image:
tag: 1.11.0-rc28
fullnameOverride: mlrun-ui
ingress:
enabled: false
service:
type: NodePort
nodePort: 30060
db:
name: db
fullnameOverride: mlrun-db
securityContext:
runAsUser: 999
podSecurityContext:
runAsUser: 999
fsGroup: 999
volumes:
storageOverride:
persistentVolumeClaim:
claimName: mlrun-db-pvc
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
httpDB:
dbType: mysql
dirPath: "/mlrun/db"
dsn: mysql+pymysql://root@mlrun-db:3306/mlrun
oldDsn: sqlite:////mlrun/db/mlrun.db?check_same_thread=false
modelMonitoring:
# If no dsn has been provided, will initiate monitoring database under mlrun-db service:
# mysql+pymysql://root@mlrun-db:3306/mlrun_model_monitoring
dsn:
ce:
mode: full
jupyterNotebook:
serviceAccount:
create: true
name: mlrun-jupyter
awsInstall: false
fullnameOverride: mlrun-jupyter
name: jupyter-notebook
enabled: true
service:
type: NodePort
nodePort: 30040
port: 8888
ingress:
enabled: false
annotations: {}
ingressClassName: ""
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths: []
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
image:
repository: quay.io/mlrun/jupyter
tag: 1.11.0-rc28
pullPolicy: IfNotPresent
pullSecrets: []
busybox:
image: busybox
# Quoted so YAML does not resolve the tag as a float (1.37 would otherwise be
# a number, and e.g. 1.30 would silently become 1.3); this also matches the
# quoted busybox tag "1.36" used in the pipelines images section.
tag: "1.37"
ce:
mode: full
# use this to override mlrunUIURL, by default it will be auto-resolved to externalHostAddress and
# mlrun UI node port
mlrunUIURL:
extraEnvKeyValue: {}
envFrom:
- configMapRef:
name: jupyter-common-env
optional: true
- secretRef:
name: storage-credentials
optional: true
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
nodeSelector:
{}
# node-role.kubernetes.io/node: "true"
# tier: cs
tolerations:
[]
# - key: "node-role.kubernetes.io/master"
# effect: NoSchedule
mpi-operator:
fullnameOverride: mpi-operator
crd:
create: true
rbac:
clusterResources:
create: true
namespaced:
create: true
deployment:
create: true
seaweedfs:
enabled: true
# Preserve resource names after chart upgrade (4.17 switched from seaweedfs.name to seaweedfs.fullname)
fullnameOverride: seaweedfs
global:
# Override parent chart's global.registry (which is a map) with empty string
# to prevent "map[secretName:<nil> url:mustprovide]" in image names
registry: ""
# Override default "001" to "000" for single-node setup (no replication)
replicationPlacement: "000"
# Disabled by default. Only needed if SeaweedFS master/volume pods need to
# delete other pods during data migration (e.g., multi-node rebalancing).
# Enabling this creates a ClusterRole, which conflicts in multi-NS deployments.
createClusterRole: false
# Disable individual component pods - allInOne runs everything in a single deployment
master:
enabled: false
volume:
enabled: false
filer:
enabled: false
# S3 auth config - enableAuth gates the seaweedfs-s3-config Secret creation in
# templates/seaweedfs/seaweedfs-s3-config.yaml even though the dedicated s3 pod
# is disabled. The secret is consumed by allInOne.s3.existingConfigSecret below.
s3:
port: 8333
enableAuth: true
# Single-pod mode: master + volume + filer + S3 gateway in one deployment.
# Reduces from 4 component pods down to 1, cutting CPU/memory footprint significantly.
allInOne:
enabled: true
s3:
enabled: true
port: 8333
enableAuth: true
# IAM config secret created by templates/seaweedfs/seaweedfs-s3-config.yaml
existingConfigSecret: "seaweedfs-s3-config"
# Storage: use PVC instead of default emptyDir
data:
type: "persistentVolumeClaim"
size: "10Gi"
resources:
requests:
memory: 256Mi
cpu: 100m
limits:
memory: 2Gi
# Admin server - user and policy management UI
admin:
enabled: true
port: 23646
# Point admin at the allInOne service since the standalone master StatefulSet
# is disabled. Without this the chart cannot auto-discover the master address.
masters: "seaweedfs-all-in-one:9333"
secret:
adminUser: "seaweed"
adminPassword: "seaweed123"
dataDir: "/data"
# Storage: use PVC instead of default emptyDir
data:
type: "persistentVolumeClaim"
size: "1Gi"
resources:
requests:
memory: 64Mi
cpu: 25m
limits:
memory: 128Mi
cpu: 1
# Custom NodePort service for Admin UI external access
adminService:
type: NodePort
port: 23646
nodePort: 30093
ingress:
enabled: false
className: ""
host: ""
path: /
pathType: Prefix
annotations: {}
tls: []
# Custom NodePort service for S3 API external access.
# Also provides the internal "seaweedfs-s3" cluster alias used by MLRun/KFP helpers.
s3Service:
type: NodePort
port: 8333
nodePort: 30094
spark-operator:
enabled: true
fullnameOverride: spark-operator
controller:
serviceName: spark-operator-webhook
webhook:
enable: true
spark:
# User should set this value accordingly when installing in another namespace
jobNamespaces:
- mlrun
serviceAccount:
name: sparkapp
pipelines:
archiveLogs: false
enabled: true
name: pipelines
# Log level for KFP api-server (DEBUG, INFO, WARNING, ERROR)
logLevel: INFO
ui:
enabled: false
metadata:
enabled: false
service:
type: NodePort
nodePort: 30100
crd:
enabled: true
priority_class:
enabled: true
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "20Gi"
annotations:
helm.sh/resource-policy: "keep"
nodeSelector:
{}
# node-role.kubernetes.io/node: "true"
# tier: cs
tolerations:
[]
# - key: "node-role.kubernetes.io/master"
# effect: NoSchedule
db:
username: root
securityContext:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
fsGroupChangePolicy: OnRootMismatch
images:
driver:
repository: ghcr.io/kubeflow/kfp-driver
tag: 2.15.0
launcher:
repository: ghcr.io/kubeflow/kfp-launcher
tag: 2.15.0
argoexec:
repository: gcr.io/iguazio/argoexec
tag: v3.4.17-license-compliance
workflowController:
repository: gcr.io/iguazio/workflow-controller
tag: v3.4.17-license-compliance
apiServer:
repository: ghcr.io/kubeflow/kfp-api-server
tag: 2.15.0
busybox:
repository: busybox
tag: "1.36"
persistenceagent:
repository: ghcr.io/kubeflow/kfp-persistence-agent
tag: 2.15.0
scheduledworkflow:
repository: ghcr.io/kubeflow/kfp-scheduled-workflow-controller
tag: 2.15.0
ui:
repository: ghcr.io/kubeflow/kfp-frontend
tag: 2.15.0
viewerCrdController:
repository: ghcr.io/kubeflow/kfp-viewer-crd-controller
tag: 2.15.0
visualizationServer:
repository: ghcr.io/kubeflow/kfp-visualization-server
tag: 2.15.0
metadata:
container:
repository: gcr.io/tfx-oss-public/ml_metadata_store_server
tag: 1.16.0
metadataEnvoy:
repository: ghcr.io/kubeflow/kfp-metadata-envoy
tag: 2.15.0
metadataWriter:
repository: ghcr.io/kubeflow/kfp-metadata-writer
tag: 2.15.0
mysql:
# MySQL 8.0.26 because >= 8.0.27 has deprecated the default_authentication_plugin option
# which is required by MLMD workloads (metadata-grpc-deployment and metadata-writer).
# https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_default_authentication_plugin
repository: gcr.io/iguazio/mysql
tag: "8.0.26"
cacheImage:
repository: gcr.io/google-containers/busybox
# NOTE(review): floating "latest" tag is not reproducible and differs from
# every other pinned image in this section — consider pinning a version.
tag: latest
kube-prometheus-stack:
fullnameOverride: monitoring
enabled: true
prometheusOperator:
tls:
enabled: false
admissionWebhooks:
enabled: false
alertmanager:
enabled: false
grafana:
adminUser: admin
# NOTE(review): default Grafana admin credentials — override for any
# non-local deployment (anonymous Editor access is also enabled below).
adminPassword: admin
initChownData:
enabled: false
additionalDataSources:
- name: iguazio
type: mysql
url: mlrun-db:3306
user: root
password:
database: mlrun_model_monitoring
editable: true
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
persistence:
type: pvc
enabled: true
size: 10Gi
grafana.ini:
auth.anonymous:
enabled: true
org_role: Editor
fullnameOverride: grafana
enabled: true
service:
type: NodePort
nodePort: 30010
ingress:
enabled: false
prometheus:
enabled: true
service:
type: NodePort
nodePort: 30020
kube-state-metrics:
fullnameOverride: state-metrics
prometheus-node-exporter:
fullnameOverride: node-exporter
hostNetwork: false
service:
port: 9100
hostRootFsMount:
enabled: false
timescaledb:
enabled: true
fullnameOverride: timescaledb
image:
repository: timescale/timescaledb-ha
tag: "pg17.7-ts2.24.0"
pullPolicy: IfNotPresent
service:
type: NodePort
port: 5432
nodePort: 30110
auth:
username: postgres
password: postgres
database: postgres
persistence:
enabled: true
size: "10Gi"
storageClass: ""
accessMode: "ReadWriteOnce"
annotations:
helm.sh/resource-policy: "keep"
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "1Gi"
cpu: "1"
nodeSelector: {}
tolerations: []
securityContext:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
fsGroupChangePolicy: OnRootMismatch
strimzi-kafka-operator:
enabled: true
watchAnyNamespace: true
# defaultImageRegistry: quay.io
# defaultImageRepository: strimzi
# defaultImageTag: 0.48.0
kafka:
enabled: true
name: kafka-stream
# Bootstrap service alias configuration
bootstrapAlias:
# Create a service alias for simpler Kafka bootstrap server name
# When enabled, creates: {name}.{namespace}.svc.cluster.local:9092
# instead of the default: {name}-kafka-bootstrap.{namespace}.svc.cluster.local:9092
enabled: true
# Name for the bootstrap service alias (only used if enabled is true)
name: kafka-stream
replicas: 1
listeners:
- name: client
port: 9092
type: internal
tls: false
- name: controller
port: 9093
type: internal
tls: false
- name: internal
port: 9094
type: internal
tls: false
storage:
type: persistent-claim
size: 8Gi
class: ""
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "1000m"
config:
# Replication settings for single-node setup
default.replication.factor: 1
offsets.topic.replication.factor: 1
transaction.state.log.replication.factor: 1
transaction.state.log.min.isr: 1
zookeeper:
replicas: 0
# Kafka RBAC for user namespaces
# Enable this when installing in user namespaces (mlrun, mlrun1, etc.)
# When enabled, creates: ServiceAccount "kafka-client" + Role/RoleBinding
rbac:
# Enable RBAC for this namespace to access Kafka
enabled: true
# Operator namespace (where Kafka operator/cluster is running)
# Empty means "use the release namespace"
# Example: "controller" if that's where you installed the operator
operatorNamespace: ""
# NetworkPolicy for Kafka isolation in multi-NS deployments
egress:
enabled: false
# Spark configuration for multi-NS deployments
# Controls CE-level spark resources (mlrun-spark-config ConfigMap)
# In single-NS mode, both spark.enabled and spark-operator.enabled are true
# In multi-NS admin mode, spark.enabled is false (no ConfigMap needed)
# In multi-NS user mode, spark.enabled is true (ConfigMap needed for MLRun)
spark:
enabled: true