-
Notifications
You must be signed in to change notification settings - Fork 34
Expand file tree
/
Copy pathvalues.yaml
More file actions
687 lines (647 loc) · 17.3 KB
/
values.yaml
File metadata and controls
687 lines (647 loc) · 17.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
# We use global values scope to multiplex the docker-registry details to both mlrun and nuclio
global:
# External host/ip to reach the k8s node. This might take various values if k8s is run in a VM or a cloud env
externalHostAddress: localhost
registry: &userRegistry
url: mustprovide
secretName:
nuclio:
dashboard:
nodePort: 30050
infrastructure:
# Defines infra flavour, for instance: standalone, onprem, aws, azure, gcp...
kind: standalone
# Defines k8s flavour, for instance: eks, aks, gcp, kubespray, openshift, rancher, minikube...
provider: ~
inboundCidrs: ~
loadBalancerName: ~
aws:
bucketName: ~
s3NonAnonymous: false
domainNameCertificate: ~
# =============================================================================
# S3-compatible storage configuration
# These credentials are used by MLRun, Jupyter, and Kubeflow Pipelines
# to access the storage backend.
# =============================================================================
# storage.mode selects which backend credentials are injected into the 'storage-credentials' Secret.
# Options:
# s3 (default) - uses storage.s3.accessKey/secretKey/bucket with SeaweedFS endpoint
# azure-blob - uses storage.azure.* fields
storage:
mode: s3
s3:
# NOTE(review): default development credentials for the bundled SeaweedFS
# (matching seaweedfs.s3Service / admin secret below) — override these for
# any shared or production deployment; they are injected into the
# 'storage-credentials' Secret consumed by MLRun, Jupyter, and KFP.
accessKey: "seaweed"
secretKey: "seaweed123"
bucket: "mlrun"
azure:
containerName: ""
connectionString: ""
accountName: ""
accountKey: ""
sasToken: ""
clientSecret: ""
tenantId: ""
nuclio:
global:
registry: *userRegistry
# coupled with mlrun.nuclio.dashboardName template in mlrun chart
fullnameOverride: nuclio
controller:
enabled: true
dashboard:
enabled: true
# nodePort - taken from global.nuclio.dashboard.nodePort for re-usability
# k8s has deprecated docker support since v1.20
containerBuilderKind: kaniko
ingress:
enabled: false
securityContext:
runAsNonRoot: true
runAsUser: 1000
autoscaler:
enabled: false
dlx:
enabled: false
rbac:
create: true
# do not allow nuclio to listen on all namespaces
crdAccessMode: namespaced
crd:
create: true
platform:
logger:
sinks:
myStdoutLoggerSink:
kind: stdout
attributes:
encoding: console
timeFieldName: time
timeFieldEncoding: iso8601
system:
- level: debug
sink: myStdoutLoggerSink
functions:
- level: debug
sink: myStdoutLoggerSink
projectsLeader:
kind: mlrun
synchronizationInterval: 10m
apiAddress: http://mlrun-api-chief:8080/api
mlrun:
# set the type of filesystem to use: filesystem, s3
enabled: true
global:
registry: *userRegistry
defaultFunctionPodResources:
limits:
cpu: "2"
memory: "20Gi"
requests:
cpu: "25m"
memory: "1Mi"
storage: filesystem
# Storage auto-mount configuration for SeaweedFS S3-compatible storage
# This allows MLRun SDK to automatically mount storage without user-provided params
storageAutoMountType: secret_env
storageAutoMountParams: ~
secrets:
s3:
accessKey: ""
secretKey: ""
s3:
region: us-east-1
regionEndpoint: s3.us-east-1.amazonaws.com
bucket: mlrun
encrypt: false
secure: true
fullnameOverride: mlrun
nuclio:
mode: enabled
rbac:
create: true
v3io:
enabled: false
api:
image:
tag: 1.11.0-rc28
ingress:
enabled: false
annotations: {}
sidecars:
logCollector:
image:
tag: 1.11.0-rc28
enabled: true
fullnameOverride: mlrun-api
functionSpecServiceAccountDefault: ~
service:
type: NodePort
nodePort: 30070
volumes:
storageOverride:
persistentVolumeClaim:
claimName: mlrun-api-pvc
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
envFrom:
- configMapRef:
name: mlrun-common-env
- secretRef:
name: storage-credentials
optional: true
- configMapRef:
name: mlrun-pipelines-config
optional: true
- configMapRef:
name: mlrun-spark-config
optional: true
- configMapRef:
name: mlrun-override-env
optional: true
extraPersistentVolumeMounts: ~
# Set mlrun api workers count by setting the minReplicas value.
# This is recommended for production environments running at high scale.
# more information on https://github.com/v3io/helm-charts/blob/7add12015d51f7ce500a71cb40fbb658b2c9ff3d/stable/mlrun/values.yaml#L99-L102
# worker:
# minReplicas: 1
# Enable microservices is recommended for production environments running at high scale.
# more information on https://github.com/v3io/helm-charts/blob/7add12015d51f7ce500a71cb40fbb658b2c9ff3d/stable/mlrun/values.yaml#L8-L53
# microservices:
# enabled: false
# services:
# - name: alerts
ui:
image:
tag: 1.11.0-rc28
fullnameOverride: mlrun-ui
ingress:
enabled: false
service:
type: NodePort
nodePort: 30060
db:
name: db
fullnameOverride: mlrun-db
securityContext:
runAsUser: 999
podSecurityContext:
runAsUser: 999
fsGroup: 999
volumes:
storageOverride:
persistentVolumeClaim:
claimName: mlrun-db-pvc
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
httpDB:
dbType: mysql
dirPath: "/mlrun/db"
dsn: mysql+pymysql://root@mlrun-db:3306/mlrun
oldDsn: sqlite:////mlrun/db/mlrun.db?check_same_thread=false
modelMonitoring:
# If no dsn has been provided, will initiate monitoring database under mlrun-db service:
# mysql+pymysql://root@mlrun-db:3306/mlrun_model_monitoring
dsn:
ce:
mode: full
jupyterNotebook:
serviceAccount:
create: true
name: mlrun-jupyter
awsInstall: false
fullnameOverride: mlrun-jupyter
name: jupyter-notebook
enabled: true
service:
type: NodePort
nodePort: 30040
port: 8888
ingress:
enabled: false
annotations: {}
ingressClassName: ""
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths: []
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
image:
repository: quay.io/mlrun/jupyter
tag: 1.11.0-rc28
pullPolicy: IfNotPresent
pullSecrets: []
busybox:
image: busybox
# Quoted so YAML does not resolve the tag as a float (1.37 would otherwise be
# a number, and e.g. 1.30 would silently become 1.3); this also matches the
# quoted busybox tag "1.36" used in the pipelines images section.
tag: "1.37"
ce:
mode: full
# use this to override mlrunUIURL, by default it will be auto-resolved to externalHostAddress and
# mlrun UI node port
mlrunUIURL:
extraEnvKeyValue: {}
envFrom:
- configMapRef:
name: jupyter-common-env
optional: true
- secretRef:
name: storage-credentials
optional: true
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "8Gi"
annotations:
helm.sh/resource-policy: "keep"
nodeSelector:
{}
# node-role.kubernetes.io/node: "true"
# tier: cs
tolerations:
[]
# - key: "node-role.kubernetes.io/master"
# effect: NoSchedule
mpi-operator:
fullnameOverride: mpi-operator
crd:
create: true
rbac:
clusterResources:
create: true
namespaced:
create: true
deployment:
create: true
seaweedfs:
enabled: true
# Preserve resource names after chart upgrade (4.17 switched from seaweedfs.name to seaweedfs.fullname)
fullnameOverride: seaweedfs
global:
# Override parent chart's global.registry (which is a map) with empty string
# to prevent "map[secretName:<nil> url:mustprovide]" in image names
registry: ""
# Override default "001" to "000" for single-node setup (no replication)
replicationPlacement: "000"
# Disabled by default. Only needed if SeaweedFS master/volume pods need to
# delete other pods during data migration (e.g., multi-node rebalancing).
# Enabling this creates a ClusterRole, which conflicts in multi-NS deployments.
createClusterRole: false
# Disable individual component pods - allInOne runs everything in a single deployment
master:
enabled: false
volume:
enabled: false
filer:
enabled: false
# S3 auth config - enableAuth gates the seaweedfs-s3-config Secret creation in
# templates/seaweedfs/seaweedfs-s3-config.yaml even though the dedicated s3 pod
# is disabled. The secret is consumed by allInOne.s3.existingConfigSecret below.
s3:
port: 8333
enableAuth: true
# Single-pod mode: master + volume + filer + S3 gateway in one deployment.
# Reduces from 4 component pods down to 1, cutting CPU/memory footprint significantly.
allInOne:
enabled: true
s3:
enabled: true
port: 8333
enableAuth: true
# IAM config secret created by templates/seaweedfs/seaweedfs-s3-config.yaml
existingConfigSecret: "seaweedfs-s3-config"
# Storage: use PVC instead of default emptyDir
data:
type: "persistentVolumeClaim"
size: "10Gi"
resources:
requests:
memory: 256Mi
cpu: 100m
limits:
memory: 2Gi
# Admin server - user and policy management UI
admin:
enabled: true
port: 23646
# Point admin at the allInOne service since the standalone master StatefulSet
# is disabled. Without this the chart cannot auto-discover the master address.
masters: "seaweedfs-all-in-one:9333"
secret:
adminUser: "seaweed"
adminPassword: "seaweed123"
dataDir: "/data"
# Storage: use PVC instead of default emptyDir
data:
type: "persistentVolumeClaim"
size: "1Gi"
resources:
requests:
memory: 64Mi
cpu: 25m
limits:
memory: 128Mi
cpu: 1
# Custom NodePort service for Admin UI external access
adminService:
type: NodePort
port: 23646
nodePort: 30093
ingress:
enabled: false
className: ""
host: ""
path: /
pathType: Prefix
annotations: {}
tls: []
# Custom NodePort service for S3 API external access.
# Also provides the internal "seaweedfs-s3" cluster alias used by MLRun/KFP helpers.
s3Service:
type: NodePort
port: 8333
nodePort: 30094
spark-operator:
enabled: true
fullnameOverride: spark-operator
controller:
serviceName: spark-operator-webhook
webhook:
enable: true
spark:
# User should set this value accordingly when installing in another namespace
jobNamespaces:
- mlrun
serviceAccount:
name: sparkapp
pipelines:
archiveLogs: false
enabled: true
name: pipelines
# Log level for KFP api-server (DEBUG, INFO, WARNING, ERROR)
logLevel: INFO
ui:
enabled: false
metadata:
enabled: false
service:
type: NodePort
nodePort: 30100
crd:
enabled: true
priority_class:
enabled: true
persistence:
enabled: true
existingClaim:
storageClass:
accessMode: "ReadWriteOnce"
size: "20Gi"
annotations:
helm.sh/resource-policy: "keep"
nodeSelector:
{}
# node-role.kubernetes.io/node: "true"
# tier: cs
tolerations:
[]
# - key: "node-role.kubernetes.io/master"
# effect: NoSchedule
db:
username: root
securityContext:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
fsGroupChangePolicy: OnRootMismatch
images:
driver:
repository: ghcr.io/kubeflow/kfp-driver
tag: 2.15.0
launcher:
repository: ghcr.io/kubeflow/kfp-launcher
tag: 2.15.0
argoexec:
repository: gcr.io/iguazio/argoexec
tag: v3.4.17-license-compliance
workflowController:
repository: gcr.io/iguazio/workflow-controller
tag: v3.4.17-license-compliance
apiServer:
repository: ghcr.io/kubeflow/kfp-api-server
tag: 2.15.0
busybox:
repository: busybox
tag: "1.36"
persistenceagent:
repository: ghcr.io/kubeflow/kfp-persistence-agent
tag: 2.15.0
scheduledworkflow:
repository: ghcr.io/kubeflow/kfp-scheduled-workflow-controller
tag: 2.15.0
ui:
repository: ghcr.io/kubeflow/kfp-frontend
tag: 2.15.0
viewerCrdController:
repository: ghcr.io/kubeflow/kfp-viewer-crd-controller
tag: 2.15.0
visualizationServer:
repository: ghcr.io/kubeflow/kfp-visualization-server
tag: 2.15.0
metadata:
container:
repository: gcr.io/tfx-oss-public/ml_metadata_store_server
tag: 1.16.0
metadataEnvoy:
repository: ghcr.io/kubeflow/kfp-metadata-envoy
tag: 2.15.0
metadataWriter:
repository: ghcr.io/kubeflow/kfp-metadata-writer
tag: 2.15.0
mysql:
# MySQL 8.0.26 because >= 8.0.27 has deprecated the default_authentication_plugin option
# which is required by MLMD workloads (metadata-grpc-deployment and metadata-writer).
# https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_default_authentication_plugin
repository: gcr.io/iguazio/mysql
tag: "8.0.26"
cacheImage:
repository: gcr.io/google-containers/busybox
# NOTE(review): floating "latest" tag is not reproducible and differs from
# every other pinned image in this section — consider pinning a version.
tag: latest
kube-prometheus-stack:
fullnameOverride: monitoring
enabled: true
prometheusOperator:
tls:
enabled: false
admissionWebhooks:
enabled: false
alertmanager:
enabled: false
grafana:
adminUser: admin
# NOTE(review): default Grafana admin credentials — override for any
# non-local deployment (anonymous Editor access is also enabled below).
adminPassword: admin
initChownData:
enabled: false
additionalDataSources:
- name: iguazio
type: mysql
url: mlrun-db:3306
user: root
password:
database: mlrun_model_monitoring
editable: true
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
persistence:
type: pvc
enabled: true
size: 10Gi
grafana.ini:
auth.anonymous:
enabled: true
org_role: Editor
fullnameOverride: grafana
enabled: true
service:
type: NodePort
nodePort: 30010
ingress:
enabled: false
prometheus:
enabled: true
service:
type: NodePort
nodePort: 30020
kube-state-metrics:
fullnameOverride: state-metrics
prometheus-node-exporter:
fullnameOverride: node-exporter
hostNetwork: false
service:
port: 9100
hostRootFsMount:
enabled: false
timescaledb:
enabled: true
fullnameOverride: timescaledb
image:
repository: timescale/timescaledb-ha
tag: "pg17.7-ts2.24.0"
pullPolicy: IfNotPresent
service:
type: NodePort
port: 5432
nodePort: 30110
auth:
username: postgres
password: postgres
database: postgres
persistence:
enabled: true
size: "10Gi"
storageClass: ""
accessMode: "ReadWriteOnce"
annotations:
helm.sh/resource-policy: "keep"
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "1Gi"
cpu: "1"
nodeSelector: {}
tolerations: []
securityContext:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
fsGroupChangePolicy: OnRootMismatch
strimzi-kafka-operator:
enabled: true
watchAnyNamespace: true
# defaultImageRegistry: quay.io
# defaultImageRepository: strimzi
# defaultImageTag: 0.48.0
kafka:
enabled: true
name: kafka-stream
# Bootstrap service alias configuration
bootstrapAlias:
# Create a service alias for simpler Kafka bootstrap server name
# When enabled, creates: {name}.{namespace}.svc.cluster.local:9092
# instead of the default: {name}-kafka-bootstrap.{namespace}.svc.cluster.local:9092
enabled: true
# Name for the bootstrap service alias (only used if enabled is true)
name: kafka-stream
replicas: 1
listeners:
- name: client
port: 9092
type: internal
tls: false
- name: controller
port: 9093
type: internal
tls: false
- name: internal
port: 9094
type: internal
tls: false
storage:
type: persistent-claim
size: 8Gi
class: ""
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "1000m"
config:
# Replication settings for single-node setup
default.replication.factor: 1
offsets.topic.replication.factor: 1
transaction.state.log.replication.factor: 1
transaction.state.log.min.isr: 1
zookeeper:
replicas: 0
# Kafka RBAC for user namespaces
# Enable this when installing in user namespaces (mlrun, mlrun1, etc.)
# When enabled, creates: ServiceAccount "kafka-client" + Role/RoleBinding
rbac:
# Enable RBAC for this namespace to access Kafka
enabled: true
# Operator namespace (where Kafka operator/cluster is running)
# Empty means "use the release namespace"
# Example: "controller" if that's where you installed the operator
operatorNamespace: ""
# NetworkPolicy for Kafka isolation in multi-NS deployments
egress:
enabled: false
# Spark configuration for multi-NS deployments
# Controls CE-level spark resources (mlrun-spark-config ConfigMap)
# In single-NS mode, both spark.enabled and spark-operator.enabled are true
# In multi-NS admin mode, spark.enabled is false (no ConfigMap needed)
# In multi-NS user mode, spark.enabled is true (ConfigMap needed for MLRun)
spark:
enabled: true