rancher / opni

Multi Cluster Observability with AIOps

Home Page:https://opni.io

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Cortex: switching from filesystem backend to s3 backend fails

alexandreLamarre opened this issue · comments

The Cortex ingester still requires the PVC to be mounted at /data, even though the storage backend was switched to S3.

Monitoring cluster CRD

{
    "enabled": true,
    "revision": {
        "revision": "652795"
    },
    "cortexWorkloads": {
        "targets": {
            "alertmanager": {
                "replicas": 3
            },
            "compactor": {
                "replicas": 3
            },
            "distributor": {
                "replicas": 1
            },
            "ingester": {
                "replicas": 3
            },
            "purger": {
                "replicas": 1
            },
            "querier": {
                "replicas": 3
            },
            "query-frontend": {
                "replicas": 1
            },
            "ruler": {
                "replicas": 3
            },
            "store-gateway": {
                "replicas": 3
            }
        }
    },
    "cortexConfig": {
        "limits": {
            "ingestionRate": 600000,
            "ingestionRateStrategy": "local",
            "ingestionBurstSize": 1000000,
            "acceptHaSamples": false,
            "haClusterLabel": "cluster",
            "haReplicaLabel": "__replica__",
            "haMaxClusters": 0,
            "maxLabelNameLength": 1024,
            "maxLabelValueLength": 2048,
            "maxLabelNamesPerSeries": 30,
            "maxLabelsSizeBytes": 0,
            "maxMetadataLength": 1024,
            "rejectOldSamples": false,
            "rejectOldSamplesMaxAge": "1209600s",
            "creationGracePeriod": "600s",
            "enforceMetadataMetricName": true,
            "enforceMetricName": true,
            "ingestionTenantShardSize": 0,
            "maxExemplars": 0,
            "maxSeriesPerQuery": 100000,
            "maxSeriesPerUser": 5000000,
            "maxSeriesPerMetric": 50000,
            "maxGlobalSeriesPerUser": 0,
            "maxGlobalSeriesPerMetric": 0,
            "maxMetadataPerUser": 8000,
            "maxMetadataPerMetric": 10,
            "maxGlobalMetadataPerUser": 0,
            "maxGlobalMetadataPerMetric": 0,
            "outOfOrderTimeWindow": "0s",
            "maxFetchedChunksPerQuery": 2000000,
            "maxFetchedSeriesPerQuery": 0,
            "maxFetchedChunkBytesPerQuery": 0,
            "maxFetchedDataBytesPerQuery": 0,
            "maxQueryLookback": "0s",
            "maxQueryLength": "0s",
            "maxQueryParallelism": 14,
            "maxCacheFreshness": "60s",
            "maxQueriersPerTenant": 0,
            "maxOutstandingRequestsPerTenant": 100,
            "rulerEvaluationDelayDuration": "0s",
            "rulerTenantShardSize": 0,
            "rulerMaxRulesPerRuleGroup": 0,
            "rulerMaxRuleGroupsPerTenant": 0,
            "storeGatewayTenantShardSize": 0,
            "maxDownloadedBytesPerRequest": 0,
            "compactorBlocksRetentionPeriod": "2592000s",
            "compactorTenantShardSize": 0,
            "alertmanagerReceiversFirewallBlockPrivateAddresses": false,
            "alertmanagerNotificationRateLimit": 0,
            "alertmanagerMaxConfigSizeBytes": 0,
            "alertmanagerMaxTemplatesCount": 0,
            "alertmanagerMaxTemplateSizeBytes": 0,
            "alertmanagerMaxDispatcherAggregationGroups": 0,
            "alertmanagerMaxAlertsCount": 0,
            "alertmanagerMaxAlertsSizeBytes": 0
        },
        "runtimeConfig": {
            "multiKvConfig": {},
            "ingesterLimits": {}
        },
        "compactor": {
            "blockRanges": [
                "7200s",
                "43200s",
                "86400s"
            ],
            "blockSyncConcurrency": 20,
            "metaSyncConcurrency": 20,
            "consistencyDelay": "0s",
            "compactionInterval": "3600s",
            "compactionRetries": 3,
            "compactionConcurrency": 1,
            "cleanupInterval": "900s",
            "cleanupConcurrency": 20,
            "deletionDelay": "43200s",
            "tenantCleanupDelay": "21600s",
            "skipBlocksWithOutOfOrderChunksEnabled": false,
            "blockFilesConcurrency": 10,
            "blocksFetchConcurrency": 3,
            "blockDeletionMarksMigrationEnabled": false,
            "blockVisitMarkerTimeout": "300s",
            "blockVisitMarkerFileUpdateInterval": "60s",
            "acceptMalformedIndex": false
        },
        "querier": {
            "maxConcurrent": 20,
            "timeout": "120s",
            "iterators": false,
            "batchIterators": true,
            "ingesterStreaming": true,
            "ingesterMetadataStreaming": false,
            "maxSamples": 50000000,
            "queryIngestersWithin": "0s",
            "queryStoreForLabelsEnabled": false,
            "perStepStatsEnabled": false,
            "queryStoreAfter": "0s",
            "maxQueryIntoFuture": "600s",
            "defaultEvaluationInterval": "60s",
            "lookbackDelta": "300s",
            "shuffleShardingIngestersLookbackPeriod": "0s",
            "thanosEngine": false
        },
        "storage": {
            "backend": "s3",
            "s3": {
                "endpoint": "",
                "region": "us-east-1",
                "bucketName": "",
                "secretAccessKey": "***",
                "accessKeyId": "***",
                "insecure": false,
                "signatureVersion": "v4",
                "bucketLookupType": "auto",
                "sse": {},
                "http": {
                    "idleConnTimeout": "90s",
                    "responseHeaderTimeout": "120s",
                    "insecureSkipVerify": false,
                    "tlsHandshakeTimeout": "10s",
                    "expectContinueTimeout": "1s",
                    "maxIdleConnections": 100,
                    "maxIdleConnectionsPerHost": 100,
                    "maxConnectionsPerHost": 0
                }
            },
            "gcs": {},
            "azure": {
                "maxRetries": 20,
                "http": {
                    "idleConnTimeout": "90s",
                    "responseHeaderTimeout": "120s",
                    "insecureSkipVerify": false,
                    "tlsHandshakeTimeout": "10s",
                    "expectContinueTimeout": "1s",
                    "maxIdleConnections": 100,
                    "maxIdleConnectionsPerHost": 100,
                    "maxConnectionsPerHost": 0
                }
            },
            "swift": {
                "authVersion": 0,
                "maxRetries": 3,
                "connectTimeout": "10s",
                "requestTimeout": "5s"
            },
            "filesystem": {}
        },
        "logLevel": "debug"
    },
    "grafana": {
        "enabled": false,
        "version": "latest"
    }
}

Ingester spec

status:
  phase: Pending
  conditions:
    - type: Initialized
      status: 'True'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
    - type: Ready
      status: 'False'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
      reason: ContainersNotReady
      message: 'containers with unready status: [ingester]'
    - type: ContainersReady
      status: 'False'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
      reason: ContainersNotReady
      message: 'containers with unready status: [ingester]'
    - type: PodScheduled
      status: 'True'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
  hostIP: 10.0.15.181
  startTime: '2023-10-03T14:52:05Z'
  containerStatuses:
    - name: ingester
      state:
        waiting:
          reason: ContainerCreating
      lastState: {}
      ready: false
      restartCount: 0
      image: >-
        docker.io/alex7285/opni@sha256:f1a972305d475f496d10681e51fd545039027f9c04370f3bcaf43f6fa160cd83
      imageID: ''
      started: false
  qosClass: BestEffort
spec:
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: data-cortex-ingester-0
    - name: config
      secret:
        secretName: cortex
        defaultMode: 420
    - name: runtime-config
      configMap:
        name: cortex-runtime-config
        defaultMode: 420
    - name: client-certs
      secret:
        secretName: cortex-serving-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: opni-gateway-client-cert
      secret:
        secretName: opni-gateway-client-cert
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: server-certs
      secret:
        secretName: cortex-serving-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: etcd-client-certs
      secret:
        secretName: etcd-client-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: etcd-server-cacert
      secret:
        secretName: etcd-serving-cert-keys
        items:
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: kube-api-access-v69js
      projected:
        sources:
          - serviceAccountToken:
              expirationSeconds: 3607
              path: token
          - configMap:
              name: kube-root-ca.crt
              items:
                - key: ca.crt
                  path: ca.crt
          - downwardAPI:
              items:
                - path: namespace
                  fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.namespace
        defaultMode: 420
  containers:
    - name: ingester
      image: >-
        docker.io/alex7285/opni@sha256:f1a972305d475f496d10681e51fd545039027f9c04370f3bcaf43f6fa160cd83
      args:
        - cortex
        - '-target=ingester'
        - '-config.file=/etc/cortex/cortex.yaml'
      ports:
        - name: http-metrics
          containerPort: 8080
          protocol: TCP
        - name: gossip
          containerPort: 7946
          protocol: TCP
        - name: grpc
          containerPort: 9095
          protocol: TCP
      resources: {}
      volumeMounts:
        - name: data
          mountPath: /data
        - name: config
          mountPath: /etc/cortex
        - name: runtime-config
          mountPath: /etc/cortex-runtime-config
        - name: client-certs
          readOnly: true
          mountPath: /run/cortex/certs/client
        - name: opni-gateway-client-cert
          readOnly: true
          mountPath: /run/gateway/certs/client
        - name: server-certs
          readOnly: true
          mountPath: /run/cortex/certs/server
        - name: etcd-client-certs
          readOnly: true
          mountPath: /run/etcd/certs/client
        - name: etcd-server-cacert
          mountPath: /run/etcd/certs/server
        - name: kube-api-access-v69js
          readOnly: true
          mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      livenessProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      startupProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      lifecycle:
        preStop:
          httpGet:
            path: /ingester/shutdown
            port: http-metrics
            scheme: HTTP
      terminationMessagePath: /dev/termination-log
      terminationMessagePolicy: File
      imagePullPolicy: IfNotPresent
      securityContext:
        readOnlyRootFilesystem: true
  restartPolicy: Always
  terminationGracePeriodSeconds: 600
  dnsPolicy: ClusterFirst
  serviceAccountName: cortex
  serviceAccount: cortex
  nodeName: large-k3s-pool1-554656e9-tglkx
  securityContext: {}
  hostname: cortex-ingester-0
  subdomain: cortex-ingester
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: app.kubernetes.io/component
                  operator: In
                  values:
                    - ingester
            topologyKey: kubernetes.io/hostname
  schedulerName: default-scheduler
  tolerations:
    - key: node.kubernetes.io/not-ready
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
  priority: 0
  enableServiceLinks: true
  preemptionPolicy: PreemptLowerPriority