Cortex: switching from filesystem backend to s3 backend fails
alexandreLamarre opened this issue · comments
The Cortex ingester still requires the `/data` PVC to be mounted,
even though the storage backend was switched to S3.
Monitoring cluster CRD
{
"enabled": true,
"revision": {
"revision": "652795"
},
"cortexWorkloads": {
"targets": {
"alertmanager": {
"replicas": 3
},
"compactor": {
"replicas": 3
},
"distributor": {
"replicas": 1
},
"ingester": {
"replicas": 3
},
"purger": {
"replicas": 1
},
"querier": {
"replicas": 3
},
"query-frontend": {
"replicas": 1
},
"ruler": {
"replicas": 3
},
"store-gateway": {
"replicas": 3
}
}
},
"cortexConfig": {
"limits": {
"ingestionRate": 600000,
"ingestionRateStrategy": "local",
"ingestionBurstSize": 1000000,
"acceptHaSamples": false,
"haClusterLabel": "cluster",
"haReplicaLabel": "__replica__",
"haMaxClusters": 0,
"maxLabelNameLength": 1024,
"maxLabelValueLength": 2048,
"maxLabelNamesPerSeries": 30,
"maxLabelsSizeBytes": 0,
"maxMetadataLength": 1024,
"rejectOldSamples": false,
"rejectOldSamplesMaxAge": "1209600s",
"creationGracePeriod": "600s",
"enforceMetadataMetricName": true,
"enforceMetricName": true,
"ingestionTenantShardSize": 0,
"maxExemplars": 0,
"maxSeriesPerQuery": 100000,
"maxSeriesPerUser": 5000000,
"maxSeriesPerMetric": 50000,
"maxGlobalSeriesPerUser": 0,
"maxGlobalSeriesPerMetric": 0,
"maxMetadataPerUser": 8000,
"maxMetadataPerMetric": 10,
"maxGlobalMetadataPerUser": 0,
"maxGlobalMetadataPerMetric": 0,
"outOfOrderTimeWindow": "0s",
"maxFetchedChunksPerQuery": 2000000,
"maxFetchedSeriesPerQuery": 0,
"maxFetchedChunkBytesPerQuery": 0,
"maxFetchedDataBytesPerQuery": 0,
"maxQueryLookback": "0s",
"maxQueryLength": "0s",
"maxQueryParallelism": 14,
"maxCacheFreshness": "60s",
"maxQueriersPerTenant": 0,
"maxOutstandingRequestsPerTenant": 100,
"rulerEvaluationDelayDuration": "0s",
"rulerTenantShardSize": 0,
"rulerMaxRulesPerRuleGroup": 0,
"rulerMaxRuleGroupsPerTenant": 0,
"storeGatewayTenantShardSize": 0,
"maxDownloadedBytesPerRequest": 0,
"compactorBlocksRetentionPeriod": "2592000s",
"compactorTenantShardSize": 0,
"alertmanagerReceiversFirewallBlockPrivateAddresses": false,
"alertmanagerNotificationRateLimit": 0,
"alertmanagerMaxConfigSizeBytes": 0,
"alertmanagerMaxTemplatesCount": 0,
"alertmanagerMaxTemplateSizeBytes": 0,
"alertmanagerMaxDispatcherAggregationGroups": 0,
"alertmanagerMaxAlertsCount": 0,
"alertmanagerMaxAlertsSizeBytes": 0
},
"runtimeConfig": {
"multiKvConfig": {},
"ingesterLimits": {}
},
"compactor": {
"blockRanges": [
"7200s",
"43200s",
"86400s"
],
"blockSyncConcurrency": 20,
"metaSyncConcurrency": 20,
"consistencyDelay": "0s",
"compactionInterval": "3600s",
"compactionRetries": 3,
"compactionConcurrency": 1,
"cleanupInterval": "900s",
"cleanupConcurrency": 20,
"deletionDelay": "43200s",
"tenantCleanupDelay": "21600s",
"skipBlocksWithOutOfOrderChunksEnabled": false,
"blockFilesConcurrency": 10,
"blocksFetchConcurrency": 3,
"blockDeletionMarksMigrationEnabled": false,
"blockVisitMarkerTimeout": "300s",
"blockVisitMarkerFileUpdateInterval": "60s",
"acceptMalformedIndex": false
},
"querier": {
"maxConcurrent": 20,
"timeout": "120s",
"iterators": false,
"batchIterators": true,
"ingesterStreaming": true,
"ingesterMetadataStreaming": false,
"maxSamples": 50000000,
"queryIngestersWithin": "0s",
"queryStoreForLabelsEnabled": false,
"perStepStatsEnabled": false,
"queryStoreAfter": "0s",
"maxQueryIntoFuture": "600s",
"defaultEvaluationInterval": "60s",
"lookbackDelta": "300s",
"shuffleShardingIngestersLookbackPeriod": "0s",
"thanosEngine": false
},
"storage": {
"backend": "s3",
"s3": {
"endpoint": "",
"region": "us-east-1",
"bucketName": "",
"secretAccessKey": "***",
"accessKeyId": "***",
"insecure": false,
"signatureVersion": "v4",
"bucketLookupType": "auto",
"sse": {},
"http": {
"idleConnTimeout": "90s",
"responseHeaderTimeout": "120s",
"insecureSkipVerify": false,
"tlsHandshakeTimeout": "10s",
"expectContinueTimeout": "1s",
"maxIdleConnections": 100,
"maxIdleConnectionsPerHost": 100,
"maxConnectionsPerHost": 0
}
},
"gcs": {},
"azure": {
"maxRetries": 20,
"http": {
"idleConnTimeout": "90s",
"responseHeaderTimeout": "120s",
"insecureSkipVerify": false,
"tlsHandshakeTimeout": "10s",
"expectContinueTimeout": "1s",
"maxIdleConnections": 100,
"maxIdleConnectionsPerHost": 100,
"maxConnectionsPerHost": 0
}
},
"swift": {
"authVersion": 0,
"maxRetries": 3,
"connectTimeout": "10s",
"requestTimeout": "5s"
},
"filesystem": {}
},
"logLevel": "debug"
},
"grafana": {
"enabled": false,
"version": "latest"
}
}
Ingester spec
status:
  phase: Pending
  conditions:
    - type: Initialized
      status: 'True'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
    - type: Ready
      status: 'False'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
      reason: ContainersNotReady
      message: 'containers with unready status: [ingester]'
    - type: ContainersReady
      status: 'False'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
      reason: ContainersNotReady
      message: 'containers with unready status: [ingester]'
    - type: PodScheduled
      status: 'True'
      lastProbeTime: null
      lastTransitionTime: '2023-10-03T14:52:05Z'
  hostIP: 10.0.15.181
  startTime: '2023-10-03T14:52:05Z'
  containerStatuses:
    - name: ingester
      state:
        waiting:
          reason: ContainerCreating
      lastState: {}
      ready: false
      restartCount: 0
      image: >-
        docker.io/alex7285/opni@sha256:f1a972305d475f496d10681e51fd545039027f9c04370f3bcaf43f6fa160cd83
      imageID: ''
      started: false
  qosClass: BestEffort
spec:
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: data-cortex-ingester-0
    - name: config
      secret:
        secretName: cortex
        defaultMode: 420
    - name: runtime-config
      configMap:
        name: cortex-runtime-config
        defaultMode: 420
    - name: client-certs
      secret:
        secretName: cortex-serving-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: opni-gateway-client-cert
      secret:
        secretName: opni-gateway-client-cert
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: server-certs
      secret:
        secretName: cortex-serving-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: etcd-client-certs
      secret:
        secretName: etcd-client-cert-keys
        items:
          - key: tls.crt
            path: tls.crt
          - key: tls.key
            path: tls.key
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: etcd-server-cacert
      secret:
        secretName: etcd-serving-cert-keys
        items:
          - key: ca.crt
            path: ca.crt
        defaultMode: 420
    - name: kube-api-access-v69js
      projected:
        sources:
          - serviceAccountToken:
              expirationSeconds: 3607
              path: token
          - configMap:
              name: kube-root-ca.crt
              items:
                - key: ca.crt
                  path: ca.crt
          - downwardAPI:
              items:
                - path: namespace
                  fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.namespace
        defaultMode: 420
  containers:
    - name: ingester
      image: >-
        docker.io/alex7285/opni@sha256:f1a972305d475f496d10681e51fd545039027f9c04370f3bcaf43f6fa160cd83
      args:
        - cortex
        - '-target=ingester'
        - '-config.file=/etc/cortex/cortex.yaml'
      ports:
        - name: http-metrics
          containerPort: 8080
          protocol: TCP
        - name: gossip
          containerPort: 7946
          protocol: TCP
        - name: grpc
          containerPort: 9095
          protocol: TCP
      resources: {}
      volumeMounts:
        - name: data
          mountPath: /data
        - name: config
          mountPath: /etc/cortex
        - name: runtime-config
          mountPath: /etc/cortex-runtime-config
        - name: client-certs
          readOnly: true
          mountPath: /run/cortex/certs/client
        - name: opni-gateway-client-cert
          readOnly: true
          mountPath: /run/gateway/certs/client
        - name: server-certs
          readOnly: true
          mountPath: /run/cortex/certs/server
        - name: etcd-client-certs
          readOnly: true
          mountPath: /run/etcd/certs/client
        - name: etcd-server-cacert
          mountPath: /run/etcd/certs/server
        - name: kube-api-access-v69js
          readOnly: true
          mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      livenessProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      startupProbe:
        exec:
          command:
            - /usr/bin/curl
            - '-k'
            - https://127.0.0.1:8080/ready
            - '--key'
            - /run/cortex/certs/client/tls.key
            - '--cert'
            - /run/cortex/certs/client/tls.crt
            - '--cacert'
            - /run/cortex/certs/client/ca.crt
        initialDelaySeconds: 5
        timeoutSeconds: 1
        periodSeconds: 10
        successThreshold: 1
        failureThreshold: 3
      lifecycle:
        preStop:
          httpGet:
            path: /ingester/shutdown
            port: http-metrics
            scheme: HTTP
      terminationMessagePath: /dev/termination-log
      terminationMessagePolicy: File
      imagePullPolicy: IfNotPresent
      securityContext:
        readOnlyRootFilesystem: true
  restartPolicy: Always
  terminationGracePeriodSeconds: 600
  dnsPolicy: ClusterFirst
  serviceAccountName: cortex
  serviceAccount: cortex
  nodeName: large-k3s-pool1-554656e9-tglkx
  securityContext: {}
  hostname: cortex-ingester-0
  subdomain: cortex-ingester
  affinity:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            labelSelector:
              matchExpressions:
                - key: app.kubernetes.io/component
                  operator: In
                  values:
                    - ingester
            topologyKey: kubernetes.io/hostname
  schedulerName: default-scheduler
  tolerations:
    - key: node.kubernetes.io/not-ready
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300
  priority: 0
  enableServiceLinks: true
  preemptionPolicy: PreemptLowerPriority