404 for grafana / prometheus requests
JonnyBeeGod opened this issue · comments
For deploying my backend (Swift Vapor application + Postgres + Traefik) I basically followed the steps in these two great tutorials -> https://dockerswarm.rocks/traefik/ and https://dockerswarm.rocks/swarmprom/
The backend works fine, however requests to Grafana or Prometheus are answered with 404:
[19/Feb/2021:05:52:42 +0000] "GET / HTTP/2.0" 404 19 "-" "-" 13 "-" "-" 0ms`
Looking at the logs of e.g. prometheus they seem to be up and running so I suspect there is a problem with the traefik routing. Also I double checked $(DOMAIN) which is correct. Help would be greatly appreciated
This is my stack:
version: '3.7'
volumes:
db_data:
letsencrypt:
networks:
backend:
internal: true
traefik-public:
external: true
configs:
traefik_config.toml:
file: ./traefik_config/traefik-tls.toml
services:
traefik:
image: "traefik:v2.4"
container_name: "traefik"
deploy:
placement:
constraints:
# Make the traefik service run only on the node with this label
# as the node with it has the volume for the certificates
- node.labels.traefik-public.traefik-public-certificates == true
command:
- --log.level=DEBUG # debug while we get it working, for more levels/info see https://docs.traefik.io/observability/logs/
- --api.insecure=true
- --providers.docker=true
- --providers.docker.swarmMode=true
- --providers.docker.exposedbydefault=false
- --entrypoints.web.address=:80
- --entrypoints.websecure.address=:443
# Add a constraint to only use services with the label "traefik.constraint-label=traefik-public"
- --providers.docker.constraints=Label(`traefik.constraint-label`, `traefik-public`)
# Create the certificate resolver "le" for Let's Encrypt, uses the environment variable EMAIL
# Use the TLS Challenge for Let's Encrypt
- --providers.file.filename=/traefik_config.toml
- --certificatesresolvers.le.acme.httpchallenge=true
- --certificatesresolvers.le.acme.httpchallenge.entrypoint=web
# - --certificatesresolvers.le.acme.caserver=https://acme-v02.api.letsencrypt.org/directory
- --certificatesresolvers.le.acme.email=${EMAIL?Variable not set}
- --certificatesresolvers.le.acme.storage=/letsencrypt/acme.json
# Enable the access log, with HTTP requests
- --accesslog
# Enable the Traefik log, for configurations and errors
- --log
ports:
- "80:80"
- "443:443"
volumes:
- letsencrypt:/letsencrypt
- "/var/run/docker.sock:/var/run/docker.sock:ro"
configs:
- traefik_config.toml
networks:
- traefik-public
app:
image: mycustomapplicationimage:0.0.6
container_name: backend
deploy:
placement:
constraints:
# Make the traefik service run only on the node with this label
# as the node with it has the volume for the certificates
- node.labels.traefik-public.traefik-public-certificates == true
labels:
- traefik.enable=true
- traefik.docker.network=traefik-public
- traefik.constraint-label=traefik-public
- traefik.http.routers.traefik-public-http.rule=Host(`example.com`, `www.example.com`)
- traefik.http.routers.traefik-public-http.entrypoints=web
- traefik.http.routers.traefik-public-http.middlewares=https-redirect
- traefik.http.routers.traefik-public-https.rule=Host(`example.com`, `www.example.com`)
- traefik.http.routers.traefik-public-https.entrypoints=websecure
- traefik.http.routers.traefik-public-https.tls=true
- traefik.http.middlewares.https-redirect.redirectscheme.scheme=https
- traefik.http.middlewares.https-redirect.redirectscheme.permanent=true
- traefik.http.routers.traefik-public-http.tls.certresolver=le
# Define the port inside of the Docker service to use
- traefik.http.services.traefik-public.loadbalancer.server.port=8081
networks:
- traefik-public
- backend
build:
context: .
environment:
DB_HOST: db
DB_NAME: "${DB_NAME}"
DB_PASS: "${DB_PASS}"
DB_PORT: "${DB_PORT}"
DB_USER: "${DB_USER}"
RSA_PRIVATE_KEY: "${RSA_PRIVATE_KEY}"
restart: always
# user: '0' # uncomment to run as root for testing purposes even though Dockerfile defines 'vapor' user.
command: ["serve", "--env", "production", "--hostname", "0.0.0.0", "--port", "8081"]
migrate:
image: mycustomapplicationimage:0.0.6
build:
context: .
environment:
DB_HOST: db
DB_NAME: "${DB_NAME}"
DB_PASS: "${DB_PASS}"
DB_PORT: "${DB_PORT}"
DB_USER: "${DB_USER}"
RSA_PRIVATE_KEY: ${RSA_PRIVATE_KEY}
networks:
- backend
command: ["migrate", "--yes"]
deploy:
replicas: 0
revert:
image: mycustomapplicationimage:0.0.6
build:
context: .
environment:
DB_HOST: db
DB_NAME: "${DB_NAME}"
DB_PASS: "${DB_PASS}"
DB_PORT: "${DB_PORT}"
DB_USER: "${DB_USER}"
RSA_PRIVATE_KEY: "${RSA_PRIVATE_KEY}"
networks:
- backend
command: ["migrate", "--revert", "--yes"]
deploy:
replicas: 0
db:
image: postgres:12-alpine
sysctls:
# NOTES: these values are needed here because docker swarm kills long running idle
# connections by default after 15 minutes see https://github.com/moby/moby/issues/31208
# info about these values are here https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/usingkeepalive.html
- net.ipv4.tcp_keepalive_intvl=600
- net.ipv4.tcp_keepalive_probes=9
- net.ipv4.tcp_keepalive_time=600
restart: always
volumes:
- db_data:/var/lib/postgresql/data/pgdata
networks:
- backend
environment:
PGDATA: /var/lib/postgresql/data/pgdata
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASS}
POSTGRES_DB: ${DB_NAME}
and
version: "3.3"
networks:
net:
driver: overlay
attachable: true
traefik-public:
external: true
volumes:
prometheus: {}
grafana: {}
alertmanager: {}
configs:
dockerd_config:
file: ./dockerd-exporter/Caddyfile
node_rules:
file: ./prometheus/rules/swarm_node.rules.yml
task_rules:
file: ./prometheus/rules/swarm_task.rules.yml
services:
dockerd-exporter:
image: stefanprodan/caddy
networks:
- net
environment:
- DOCKER_GWBRIDGE_IP=172.18.0.1
configs:
- source: dockerd_config
target: /etc/caddy/Caddyfile
deploy:
mode: global
resources:
limits:
memory: 128M
reservations:
memory: 64M
cadvisor:
image: google/cadvisor
networks:
- net
command: -logtostderr -docker_only
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- /:/rootfs:ro
- /var/run:/var/run
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
deploy:
mode: global
resources:
limits:
memory: 128M
reservations:
memory: 64M
grafana:
image: stefanprodan/swarmprom-grafana:5.3.4
networks:
- default
- net
- traefik-public
environment:
- GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
- GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
#- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost}
#- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false}
#- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com}
#- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana}
#- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25}
#- GF_SMTP_USER=${GF_SMTP_USER}
#- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD}
volumes:
- grafana:/var/lib/grafana
deploy:
mode: replicated
replicas: 1
placement:
constraints:
- node.role == manager
resources:
limits:
memory: 128M
reservations:
memory: 64M
labels:
- traefik.enable=true
- traefik.docker.network=traefik-public
- traefik.constraint-label=traefik-public
- traefik.http.routers.swarmprom-grafana-http.rule=Host(`grafana.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-grafana-http.entrypoints=http
- traefik.http.routers.swarmprom-grafana-http.middlewares=https-redirect
- traefik.http.routers.swarmprom-grafana-https.rule=Host(`grafana.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-grafana-https.entrypoints=https
- traefik.http.routers.swarmprom-grafana-https.tls=true
- traefik.http.routers.swarmprom-grafana-https.tls.certresolver=le
- traefik.http.services.swarmprom-grafana.loadbalancer.server.port=3000
alertmanager:
image: stefanprodan/swarmprom-alertmanager:v0.14.0
networks:
- default
- net
- traefik-public
environment:
- SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN}
- SLACK_CHANNEL=${SLACK_CHANNEL:-general}
- SLACK_USER=${SLACK_USER:-alertmanager}
command:
- '--config.file=/etc/alertmanager/alertmanager.yml'
- '--storage.path=/alertmanager'
volumes:
- alertmanager:/alertmanager
deploy:
mode: replicated
replicas: 1
placement:
constraints:
- node.role == manager
resources:
limits:
memory: 128M
reservations:
memory: 64M
labels:
- traefik.enable=true
- traefik.docker.network=traefik-public
- traefik.constraint-label=traefik-public
- traefik.http.routers.swarmprom-alertmanager-http.rule=Host(`alertmanager.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-alertmanager-http.entrypoints=http
- traefik.http.routers.swarmprom-alertmanager-http.middlewares=https-redirect
- traefik.http.routers.swarmprom-alertmanager-https.rule=Host(`alertmanager.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-alertmanager-https.entrypoints=https
- traefik.http.routers.swarmprom-alertmanager-https.tls=true
- traefik.http.routers.swarmprom-alertmanager-https.tls.certresolver=le
- traefik.http.services.swarmprom-alertmanager.loadbalancer.server.port=9093
- traefik.http.middlewares.swarmprom-alertmanager-auth.basicauth.users=${ADMIN_USER?Variable not set}:${HASHED_PASSWORD?Variable not set}
- traefik.http.routers.swarmprom-alertmanager-https.middlewares=swarmprom-alertmanager-auth
unsee:
image: cloudflare/unsee:v0.8.0
networks:
- default
- net
- traefik-public
environment:
- "ALERTMANAGER_URIS=default:http://alertmanager:9093"
deploy:
mode: replicated
replicas: 1
labels:
- traefik.enable=true
- traefik.docker.network=traefik-public
- traefik.constraint-label=traefik-public
- traefik.http.routers.swarmprom-unsee-http.rule=Host(`unsee.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-unsee-http.entrypoints=http
- traefik.http.routers.swarmprom-unsee-http.middlewares=https-redirect
- traefik.http.routers.swarmprom-unsee-https.rule=Host(`unsee.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-unsee-https.entrypoints=https
- traefik.http.routers.swarmprom-unsee-https.tls=true
- traefik.http.routers.swarmprom-unsee-https.tls.certresolver=le
- traefik.http.services.swarmprom-unsee.loadbalancer.server.port=8080
- traefik.http.middlewares.swarmprom-unsee-auth.basicauth.users=${ADMIN_USER?Variable not set}:${HASHED_PASSWORD?Variable not set}
- traefik.http.routers.swarmprom-unsee-https.middlewares=swarmprom-unsee-auth
node-exporter:
image: stefanprodan/swarmprom-node-exporter:v0.16.0
networks:
- net
environment:
- NODE_ID={{.Node.ID}}
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- /etc/hostname:/etc/nodename
command:
- '--path.sysfs=/host/sys'
- '--path.procfs=/host/proc'
- '--collector.textfile.directory=/etc/node-exporter/'
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
- '--no-collector.ipvs'
deploy:
mode: global
resources:
limits:
memory: 128M
reservations:
memory: 64M
prometheus:
image: stefanprodan/swarmprom-prometheus:v2.5.0
networks:
- default
- net
- traefik-public
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention=${PROMETHEUS_RETENTION:-24h}'
volumes:
- prometheus:/prometheus
configs:
- source: node_rules
target: /etc/prometheus/swarm_node.rules.yml
- source: task_rules
target: /etc/prometheus/swarm_task.rules.yml
deploy:
mode: replicated
replicas: 1
placement:
constraints:
- node.role == manager
resources:
limits:
memory: 2048M
reservations:
memory: 128M
labels:
- traefik.enable=true
- traefik.docker.network=traefik-public
- traefik.constraint-label=traefik-public
- traefik.http.routers.swarmprom-prometheus-http.rule=Host(`prometheus.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-prometheus-http.entrypoints=http
- traefik.http.routers.swarmprom-prometheus-http.middlewares=https-redirect
- traefik.http.routers.swarmprom-prometheus-https.rule=Host(`prometheus.${DOMAIN?Variable not set}`)
- traefik.http.routers.swarmprom-prometheus-https.entrypoints=https
- traefik.http.routers.swarmprom-prometheus-https.tls=true
- traefik.http.routers.swarmprom-prometheus-https.tls.certresolver=le
- traefik.http.services.swarmprom-prometheus.loadbalancer.server.port=9090
- traefik.http.middlewares.swarmprom-prometheus-auth.basicauth.users=${ADMIN_USER?Variable not set}:${HASHED_PASSWORD?Variable not set}
- traefik.http.routers.swarmprom-prometheus-https.middlewares=swarmprom-prometheus-auth
Traefik Dashboard helped me find the error :) The entrypoints I specified in first compose file did not match those of the second. Now it works :)
Thanks for reporting back and closing the issue 👍
Sorry for the long delay! 🙈 I wanted to personally address each issue/PR and they piled up through time, but now I'm checking each one in order.