Docker Swarm + Promtail + Loki + Grafana

Fluentd, Fluent Bit, Logstash и Promtail являются клиентами Loki, а Distributor и Ingester — компонентами Loki.

# Loki configuration (presumably the content of the external `loki_config` Docker config — confirm).
# Multi-tenant authentication is disabled.
auth_enabled: false

# Loki HTTP listener. 3100 is the port the stack publishes and the port
# Promtail pushes to (http://loki:3100/loki/api/v1/push).
server:
  http_listen_port: 3100

# Settings shared by all Loki components: one instance, no replication,
# everything stored on the local filesystem under /loki.
common:
  path_prefix: /loki
  instance_addr: 127.0.0.1
  replication_factor: 1
  # The ring state is kept in memory; no external KV store is configured.
  ring:
    kvstore:
      store: inmemory
  # Chunks and ruler rules are written to local directories.
  storage:
    filesystem:
      rules_directory: /loki/rules
      chunks_directory: /loki/chunks

# Storage schema: a single period starting 2020-10-24 that uses the TSDB
# index (schema v13) with chunks on the filesystem.
schema_config:
  configs:
    - from: 2020-10-24
      schema: v13
      store: tsdb
      object_store: filesystem
      # One index table per 24h, named with the "index_" prefix.
      index:
        period: 24h
        prefix: index_

# Alertmanager endpoint the ruler sends alerts to.
# NOTE(review): no Alertmanager service is visible in the stack definition in
# this file — confirm something actually listens on localhost:9093.
ruler:
  alertmanager_url: http://localhost:9093

# Limits. Retention and the old-sample cutoff are both 720h (30 days);
# the maximum query length is one hour longer (721h).
limits_config:
  allow_structured_metadata: true
  # Reject samples whose timestamp is older than 720h.
  reject_old_samples: true
  reject_old_samples_max_age: 720h
  max_query_length: 721h
  # NOTE(review): per the Loki retention docs this period is enforced by the
  # compactor — confirm compactor retention is enabled for it to take effect.
  retention_period: 720h
  
# Ingester chunk handling and ring membership.
ingester:
  # Target chunk size in bytes (1048576 = 1 MiB).
  chunk_target_size: 1048576
  # Idle period and maximum chunk age are both one hour.
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_retain_period: 30s
  lifecycler:
    final_sleep: 0s
    # Same in-memory, replication-factor-1 ring as in the `common` block.
    ring:
      replication_factor: 1
      kvstore:
        store: inmemory

# Retention for the TSDB index is carried out by the compactor: it applies
# limits_config.retention_period and deletes expired chunks. Without
# retention_enabled the 720h retention period is never enforced.
compactor:
  working_directory: /loki/compactor
  retention_enabled: true
  # Required whenever retention_enabled is true; matches the schema's object_store.
  delete_request_store: filesystem

# Table-manager retention only applies to legacy (non-TSDB) index stores.
# Kept unchanged for backward compatibility with the existing config.
table_manager:
  retention_deletes_enabled: true
  retention_period: 720h
# Promtail configuration (presumably the content of the external
# `promtail_config` Docker config — confirm).
# HTTP server: listen on all interfaces, port 9080.
server:
  http_listen_address: 0.0.0.0
  http_listen_port: 9080

# Positions file that records how far each target has been read.
positions:
  # Write positions to disk every 15 seconds.
  sync_period: 15s
  # The stack mounts the promtail_positions volume at /tmp, so this file
  # lives on that volume.
  filename: /tmp/positions.yaml

# Loki push endpoint, reached by service name.
clients:
  - url: http://loki:3100/loki/api/v1/push
    timeout: 30s
    # Send a batch after 10s or once it reaches 1048576 bytes (1 MiB).
    batchwait: 10s
    batchsize: 1048576
    # Retry failed pushes up to 10 times, backing off from 5s to 1m.
    backoff_config:
      max_retries: 10
      min_period: 5s
      max_period: 1m

scrape_configs:

  # Main log source: stdout/stderr of containers, discovered on the local
  # Docker daemon and read through the Docker API.
  - job_name: docker_containers
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 15s
        # Only containers carrying the label logging=promtail are scraped.
        filters:
          - name: label
            values: ["logging=promtail"]

    relabel_configs:
      # Static job label for every stream of this job.
      - target_label: job
        replacement: docker_containers

      # Container identity. The regex strips the leading "/" from the name.
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.*)'
        target_label: 'container'
        action: replace
      - source_labels: ['__meta_docker_container_id']
        target_label: 'container_id'
        action: replace

      # Same container name exposed a second time as `container_name`;
      # kept so existing queries using either label continue to work.
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.*)'
        target_label: 'container_name'
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: 'log_stream'
        action: replace

      # Docker Swarm metadata taken from the container labels set by Swarm.
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_service_name']
        target_label: 'service_name'
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_task_name']
        target_label: 'task_name'
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_node_id']
        target_label: 'node_id'
      - source_labels: ['__meta_docker_container_label_com_docker_swarm_stack_namespace']
        target_label: 'stack'

      # Hostname of the node. ${HOSTNAME} is substituted from the environment
      # when Promtail runs with -config.expand-env=true.
      - target_label: 'node_host'
        replacement: '${HOSTNAME}'

      # No `__path__` rule: docker_sd targets are not tailed from files, and
      # the previous glob (/var/lib/docker/containers/*-json.log) would not
      # have matched Docker's <id>/<id>-json.log layout anyway.

    # No pipeline stages are needed for Docker API targets:
    #  - the `docker` stage parses json-file wrapped lines, but lines read
    #    through the Docker API are not wrapped;
    #  - the former `timestamp` stage read `current_time`, an extracted key
    #    that no stage produced, so it never changed the timestamp.
    pipeline_stages: []

monitoring

# Compose file format version for the Swarm stack defined below.
version: "3.8"

# Shared logging settings, reused by services through the *default-logging
# alias: json-file driver, a single 1m log file, tagged with the container name.
x-logging: &default-logging
  driver: 'json-file'
  options:
    tag: '{{.Name}}'
    max-file: '1'
    max-size: '1m'

services:

  # Loki: single instance (-target=all) pinned to a manager node.
  loki:
    image: grafana/loki:3.5.8
    command:
      - -config.file=/etc/loki/local-config.yaml
      - -config.expand-env=true
      - -target=all
    environment:
      - TZ=${SYSTEM_TIMEZONE:-Europe/Moscow}
    # The external loki_config is mounted at the path passed to -config.file.
    configs:
      - source: loki_config
        target: /etc/loki/local-config.yaml
    volumes:
      - loki_data:/loki
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    ports:
      - "3100:3100"
    networks:
      - monitoring
    logging: *default-logging
    # Container labels; logging=promtail is the value the Promtail
    # docker_sd filter selects on.
    labels:
      logging: "promtail"
      logging_jobname: "docker_containers"
    deploy:
      placement:
        constraints:
          - node.role == manager
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
      resources:
        reservations:
          cpus: '0.5'
          memory: 1G
        limits:
          cpus: '1.0'
          memory: 2G  # reduced memory limit
 

  # Promtail: one task per node (global mode), shipping container logs to Loki.
  promtail:
    image: grafana/promtail:3.5.8
    command:
      - -config.file=/etc/promtail/config.yaml
      - -config.expand-env=true
    environment:
      - TZ=${SYSTEM_TIMEZONE:-Europe/Moscow}
      # Swarm template: resolves to the hostname of the node running the task.
      - HOSTNAME={{.Node.Hostname}}
    configs:
      - source: promtail_config
        target: /etc/promtail/config.yaml
    volumes:
      # Read-only access to the Docker socket and host log directories.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/log:/var/log:ro
      # /tmp is backed by a named volume.
      - promtail_positions:/tmp
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    networks:
      - monitoring
    deploy:
      mode: global
      restart_policy:
        condition: any
        delay: 30s
        max_attempts: 10
      resources:
        reservations:
          cpus: '0.25'
          memory: 256M
        limits:
          cpus: '0.5'
          memory: 512M
 

  # Grafana UI, published on port 3000 and pinned to a manager node.
  grafana:
    image: grafana/grafana:12.1.4
    ports:
      - "3000:3000"
    environment:
      - TZ=${SYSTEM_TIMEZONE:-Europe/Moscow}
      # Grafana settings (GF_* variables).
      # Admin credentials can be overridden at deploy time via
      # GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD. The fallback stays
      # admin/admin for backward compatibility — override it on any
      # deployment where port 3000 is reachable by others.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      # Anonymous visitors get read-only (Viewer) access.
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      # Settings intended to work around database locking issues.
      - GF_DATABASE_TYPE=sqlite3
      - GF_DATABASE_PATH=grafana.db
      # - GF_DATABASE_MAX_IDLE_CONN=1
      # - GF_DATABASE_MAX_OPEN_CONN=1
      - GF_DATABASE_CONN_MAX_LIFETIME=14400
      # Disable features that may lock the database.
      # NOTE(review): confirm each of these options still exists in Grafana 12.
      - GF_ALERTING_ENABLED=false
      - GF_REPORTING_ENABLED=false
      - GF_LIVE_ENABLED=false
    volumes:
      - grafana_data:/var/lib/grafana
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    networks:
      - monitoring
    deploy:
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 5
 

# Both configs are declared external: they are not created by this stack
# and must already exist in the Swarm when it is deployed.
configs:
  promtail_config:
    external: true
  loki_config:
    external: true

# Overlay network shared by all three services; attachable, so containers
# outside the stack can join it as well.
networks:
  monitoring:
    attachable: true
    driver: overlay

# Named volumes, all using the local driver.
volumes:
  loki_data:
    driver: local
  grafana_data:
    driver: local
  promtail_positions:
    driver: local
# Logging fragment (appears to be a reusable snippet for other stacks — confirm):
# json-file driver, a single 1m log file, tagged with the container name.
x-logging: &default-logging
  driver: 'json-file'
  options:
    tag: '{{.Name}}'
    max-file: '1'
    max-size: '1m'

services:

  # Per-service part of the fragment: the logging=promtail container label is
  # what the Promtail docker_sd filter selects on.
  loki:
    labels:
      logging: "promtail"
      logging_jobname: "docker_containers"
    logging: *default-logging