Это старая версия документа!


Docker Swarm + Loki + Promtail + Grafana

  • grafana/loki:3.5.8
  • grafana/promtail:3.5.8
  • grafana/grafana:10.2.2
# Loki configuration: core settings.
# Authentication (multi-tenancy) is disabled for this deployment.
auth_enabled: false

# HTTP API listener.
server:
  http_listen_port: 3100

# Settings shared by all Loki components.
common:
  path_prefix: /loki
  instance_addr: 127.0.0.1
  replication_factor: 1
  # Ring state is held in memory.
  ring:
    kvstore:
      store: inmemory
  # Chunks and rules are stored on the local filesystem under /loki.
  storage:
    filesystem:
      rules_directory: /loki/rules
      chunks_directory: /loki/chunks

# Index/chunk schema: TSDB index (schema v13) on the filesystem object store,
# with a new index table every 24h.
schema_config:
  configs:
    - from: 2020-10-24
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        period: 24h
        prefix: index_

# Ruler sends alerts to this Alertmanager URL.
ruler:
  alertmanager_url: http://localhost:9093

# Retention: keep logs for 720h (30 days) and refuse samples older than that.
limits_config:
  retention_period: 720h
  reject_old_samples: true
  reject_old_samples_max_age: 720h

# With the TSDB index, retention_period above is enforced by the compactor and
# only when retention_enabled is true. The legacy table_manager retention
# settings do not apply to TSDB, so they are replaced by this block.
compactor:
  working_directory: /loki/compactor
  retention_enabled: true
  # Required by Loki 3.x whenever retention_enabled is true.
  delete_request_store: filesystem
# Promtail configuration: HTTP listener, positions file and Loki client.
server:
  http_listen_address: 0.0.0.0
  http_listen_port: 9080

positions:
  # Write read positions to disk every 15 seconds.
  sync_period: 15s
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push
    timeout: 60s
    # Send a batch after 30s or once it reaches 2097152 bytes.
    batchwait: 30s
    batchsize: 2097152
    # Retry failed pushes with backoff between 10s and 5m, up to 50 times.
    backoff_config:
      max_retries: 50
      min_period: 10s
      max_period: 5m
    # Labels attached to every entry sent by this client.
    external_labels:
      host: "${HOSTNAME}"
      cluster: docker-swarm

scrape_configs:

  # Tail files matching /var/log/*log and label them job=varlogs.
  - job_name: system
    static_configs:
      - labels:
          __path__: /var/log/*log
          job: varlogs
        targets:
          - localhost
 
  # See https://grafana.com/docs/loki/latest/send-data/promtail/configuration/#docker_sd_config
  # Also see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dockerswarm_sd_config
  # Discover running containers through the local Docker socket and ship the
  # logs of those that belong to a Swarm service.
  - job_name: docker_containers
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 60s
        filters:
          - name: status
            values: ["running"]
    relabel_configs:
      # Static job label for every discovered container.
      - target_label: job
        replacement: docker_containers
      # Strip the leading slash from the container name.
      - source_labels: [__meta_docker_container_name]
        regex: '/(.*)'
        target_label: container
        action: replace
      # Keep only containers that carry a Swarm service name. `keep` never
      # writes a label, so no target_label is set on this rule.
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: ".+"
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: swarm_service_name
      # Swarm metadata reaches docker_sd only as container labels, i.e. as
      # __meta_docker_container_label_<name>: com.docker.stack.namespace and
      # com.docker.swarm.node.id.
      - source_labels: [__meta_docker_container_label_com_docker_stack_namespace]
        target_label: stack
        action: replace
      - source_labels: [__meta_docker_container_label_com_docker_swarm_node_id]
        target_label: node_id
        action: replace
      # NOTE(review): Swarm does not appear to set a com.docker.swarm.node.hostname
      # container label, so `node` may stay empty; the client-level `host`
      # external label already carries the node hostname. Confirm and drop.
      - source_labels: [__meta_docker_container_label_com_docker_swarm_node_hostname]
        target_label: node
        action: replace
      - source_labels: [__meta_docker_container_log_stream]
        target_label: log_stream
        action: replace
      - source_labels: [__meta_docker_container_id]
        target_label: container_id
        action: replace
      # NOTE(review): __meta_docker_container_image is not in the documented
      # docker_sd label list; verify that `image` is actually populated.
      - source_labels: [__meta_docker_container_image]
        target_label: image
        action: replace
      # Drop rule kept for testing.
      # - source_labels: [__meta_docker_container_name]
      #   regex: '(promtail|loki|grafana)'
      #   action: drop
    pipeline_stages:
      # NOTE(review): the docker stage parses the json-file log format, while
      # docker_sd_configs reads logs through the Docker API; confirm this
      # stage is still needed.
      - docker: {}
      # The former timestamp stage read `current_time`, a key no stage
      # extracts, so it never changed any timestamp and has been removed.

monitoring

version: "3.8"

services:

  # Loki (all components in one process), pinned to a manager node.
  loki:
    image: grafana/loki:3.5.8
    command:
      - -config.file=/etc/loki/local-config.yaml
      - -config.expand-env=true
      - -target=all
    configs:
      - source: loki_config
        target: /etc/loki/local-config.yaml
    ports:
      - "3100:3100"
    networks:
      - monitoring
    volumes:
      - loki_data:/loki
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    deploy:
      placement:
        constraints:
          - node.role == manager
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
      resources:
        reservations:
          cpus: '0.5'
          memory: 1G
        limits:
          cpus: '1.0'
          memory: 2G  # Memory limit reduced

  # Promtail agent, one task per Swarm node (mode: global).
  promtail:
    image: grafana/promtail:3.5.8
    configs:
      - source: promtail_config
        target: /etc/promtail/config.yaml
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/log/journal:/var/log/journal:ro
      - /run/systemd/journal:/run/systemd/journal:ro
      - promtail_positions:/tmp
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    command:
      - -config.file=/etc/promtail/config.yaml
      # The `host` label is set in the Promtail config (external_labels.host)
      # and expanded inside the container via -config.expand-env. The former
      # -client.external-labels=host=${HOSTNAME} argument was interpolated by
      # `docker stack deploy` from the deploying shell, not per node, so it
      # has been removed.
      - -config.expand-env=true
      #- -log.level=info
    environment:
      # Swarm service template: resolves to the hostname of the node running
      # this task.
      - HOSTNAME={{.Node.Hostname}}
    networks:
      - monitoring
    deploy:
      mode: global
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'
      restart_policy:
        condition: any
        delay: 30s
        max_attempts: 10

  # Grafana UI, pinned to a manager node, state kept in the grafana_data volume.
  grafana:
    image: grafana/grafana:10.2.2
    ports:
      - "3000:3000"
    environment:
      # Admin credentials can be overridden at deploy time by exporting
      # GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD before `docker stack deploy`.
      # The defaults stay admin/admin for backward compatibility; change them
      # for any instance reachable on the published port.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      # Settings to work around the database locking problem
      - GF_DATABASE_TYPE=sqlite3
      - GF_DATABASE_PATH=grafana.db
      - GF_DATABASE_MAX_IDLE_CONN=1
      - GF_DATABASE_MAX_OPEN_CONN=1
      - GF_DATABASE_CONN_MAX_LIFETIME=14400
      # Disable features that may lock the database
      - GF_ALERTING_ENABLED=false
      - GF_REPORTING_ENABLED=false
      - GF_LIVE_ENABLED=false
    volumes:
      - grafana_data:/var/lib/grafana
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    networks:
      - monitoring
    deploy:
      placement:
        constraints:
          - node.role == manager
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 5

# Swarm configs referenced by the services above. Both are declared external,
# so they are not defined by this stack file.
configs:
  promtail_config:
    external: true
  loki_config:
    external: true

# Overlay network shared by all three services; attachable is enabled.
networks:
  monitoring:
    attachable: true
    driver: overlay
 

# Named volumes, all using the local driver.
volumes:
  loki_data:
    driver: local
  grafana_data:
    driver: local
  promtail_positions:
    driver: local