Это старая версия документа!


Обработка логов syslog + Docker

fluent-bit.conf

[SERVICE]
    # Engine: stay in the foreground, flush interval 1, info-level logging.
    daemon                    off
    flush                     1
    log_level                 info
    coro_stack_size           24576
    parsers_file              parsers.conf
    # NOTE(review): plugins_path is not among the documented [SERVICE] keys
    # (external plugins are normally declared via plugins_file) - confirm it has any effect.
    plugins_path              /fluent-bit/bin/
    # Filesystem buffering for inputs that set Storage.Type filesystem.
    storage.path              /var/log/flb-storage/
    storage.sync              normal
    storage.checksum          off
    storage.max_chunks_up     128
    storage.backlog.mem_limit 10M
    # Built-in HTTP server, listening on every interface on port 2020.
    http_server               on
    http_listen               0.0.0.0
    http_port                 2020
 
# ==================== ASUS ROUTER SYSLOG INPUT ====================
# Accepts syslog over TCP on port 5140 and tags every record router.syslog.
[INPUT]
    Name              syslog
    Tag               router.syslog
    Mode              tcp
    Listen            0.0.0.0
    Port              5140
    Parser            syslog-rfc5424
    Buffer_Chunk_Size 4MB
    Buffer_Max_Size   16MB
 
# ==================== DOCKER LOGS INPUT ====================
# Tails the *-json.log files of all containers. The hex file-name prefix
# captured by Tag_Regex becomes the tag suffix (docker.<file_name>).
[INPUT]
    Name              tail
    Path              /var/lib/docker/containers/*/*.log
    Tag               docker.<file_name>
    Tag_Regex         (?<file_name>[a-f0-9]*)-json.log
    Parser            docker
    Docker_Mode       On
    Skip_Long_Lines   On
    Read_from_Head    false
    Ignore_Older      1h
    Refresh_Interval  5
    Mem_Buf_Limit     50MB
    Storage.Type      filesystem
    DB                /var/log/flb-storage/flb_db.db
    DB.sync           normal
 
# Только для отладки
# [INPUT]
#     Name              tail
#     Path              /var/lib/docker/containers/*/*.log
#     Parser            docker
#     Refresh_Interval  10
#     Docker_Mode       On
#     Tag               docker.<file_name>
#     Tag_Regex         (?<file_name>[a-f0-9]*)-json.log
#     Mem_Buf_Limit     50MB
#     Skip_Long_Lines   On
#     DB                /var/log/flb-storage/flb_db.db
#     DB.sync           normal
#     Storage.Type      filesystem
#     Read_from_Head    true
 
# ==================== DOCKER EVENTS INPUT ====================
# Streams Docker daemon events read from the Docker Unix socket, so the socket
# must be reachable at Unix_Path inside the Fluent Bit container.
# Interval_Sec, Docker_Mode and DB were removed: the docker_events plugin does
# not define them, and Fluent Bit refuses to start on unknown plugin properties.
[INPUT]
    Name              docker_events
    Tag               docker.events
    Unix_Path         /var/run/docker.sock
 
# ==================== SYSTEM METRICS INPUT ====================
# Memory sample every 60 seconds.
[INPUT]
    Name              mem
    Interval_Sec      60
    Tag               node.memory

# CPU sample every 60 seconds.
[INPUT]
    Name              cpu
    Interval_Sec      60
    Interval_NSec     0
    Tag               node.cpu
 
# Disk I/O sample every 300 seconds.
# Dev_Name is matched against the device names listed in /proc/diskstats
# (for example "sda1"), so it must not carry the /dev/ prefix.
[INPUT]
    Name              disk
    Tag               node.disk
    Interval_Sec      300
    Dev_Name          sda1
 
# ==================== ROUTER FILTERS ====================
# Step 1: run the router_logs parser over the "message" field; Reserve_Data
# keeps the remaining fields of the record.
[FILTER]
    Name                parser
    Match               router.*
    Parser              router_logs
    Key_Name            message
    Reserve_Data        true

# Step 2: append static identification fields to every router record.
[FILTER]
    Name                record_modifier
    Match               router.*
    Record              hostname ${HOSTNAME}
    Record              device_type router
    Record              source asus_merlin
    Record              cluster docker_swarm

# Step 3: rename host/ident and stamp log_type and environment.
[FILTER]
    Name                modify
    Match               router.*
    Rename              host source_host
    Rename              ident facility
    Set                 log_type syslog
    Set                 environment production
 
# ==================== DOCKER FILTERS ====================
# Drop records whose "log" field is empty or whitespace-only.
[FILTER]
    Name                grep
    Match               docker.*
    Exclude             log ^$
    Exclude             log ^\s*$

# Keep a copy of the log line in "raw_log".
[FILTER]
    Name                modify
    Match               docker.*
    Copy                log raw_log

# Enrich records by calling enrich_with_docker_metadata from the Lua script.
[FILTER]
    Name                lua
    Match               docker.*
    Call                enrich_with_docker_metadata
    Script              /fluent-bit/bin/docker-metadata.lua
 
# Add Swarm metadata: copy the Docker metadata fields to short top-level names
# and stamp the node identity taken from environment variables.
# NOTE(review): "docker.container_name" etc. are presumably the literal key
# names written by docker-metadata.lua - confirm against the script.
# NOTE(review): NODE_ID, NODE_NAME and SWARM_NODE_ROLE must all be defined in
# the Fluent Bit container environment - verify that SWARM_NODE_ROLE is set.
[FILTER]
    Name                modify
    Match               docker.*
    Copy                docker.container_name container_name
    Copy                docker.container_id container_id
    Copy                docker.image_name image_name
    Copy                docker.image_id image_id
    Copy                docker.command command
    Copy                docker.created created
    Copy                docker.hostname hostname
    Set                 node_id ${NODE_ID}
    Set                 node_name ${NODE_NAME}
    Set                 swarm_node_role ${SWARM_NODE_ROLE}
 
# Parse JSON application logs: decode raw_log with the parser named "json"
# (it has to be declared in the parsers file) and keep the other fields.
[FILTER]
    Name                parser
    Match               docker.*
    Parser              json
    Key_Name            raw_log
    Reserve_Data        true
 
# Drop the logs of the service containers (fluent-bit, grafana, loki, traefik).
[FILTER]
    Name                grep
    Match               docker.*
    Exclude             container_name ^/fluent-bit.*
    Exclude             container_name ^/grafana.*
    Exclude             container_name ^/loki.*
    Exclude             container_name ^/traefik.*

# Group logs by service: records whose "logging" field equals "enabled" are
# re-emitted with the tag logs.<container_name>; the trailing "true" keeps the
# original record as well.
[FILTER]
    Name                rewrite_tag
    Match               docker.*
    Rule                $logging ^enabled$ logs.$container_name true
 
# ==================== OUTPUTS ====================
# Router logs -> Loki.
# Label_Keys entries are record accessors and must start with "$"; the Loki
# output rejects bare key names at start-up.
[OUTPUT]
    Name                loki
    Match               router.*
    Host                loki
    Port                3100
    Labels              job=asus_router, device_type=router, source=syslog, cluster=docker_swarm
    Label_Keys          $source_host,$facility,$severity
    Line_Format         key_value
    Auto_Kubernetes_Labels off
    Drop_Single_Key     true
    Tenant_ID           router
 
# Docker container logs (records tagged logs.*) -> Loki.
[OUTPUT]
    Name                loki
    Match               logs.*
    Host                loki
    Port                3100
    Tenant_ID           docker
    Line_Format         json
    Drop_Single_Key     true
    Auto_Kubernetes_Labels off
    Labels              job=$logging_jobname, node=$node_name, container=$container_name, image=$image_name, cluster=docker_swarm
    Label_Keys          $node_name,$container_name,$image_name,$swarm_node_role
 
# System metrics -> Loki.
# node.* records are not touched by any filter, so they carry no node_name
# field; the node label is taken from the NODE_NAME environment variable
# instead of the record accessor $node_name, which never resolved.
[OUTPUT]
    Name                loki
    Match               node.*
    Host                loki
    Port                3100
    Labels              job=node_metrics, node=${NODE_NAME}, cluster=docker_swarm
    Line_Format         key_value
    Auto_Kubernetes_Labels off
 
# Docker events -> Loki.
[OUTPUT]
    Name                loki
    Match               docker.events
    Host                loki
    Port                3100
    Line_Format         json
    Auto_Kubernetes_Labels off
    Labels              job=docker_events, node=$node_name, cluster=docker_swarm
 
# Отладочный вывод (можно отключить)
#[OUTPUT]
#    Name                stdout
#    Match               router.*
#    Format              json_lines
 
#[OUTPUT]
#    Name                stdout
#    Match               logs.*
#    Format              json_lines

parsers.conf

# Router log lines of the form "Mon DD HH:MM:SS host process[pid]: message".
[PARSER]
    Name        router_logs
    Format      regex
    Regex       ^(?<timestamp>\w+\s+\d+\s+\d+:\d+:\d+)\s+(?<host>\S+)\s+(?<process>\w+)\[(?<pid>\d+)\]:?\s+(?<message>.*)$
    Types       pid:integer
    Time_Key    timestamp
    Time_Format %b %d %H:%M:%S
    Time_Keep   On

# Syslog lines of the form "<pri>version timestamp host ident pid msgid [extradata] message".
[PARSER]
    Name        syslog-rfc5424
    Format      regex
    Regex       ^\<(?<pri>[0-9]{1,3})\>(?<version>[0-9])?\s?(?<timestamp>[^ ]+)\s+(?<host>[^ ]+)\s+(?<ident>[^ ]+)\s+(?<pid>[-0-9]+)\s+(?<msgid>[^ ]+)\s+(?<extradata>(\[.*?\])?)\s*(?<message>.+)$
    Types       pri:integer, version:integer, pid:integer
    Time_Key    timestamp
    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
    Time_Keep   On
 
# Base parser for Docker JSON log lines; the event time is read from "time".
[PARSER]
    Name        docker
    Format      json
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On

# Generic JSON parser. fluent-bit.conf references a parser named "json" in its
# docker.* parser filter, and this file is mounted in place of the stock
# parsers.conf, so the parser has to be declared here or the filter cannot load.
[PARSER]
    Name        json
    Format      json
 
# Nginx access logs.
[PARSER]
    Name        nginx_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) - - \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Key    timestamp

# GitLab JSON application logs.
[PARSER]
    Name        gitlab_json
    Format      json
    Time_Keep   On
    # Time_Key    time    # disabled: the time from Docker is used

# GitLab Registry logs.
[PARSER]
    Name        gitlab_registry
    Format      regex
    Regex       ^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}\.\d+ time="(?<time>[^"]*)" level=(?<level>\w+) msg="(?<msg>[^"]*)"(?<rest>.*)
    Time_Keep   On
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ

# Sidekiq JSON logs.
[PARSER]
    Name        sidekiq_json
    Format      json
    Time_Keep   On
    # Time_Key    time    # disabled: the time from Docker is used
 
# Nextcloud Apache-style access logs.
[PARSER]
    Name        nextcloud_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) (?<user_ident>[^ ]*) (?<user_id>[^ ]*) \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Key    timestamp
    # Apache/Nginx timestamp format. The note lives on its own line because the
    # classic config format has no inline comments: trailing "# ..." text would
    # become part of the Time_Format value.
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Keep   On
 
# Apache access logs.
[PARSER]
    Name        apache_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) - - \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Key    timestamp

# MySQL error logs: "<ISO timestamp>Z <level> <message>".
[PARSER]
    Name        mysql_error
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z) (?<level>\w+) (?<message>.*)
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ

# MySQL slow query logs.
[PARSER]
    Name        mysql_slow
    Format      regex
    Regex       ^# Time: (?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z).*# User@Host: (?<user>[^\[]*)\[(?<database>[^\]]*)\] @ (?<host>[\w\.]*)\s*\[(?<ip>[\d\.]*)\].*# Query_time: (?<query_time>[\d\.]*) Lock_time: (?<lock_time>[\d\.]*) Rows_sent: (?<rows_sent>\d*) Rows_examined: (?<rows_examined>\d*).*use (?<used_database>\w*);.*SET timestamp=(?<timestamp_unix>\d*);(?<query>.*)
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
 
# PostgreSQL logs: "<timestamp TZ> [pid] LEVEL: message".
[PARSER]
    Name        postgresql
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d+ [A-Z]{3}) \[(?<pid>\d+)\] (?<level>\w+):\s*(?<message>.*)
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %Y-%m-%d %H:%M:%S.%L %Z

# PostgreSQL extended logs (lines carrying "duration: <n> ms" query details).
[PARSER]
    Name        postgresql_detailed
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d+ [A-Z]{3}) \[(?<pid>\d+)\] (?<level>\w+):\s*duration: (?<duration>[\d\.]*) ms\s*(?<message>.*)
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %Y-%m-%d %H:%M:%S.%L %Z

# PHP-FPM logs: "[timestamp] LEVEL: message".
[PARSER]
    Name        php_fpm
    Format      regex
    Regex       ^\[(?<timestamp>[^\]]+)\] (?<level>\w+): (?<message>.*)
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %d-%b-%Y %H:%M:%S

# Standard syslog.
[PARSER]
    Name        syslog_rfc3164
    Format      regex
    Regex       ^\<(?<pri>[0-9]+)\>(?<timestamp>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? (?<message>.*)$
    Time_Keep   On
    Time_Key    timestamp
    Time_Format %b %d %H:%M:%S
 
# Grafana logs in key=value (logfmt) form; the event time is read from "t".
[PARSER]
    Name        grafana_logfmt
    Format      logfmt
    Time_Keep   On
    Time_Key    t
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ

# Grafana plugin/datasource error lines matched field by field.
[PARSER]
    Name        grafana_regex
    Format      regex
    Regex       logger=(?<logger>[^ ]*) endpoint=(?<endpoint>[^ ]*) pluginId=(?<pluginId>[^ ]*) dsName=(?<dsName>[^ ]*) dsUID=(?<dsUID>[^ ]*) uname=(?<uname>[^ ]*) t=(?<t>[^ ]*) level=(?<level>[^ ]*) msg="(?<msg>[^"]*)" error=(?<error>[^ ]*) statusCode=(?<statusCode>[^ ]*) resourcePath="(?<resourcePath>[^"]*)"
    Time_Keep   On
    Time_Key    t
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ

loki_config

# Multi-tenancy is switched off.
auth_enabled: false

server:
  http_listen_port: 3100

# Single-instance setup: filesystem storage under /loki, in-memory ring,
# replication factor 1.
common:
  path_prefix: /loki
  instance_addr: 127.0.0.1
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

schema_config:
  configs:
    - from: 2020-10-24
      schema: v13
      store: tsdb
      object_store: filesystem
      index:
        prefix: index_
        period: 24h

limits_config:
  # 720h = 30 days.
  retention_period: 720h
  reject_old_samples: true
  reject_old_samples_max_age: 720h
  max_query_length: 721h
  allow_structured_metadata: true

ingester:
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_target_size: 1048576
  chunk_retain_period: 30s
  lifecycler:
    final_sleep: 0s
    ring:
      replication_factor: 1
      kvstore:
        store: inmemory

ruler:
  alertmanager_url: http://localhost:9093

# Retention for the tsdb index is applied by the compactor, not by the table
# manager: without retention_enabled the limits_config.retention_period (720h)
# is never enforced. delete_request_store is required once retention is on.
compactor:
  retention_enabled: true
  delete_request_store: filesystem

Развертывание

version: "3.8"

# Shared logging options: json-file driver, 3 files of at most 10m each,
# tagged with the container and image name.
x-logging: &default-logging
  driver: "json-file"
  options:
    tag: "{{.Name}}/{{.ImageName}}"
    max-file: "3"
    max-size: "10m"

# Shared label set merged into the labels of individual services.
x-labels: &default-labels
  monitoring: "true"
  logging: "enabled"
  logging_jobname: "docker_swarm"
 
services:
  # Loki, pinned to a manager node; its configuration comes from the
  # loki_config Docker config.
  loki:
    image: grafana/loki:3.5.8
    command: -config.file=/etc/loki/local-config.yaml
    configs:
      - source: loki_config
        target: /etc/loki/local-config.yaml
    volumes:
      - loki_data:/loki
    ports:
      - "3100:3100"
    networks:
      - monitoring
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "loki"
      component: "logging"
    deploy:
      placement:
        constraints:
          - node.role == manager
 
  # Fluent Bit runs on every node (global mode) and reads the local container logs.
  fluent-bit:
    image: fluent/fluent-bit:4.2.0
    configs:
      - source: fluent_bit_config
        target: /fluent-bit/etc/fluent-bit.conf
      - source: fluent_bit_parser
        target: /fluent-bit/etc/parsers.conf
      - source: docker_metadata
        target: /fluent-bit/bin/docker-metadata.lua
    environment:
      - NODE_ID={{.Node.ID}}
      - NODE_NAME={{.Node.Hostname}}
      # fluent-bit.conf also reads ${SWARM_NODE_ROLE}. Service templates offer no
      # node-role placeholder, so the value is taken from the deploying shell
      # and falls back to "unknown" so the variable is always defined.
      - SWARM_NODE_ROLE=${SWARM_NODE_ROLE:-unknown}
    ports:
      - "5140:5140/tcp"  # syslog from the router
      - "2020:2020"      # Fluent Bit HTTP monitoring
    networks:
      - monitoring
    volumes:
      - flb_storage:/var/log/flb-storage/
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      # The docker_events input reads daemon events from the Docker socket.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    deploy:
      mode: global
 
 
  # Grafana UI, pinned to a manager node.
  grafana:
    image: grafana/grafana:12.1.4
    ports:
      - "3000:3000"
    environment:
      # Admin credentials can be overridden from the deploying shell; the
      # fallbacks keep the previous admin/admin values.
      # NOTE(review): admin/admin on a published port is insecure - set real
      # credentials before exposing this service.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
    volumes:
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring
    deploy:
      placement:
        constraints:
          - node.role == manager
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "grafana"
      component: "monitoring"
 
# All configs are external: they must already exist in the swarm before deploy.
configs:
  docker_metadata:
    external: true
  fluent_bit_config:
    external: true
  fluent_bit_parser:
    external: true
  loki_config:
    external: true

networks:
  monitoring:
    driver: overlay

volumes:
  flb_storage:
    driver: local
  grafana_data:
    driver: local
  loki_data:
    driver: local

Проверка работы

# Проверяем конфигурацию
$ docker exec -it <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf --dry-run
 
# Смотрим логи
$ docker service logs logging_fluent-bit
 
# Проверяем метрики
$ curl http://localhost:2020/api/v1/metrics | jq
 
# Тестируем парсеры
$ echo '2025-11-21T02:13:34.366Z {"method":"PUT","path":"/projects","status":500}' | \
    docker exec -i <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf -i stdin -o stdout

Примеры запросов LogQL

{job="docker_swarm"} |= "gitlab"
 
{container_name=~".*gitlab.*"} 
 
{job="docker_swarm"} |~ "(?i)error|exception|fail"
 
{node_name="node-1"}
 
# Все логи GitLab
{container_name=~".*gitlab.*"} 
 
# Логи по компонентам
{container_name=~".*gitlab.*"} | json | component="gitaly.UnaryServerInterceptor"
 
# Ошибки
{container_name=~".*gitlab.*"} | json | level="error"
 
# Запросы с определенным correlation_id
{container_name=~".*gitlab.*"} | json | correlation_id="01KAJ30DCE4BW6JSAT7KHGZ9PX"
 
# Логи Sidekiq
{container_name=~".*gitlab.*"} | json | severity="INFO"
 
# Все логи GitLab с parsed_data
{container_name=~".*gitlab.*"} | json
 
# Логи с ошибками
{cluster="docker_swarm"} | json | level="error" 
 
# Медленные PostgreSQL запросы
{service_name=~".*postgres.*"} | json | duration > 1000
 
# Nginx 5xx ошибки
{container_name=~".*nginx.*"} | json | status >= 500
 
# Sidekiq логи
{container_name=~".*sidekiq.*"} | json | severity="INFO"