Это старая версия документа!


Обработка многострочных логов

fluent-bit.conf

# Global engine settings: flush interval, log level, filesystem buffering
# under /var/log/flb-storage/ and the built-in HTTP endpoint on port 2020.
# NOTE(review): Parsers_File points at parsers.conf, while the parser
# definitions documented on this page are titled parsers_multiline.conf —
# confirm the file is deployed under the name referenced here.
# NOTE(review): HTTP_Listen 0.0.0.0 binds the HTTP endpoint to all interfaces.
[SERVICE]
    Flush                      1
    Log_Level                  info
    Daemon                     off
    storage.path               /var/log/flb-storage/
    storage.sync               normal
    storage.checksum           off
    storage.max_chunks_up      128
    storage.backlog.mem_limit  10M
    Parsers_File               parsers.conf
    HTTP_Server                on
    HTTP_Listen                0.0.0.0
    HTTP_Port                  2020
 
# Tail the Docker json-file logs of every container.
# Each file is tagged docker.<file_name>, where <file_name> is the hex
# container id captured by Tag_Regex from the "<id>-json.log" file name.
# The dot before "log" is escaped so it matches a literal dot only.
# Read offsets are kept in the DB file so a restart does not re-read files.
# NOTE(review): Mem_Buf_Limit is set together with Storage.Type filesystem;
# confirm which limit is actually in effect for filesystem buffering.
[INPUT]
    Name              tail
    Path              /var/lib/docker/containers/*/*.log
    Parser            docker
    Refresh_Interval  10
    #Ignore_Older      1h
    Docker_Mode       On
    Docker_Mode_Flush 4
    Tag               docker.<file_name>
    Tag_Regex         (?<file_name>[a-f0-9]*)-json\.log
    Mem_Buf_Limit     50MB
    Skip_Long_Lines   On
    DB                /var/log/flb-storage/flb_db.db
    DB.sync           normal
    Storage.Type      filesystem
    Read_from_Head    true
 
# Docker metadata: every docker.* record is passed to the Lua function
# enrich_with_docker_metadata from /fluent-bit/bin/docker-metadata.lua.
# The script is not shown here; the docker.* keys copied by a later modify
# filter are presumably produced by it — verify against the script.
[FILTER]
    Name    lua
    Match   docker.*
    Script  /fluent-bit/bin/docker-metadata.lua
    Call    enrich_with_docker_metadata
 
# Keep a copy of the log line under raw_log; the grep and parser filters
# that follow operate on this copy.
[FILTER]
    Name   modify
    Match  docker.*
    Copy   log raw_log
 
# Drop records whose raw_log is empty, whitespace-only, a "==> ... <=="
# header line, or a JSON object whose only content is a blank "message".
# The second pattern (^\s*$) also covers the empty-string case of the first.
[FILTER]
    Name     grep
    Match    docker.*
    Exclude  raw_log ^$
    Exclude  raw_log ^\s*$
    Exclude  raw_log ^==>.+<==$
    Exclude  raw_log ^{"message":"\s*"}$
 
# Main JSON parsing of the Docker log line: parse the log key with json_auto,
# keeping both the original log key (Preserve_Key) and all other fields of
# the record (Reserve_Data).
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      log
    Parser        json_auto
    Reserve_Data  On
    Preserve_Key  On
 
# Start of the chain of specialised parsers applied to raw_log.
# Preserve_Key is not set in this filter; presumably raw_log is removed once
# a parser matches, so later filters in the chain only see records that are
# still unparsed — verify against the parser filter documentation.
# NOTE(review): the log key was already parsed with json_auto by the filter
# above; this pass applies the same parser to the raw_log copy.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        json_auto
    Reserve_Data  true
 
# Try the gitlab_json parser on raw_log.
# NOTE(review): gitlab_json is declared as a plain "Format json" parser, the
# same format the json_auto pass has already tried on raw_log — confirm this
# filter can still match anything at this point in the chain.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        gitlab_json
    Reserve_Data  true
 
# Try the gitlab_registry regex parser (time="..." level=... msg="...") on raw_log.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        gitlab_registry
    Reserve_Data  true
 
# Try the sidekiq_json parser on raw_log.
# NOTE(review): sidekiq_json is declared as a plain "Format json" parser,
# like json_auto and gitlab_json earlier in the chain — confirm this filter
# is reachable.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        sidekiq_json
    Reserve_Data  true
 
# Try the mysql_error regex parser (timestamp, level, message) on raw_log.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        mysql_error
    Reserve_Data  true
 
# Try the mysql_slow regex parser on raw_log.
# NOTE(review): the mysql_slow regex expects "# Time:", "# User@Host:",
# "# Query_time:" and the query text inside one value; presumably the slow
# log entry has to arrive as a single record for this to match — verify.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        mysql_slow
    Reserve_Data  true
 
# PostgreSQL lines: try the specific "duration: ... ms" parser first.
# The general postgresql regex matches every line the detailed one matches
# (its message group accepts any text). These filters do not set
# Preserve_Key, so a successful parse removes raw_log; with the general
# parser first, postgresql_detailed could never run on a matching line.
[FILTER]
    name                parser
    match               docker.*
    key_name            raw_log
    parser              postgresql_detailed
    reserve_data        true

# Fallback for PostgreSQL lines without a "duration:" prefix.
[FILTER]
    name                parser
    match               docker.*
    key_name            raw_log
    parser              postgresql
    reserve_data        true
 
# Try the nextcloud_access regex parser on raw_log.
# Unlike the other filters in this chain, Preserve_Key is enabled here, so
# raw_log stays in the record after a successful parse.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        nextcloud_access
    Reserve_Data  true
    Preserve_Key  true
 
# Try the nginx_access regex parser on raw_log.
# NOTE(review): the nextcloud_access regex also accepts lines whose identity
# fields are "- -", and that filter runs before this one while keeping
# raw_log — confirm the ordering and the resulting double parse are intended.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        nginx_access
    Reserve_Data  true
 
# Try the apache_access regex parser on raw_log.
# NOTE(review): apache_access and nginx_access are declared with the same
# Regex in the parsers file — confirm a separate pass is needed.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        apache_access
    Reserve_Data  true
 
# Try the php_fpm regex parser ("[timestamp] LEVEL: message") on raw_log.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        php_fpm
    Reserve_Data  true
 
# Last parser in the chain: try the grafana_regex key=value parser on raw_log.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      raw_log
    Parser        grafana_regex
    Reserve_Data  true
 
# Structure the fields parsed from raw_log: move the listed keys under a
# single parsed_data map.
# NOTE(review): several fields captured by the parsers (for example
# timestamp, message, pid, duration, error) are not listed here and stay at
# the top level of the record — confirm this is intended.
[FILTER]
    Name        nest
    Match       docker.*
    Operation   nest
    Wildcard    remote_*
    Wildcard    method
    Wildcard    path
    Wildcard    status
    Wildcard    body_bytes
    Wildcard    user_agent
    Wildcard    referrer
    Wildcard    logger
    Wildcard    endpoint
    Wildcard    pluginId
    Wildcard    dsName
    Wildcard    dsUID
    Wildcard    uname
    Wildcard    level
    Wildcard    msg
    Wildcard    statusCode
    Wildcard    resourcePath
    Wildcard    correlation_id
    Wildcard    component
    Wildcard    severity
    Wildcard    grpc.*
    Nest_under  parsed_data
 
# Copy Docker metadata from the docker.* keys to short top-level names, and
# stage log and parsed_data under underscore-prefixed keys (_raw, _parsed).
# NOTE(review): the compose keys use a lower-case "label_" prefix while the
# swarm/logging keys use "Label_" — confirm both spellings match the keys
# present in the record.
[FILTER]
    Name   modify
    Match  docker.*
    Copy   docker.hostname hostname
    Copy   docker.container_started started
    Copy   docker.container_name container_name
    Copy   docker.container_id container_id
    Copy   docker.state state
    Copy   docker.stream stream
    Copy   log _raw
    Copy   parsed_data _parsed

    # docker compose
    Copy   docker.label_compose_project compose_project
    Copy   docker.label_compose_service compose_service

    # docker swarm
    Copy   docker.Label_stack_name stack_name
    Copy   docker.Label_service_name service_name
    Copy   docker.Label_service_id service_id
    Copy   docker.Label_task_name task_name
    Copy   docker.Label_task_id task_id
    Copy   docker.Label_node_id node_id

    # Labels
    Copy   docker.Label_logging logging
    Copy   docker.Label_logging_jobname logging_jobname
 
# Group every key starting with "_" under log, with Remove_prefix "_"
# configured for the nested key names.
# NOTE(review): the record may already carry a top-level log key at this
# point — confirm how the nested map and the existing key coexist.
[FILTER]
    Name           nest
    Match          docker.*
    Operation      nest
    Wildcard       _*
    Nest_under     log
    Remove_prefix  _
 
# Add host metadata taken from the NODE_ID and NODE_NAME environment variables.
# NOTE(review): node_id is also copied from docker.Label_node_id by an
# earlier modify filter; presumably this Set replaces that value — confirm.
[FILTER]
    Name   modify
    Match  docker.*
    Set    node_id ${NODE_ID}
    Set    node_name ${NODE_NAME}
    Set    host_name ${NODE_NAME}
 
# Re-tag only records whose logging key equals "enabled": they are emitted
# again under data.<container_id>.
# NOTE(review): the last rule field is "true" (keep); no output in this file
# matches docker.*, so confirm whether the original record needs to be kept.
[FILTER]
    Name   rewrite_tag
    Match  docker.*
    Rule   $logging ^enabled$ data.$container_id true
 
# Clean up data.* records with an extended whitelist: only the keys listed
# below are configured to be kept.
# NOTE(review): labels, project and service are not written by any filter
# visible in this file — presumably they come from the Lua script; verify.
[FILTER]
    Name           record_modifier
    Match          data.*
    Whitelist_key  log
    Whitelist_key  started
    Whitelist_key  hostname
    Whitelist_key  state
    Whitelist_key  labels
    Whitelist_key  container_id
    Whitelist_key  container_name
    Whitelist_key  project
    Whitelist_key  service
    Whitelist_key  logging
    Whitelist_key  logging_jobname
    Whitelist_key  stream
    Whitelist_key  node_name
    Whitelist_key  host_name

    # docker compose
    Whitelist_key  compose_project
    Whitelist_key  compose_service

    # docker swarm
    Whitelist_key  stack_name
    Whitelist_key  service_name
    Whitelist_key  service_id
    Whitelist_key  task_name
    Whitelist_key  task_id
    Whitelist_key  node_id
 
# Ship data.* records to Loki at loki:3100 as JSON lines.
# The "level" label is filled from the stream key, and "job" from
# logging_jobname.
# NOTE(review): $project and $service are not written by any filter visible
# in this file, and docker_id (Remove_keys) is not produced here either —
# confirm these keys exist in the records.
# NOTE(review): Label_keys repeats names already defined in Labels, and
# container_id is used as a label — confirm both are intended.
[OUTPUT]
    Name                    loki
    Match                   data.*
    Host                    loki
    Port                    3100
    Labels                  job=$logging_jobname, node_name=$node_name, container_id=$container_id, container_name=$container_name, service_name=$service_name, project=$project, service=$service, level=$stream, logging_jobname=$logging_jobname
    Label_keys              $node_name,$container_id,$container_name,$service_name,$project,$service,$logging_jobname
    Line_format             json
    Auto_kubernetes_labels  off
    Drop_single_key         false
    Remove_keys             docker_id
 
# OUTPUT для отладки
# [OUTPUT]
#     name                stdout
#     match               data.*
#     format              json

parsers_multiline.conf

# Base parser for Docker JSON log lines; the record time is taken from the
# "time" field, which is also kept in the record (Time_Keep).
[PARSER]
    Name         docker
    Format       json
    Time_Keep    On
    Time_Key     time
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# Generic JSON parser; declared with the same settings as the docker parser.
[PARSER]
    Name         json_auto
    Format       json
    Time_Keep    On
    Time_Key     time
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# Nginx access logs (combined format with "- -" identity fields).
# Captures remote_ip, timestamp, method, path, status, body_bytes, referrer
# and user_agent; the record time is taken from timestamp.
[PARSER]
    Name        nginx_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) - - \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Key    timestamp
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Keep   On
 
# GitLab JSON logs (application logs). Plain JSON parser; no Time_Key is
# configured for it.
[PARSER]
    Name       gitlab_json
    Time_Keep  On
    Format     json
 
# GitLab Registry logs: a leading "YYYY-MM-DD_hh:mm:ss.fraction" prefix
# followed by time="...", level=... and msg="..."; everything after msg is
# captured as "rest". The record time is taken from the time="..." value.
[PARSER]
    Name        gitlab_registry
    Format      regex
    Regex       ^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}\.\d+ time="(?<time>[^"]*)" level=(?<level>\w+) msg="(?<msg>[^"]*)"(?<rest>.*)
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# Sidekiq JSON logs. Plain JSON parser; no Time_Key is configured for it.
[PARSER]
    Name       sidekiq_json
    Time_Keep  On
    Format     json
 
# Nextcloud Apache-style access logs. Same layout as the nginx/apache access
# parsers, but the two identity fields are captured as user_ident and
# user_id instead of being fixed to "- -".
[PARSER]
    Name        nextcloud_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) (?<user_ident>[^ ]*) (?<user_id>[^ ]*) \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Key    timestamp
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Keep   On
 
# Apache access logs.
# NOTE(review): this Regex is identical to the one of nginx_access.
[PARSER]
    Name        apache_access
    Format      regex
    Regex       ^(?<remote_ip>[^ ]*) - - \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
    Time_Key    timestamp
    Time_Format %d/%b/%Y:%H:%M:%S %z
    Time_Keep   On
 
# MySQL error logs: "<ISO-8601 UTC timestamp> <level> <message>".
# The dot before the fractional seconds is escaped so that only a literal
# "." is accepted there.
[PARSER]
    Name        mysql_error
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z) (?<level>\w+) (?<message>.*)
    Time_Key    timestamp
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# MySQL slow query logs: extracts the timestamp, user, database, host, ip,
# query/lock times, row counts, the "use <db>" database, the unix timestamp
# and the query text from one slow-log entry.
# The dot before the fractional seconds is escaped so that only a literal
# "." is accepted there.
# NOTE(review): the pattern expects all "# ..." header parts and the query
# inside a single value; presumably the entry must already be joined into
# one record — verify.
[PARSER]
    Name        mysql_slow
    Format      regex
    Regex       ^# Time: (?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z).*# User@Host: (?<user>[^\[]*)\[(?<database>[^\]]*)\] @ (?<host>[\w\.]*)\s*\[(?<ip>[\d\.]*)\].*# Query_time: (?<query_time>[\d\.]*) Lock_time: (?<lock_time>[\d\.]*) Rows_sent: (?<rows_sent>\d*) Rows_examined: (?<rows_examined>\d*).*use (?<used_database>\w*);.*SET timestamp=(?<timestamp_unix>\d*);(?<query>.*)
    Time_Key    timestamp
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# PostgreSQL logs: "<date time.fraction TZ> [pid] LEVEL: message".
# The dot before the fractional seconds is escaped so that only a literal
# "." is accepted there.
[PARSER]
    Name        postgresql
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ [A-Z]{3}) \[(?<pid>\d+)\] (?<level>\w+):\s*(?<message>.*)
    Time_Key    timestamp
    Time_Format %Y-%m-%d %H:%M:%S.%L %Z
    Time_Keep   On
 
# PostgreSQL extended logs (with query details): same prefix as the
# postgresql parser, followed by "duration: <n> ms" captured as duration.
# The dot before the fractional seconds is escaped so that only a literal
# "." is accepted there.
[PARSER]
    Name        postgresql_detailed
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ [A-Z]{3}) \[(?<pid>\d+)\] (?<level>\w+):\s*duration: (?<duration>[\d\.]*) ms\s*(?<message>.*)
    Time_Key    timestamp
    Time_Format %Y-%m-%d %H:%M:%S.%L %Z
    Time_Keep   On
 
# PHP-FPM logs: "[timestamp] LEVEL: message"; the bracketed value is parsed
# as the record time using the day-month-year format below.
[PARSER]
    Name        php_fpm
    Format      regex
    Regex       ^\[(?<timestamp>[^\]]+)\] (?<level>\w+): (?<message>.*)
    Time_Key    timestamp
    Time_Format %d-%b-%Y %H:%M:%S
    Time_Keep   On
 
# Standard syslog (RFC 3164 style): "<pri>timestamp host ident[pid]: message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        syslog_rfc3164
    Format      regex
    Regex       ^\<(?<pri>[0-9]+)\>(?<timestamp>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? (?<message>.*)$
    Time_Key    timestamp
    Time_Format %b %d %H:%M:%S
    Time_Keep   On
 
# Grafana logs in key=value (logfmt) form; the record time is read from "t".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser (the chain uses grafana_regex).
[PARSER]
    Name         grafana_logfmt
    Format       logfmt
    Time_Keep    On
    Time_Key     t
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# Grafana key=value logs parsed with a fixed-order regex: logger, endpoint,
# pluginId, dsName, dsUID, uname, t, level, msg, error, statusCode and
# resourcePath must appear in exactly this order. The pattern is not
# anchored to the start of the line.
[PARSER]
    Name        grafana_regex
    Format      regex
    Regex       logger=(?<logger>[^ ]*) endpoint=(?<endpoint>[^ ]*) pluginId=(?<pluginId>[^ ]*) dsName=(?<dsName>[^ ]*) dsUID=(?<dsUID>[^ ]*) uname=(?<uname>[^ ]*) t=(?<t>[^ ]*) level=(?<level>[^ ]*) msg="(?<msg>[^"]*)" error=(?<error>[^ ]*) statusCode=(?<statusCode>[^ ]*) resourcePath="(?<resourcePath>[^"]*)"
    Time_Key    t
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# Ruby/Rails logs (GitLab): "<ISO time> [pid] LEVEL -- : message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        ruby_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z) \[(?<pid>\d+)\] (?<level>\w+) -- : (?<message>.*)
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# GitLab Rails logs (JSON multiline): a leading ISO timestamp followed by
# JSON text from which "@timestamp", "level" and "message" are extracted in
# that order. The message group stops at the first double quote.
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        gitlab_rails_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z) .*?"@timestamp":"(?<json_time>[^"]*)".*?"level":"(?<level>[^"]*)".*?"message":"(?<message>[^"]*)"
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# Sidekiq multiline logs: "<ISO time> <pid> TID-<tid> LEVEL: message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        sidekiq_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z) (?<pid>\d+) TID-(?<tid>\w+) (?<level>\w+): (?<message>.*)
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On
 
# Multiline Java/Spring logs:
# "<date time.fraction> LEVEL --- [thread] class : message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        java_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+) (?<level>\w+)\s+--- \[(?<thread>[^\]]+)\] (?<class>\S+)\s*:(?<message>.*)
    Time_Key    time
    Time_Format %Y-%m-%d %H:%M:%S.%L
    Time_Keep   On
 
# Multiline Python logs: "<date time,millis> - name - LEVEL - message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        python_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+) - (?<name>\w+) - (?<level>\w+) - (?<message>.*)
    Time_Key    time
    Time_Format %Y-%m-%d %H:%M:%S,%L
    Time_Keep   On
 
# Multiline stack traces: "<date time.fraction> LEVEL message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        stacktrace_multiline
    Format      regex
    Regex       ^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)\s+(?<level>\w+)\s+(?<message>.*)
    Time_Key    timestamp
    Time_Format %Y-%m-%d %H:%M:%S.%L
    Time_Keep   On
 
# Additional parser for Go logs: "<ISO time> LEVEL message".
# NOTE(review): no filter in the fluent-bit.conf shown on this page
# references this parser.
[PARSER]
    Name        go_multiline
    Format      regex
    Regex       ^(?<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\s+(?<level>\w+)\s+(?<message>.*)
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%LZ
    Time_Keep   On

Установка

 

loki_config

 

Развертывание

 

Проверка работы

# Проверяем конфигурацию
$ docker exec -it <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf --dry-run
 
# Смотрим логи
$ docker service logs logging_fluent-bit
 
# Проверяем метрики
$ curl http://localhost:2020/api/v1/metrics | jq
 
# Тестируем парсеры
$ echo '2025-11-21T02:13:34.366Z {"method":"PUT","path":"/projects","status":500}' | \
  docker exec -i <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf -i stdin -o stdout

Примеры запросов LogQL

{job="fluent-bit"} |= "gitlab"
 
{container_name="gitlab"} 
 
{job="fluent-bit"} |~ "(?i)error|exception|fail"
 
{node_name="node-1"}
 
# Все логи GitLab
{container_name=~".*gitlab.*"} 
 
# Логи по компонентам
{container_name=~".*gitlab.*"} | json | component="gitaly.UnaryServerInterceptor"
 
# Ошибки
{container_name=~".*gitlab.*"} | json | level="error"
 
# Запросы с определенным correlation_id
{container_name=~".*gitlab.*"} | json | correlation_id="01KAJ30DCE4BW6JSAT7KHGZ9PX"
 
# Логи Sidekiq
{container_name=~".*gitlab.*"} | json | severity="INFO"
 
# Все логи GitLab с parsed_data
{container_name=~".*gitlab.*"} | json
 
# Логи с ошибками
{environment="production"} | json | level="error" 
 
# Медленные PostgreSQL запросы
{service_name=~".*postgres.*"} | json | duration > 1000
 
# Nginx 5xx ошибки
{container_name=~".*nginx.*"} | json | status >= 500
 
# Sidekiq логи
{container_name=~".*sidekiq.*"} | json | severity="INFO"