Это старая версия документа!


Обработка многострочных логов

fluent-bit.conf

[SERVICE]
    # Engine basics: flush interval, foreground mode, log verbosity.
    Flush                     1
    Daemon                    off
    Log_Level                 info

    # Parser definitions are loaded from this file.
    # NOTE(review): this document lists its parser definitions under the
    # name parsers_multiline.conf - confirm which file name is deployed.
    Parsers_File              parsers.conf

    # Filesystem buffering settings (used by inputs with a filesystem
    # storage type).
    Storage.Path              /var/log/flb-storage/
    Storage.Sync              normal
    Storage.Checksum          off
    Storage.Max_Chunks_Up     128
    Storage.Backlog.Mem_Limit 10M

    # Built-in HTTP server, listening on every interface, port 2020.
    HTTP_Server               on
    HTTP_Listen               0.0.0.0
    HTTP_Port                 2020
 
# Tail the Docker json-file logs of every container on this host.
[INPUT]
    Name              tail
    Path              /var/lib/docker/containers/*/*.log
    Parser            docker
    Refresh_Interval  10

    # Re-join log lines that Docker split, waiting up to 4 seconds.
    Docker_Mode       On
    Docker_Mode_Flush 4

    # The tag becomes docker.<file_name>; file_name is the part of the file
    # name captured by Tag_Regex. The dot is escaped and the pattern is
    # anchored so that only a literal "-json.log" suffix matches.
    Tag               docker.<file_name>
    Tag_Regex         (?<file_name>[a-z0-9]*)-json\.log$

    Mem_Buf_Limit     50MB
    Skip_Long_Lines   On

    # File offsets are tracked in this database; buffering is on disk.
    DB                /var/log/flb-storage/flb_db.db
    DB.sync           normal
    Storage.Type      filesystem
    Read_from_Head    true
 
# Multi-line log handling (stack traces).
# This filter is deliberately the FIRST filter in the pipeline. In buffered
# mode (the default) the multiline filter re-emits concatenated records at the
# head of the pipeline, so any filter placed before it would run twice, and
# a raw_log copy taken before concatenation would hold only the first line.
[FILTER]
    Name                  multiline
    Match                 docker.*
    multiline.key_content log
    multiline.parser      java, go, python, ruby, docker, cri

# Docker metadata: calls enrich_with_docker_metadata from the Lua script.
[FILTER]
    Name                lua
    Match               docker.*
    Script              /fluent-bit/bin/docker-metadata.lua
    Call                enrich_with_docker_metadata

# Keep a copy of the (already concatenated) log text in raw_log.
[FILTER]
    Name                modify
    Match               docker.*
    Copy                log raw_log

# Drop empty logs: blank or whitespace-only lines, "==> ... <==" lines and
# JSON objects whose only content is an empty "message".
[FILTER]
    Name                grep
    Match               docker.*
    Exclude             raw_log ^$
    Exclude             raw_log ^\s*$
    Exclude             raw_log ^==>.+<==$
    Exclude             raw_log ^{"message":"\s*"}$
 
# Parse GitLab JSON logs: run the json_auto parser on the log key, keeping
# both the other fields of the record and the original log key.
[FILTER]
    name                parser
    match               docker.*
    key_name            log
    parser              json_auto
    reserve_data        On
    preserve_key        On
 
# Parser chain for the different log formats.
# One parser filter with several Parser entries replaces the former eighteen
# single-parser filters: the entries are tried in the listed order and the
# first parser that succeeds wins. Reserve_Data keeps the other record fields.
# raw_log is consumed on a successful parse; no later filter in this file
# reads it and it is not in the final whitelist, so it never reached the
# output anyway. This drops the former Preserve_Key on nextcloud_access,
# which only allowed later parsers to re-parse the same line.
[FILTER]
    Name                parser
    Match               docker.*
    Key_Name            raw_log
    Reserve_Data        true
    Parser              gitlab_json
    Parser              sidekiq_json
    Parser              nginx_access
    Parser              gitlab_registry
    Parser              mysql_error
    Parser              mysql_slow
    Parser              postgresql
    Parser              postgresql_detailed
    Parser              nextcloud_access
    Parser              apache_access
    Parser              php_fpm
    Parser              grafana_regex
    Parser              java_multiline
    Parser              python_traceback
    Parser              nodejs_json
    Parser              ruby_rails
    Parser              go_stacktrace
 
# Group the fields extracted from raw_log under a single parsed_data map.
[FILTER]
    Name                nest
    Match               docker.*
    Operation           nest
    Nest_under          parsed_data

    # Keys (or key prefixes, when ending in *) that are moved into the map.
    Wildcard            remote_*
    Wildcard            method
    Wildcard            path
    Wildcard            status
    Wildcard            body_bytes
    Wildcard            user_agent
    Wildcard            referrer
    Wildcard            logger
    Wildcard            endpoint
    Wildcard            pluginId
    Wildcard            dsName
    Wildcard            dsUID
    Wildcard            uname
    Wildcard            level
    Wildcard            msg
    Wildcard            statusCode
    Wildcard            resourcePath
    Wildcard            correlation_id
    Wildcard            component
    Wildcard            severity
    Wildcard            grpc.*
    Wildcard            exception.*
    Wildcard            traceback
    Wildcard            stack_trace
 
# Copy metadata into flat, short key names.
# NOTE(review): the source keys mix "docker.label_..." and "docker.Label_..."
# casing - confirm both against the keys the Lua metadata script emits.
[FILTER]
    Name                modify
    Match               docker.*
    Copy                docker.hostname hostname
    Copy                docker.container_started started
    Copy                docker.container_name container_name
    Copy                docker.container_id container_id
    Copy                docker.state state
    Copy                docker.stream stream

    # The next nest filter collects the _* keys into a new "log" map.
    # The original log key is renamed (not copied) to _raw so the record
    # does not end up with two top-level "log" keys after nesting.
    Rename              log _raw
    Copy                parsed_data _parsed

    # docker compose
    Copy                docker.label_compose_project compose_project
    Copy                docker.label_compose_service compose_service

    # docker swarm
    Copy                docker.Label_stack_name stack_name
    Copy                docker.Label_service_name service_name
    Copy                docker.Label_service_id service_id
    Copy                docker.Label_task_name task_name
    Copy                docker.Label_task_id task_id
    Copy                docker.Label_node_id node_id

    # Labels
    Copy                docker.Label_logging logging
    Copy                docker.Label_logging_jobname logging_jobname
 
# Collect every key starting with "_" into a map named log, stripping the
# leading underscore from the nested key names.
[FILTER]
    Name                nest
    Match               docker.*
    Operation           nest
    Nest_under          log
    Wildcard            _*
    Remove_prefix       _
 
# Add host metadata: node identity from the NODE_ID / NODE_NAME environment
# variables plus two constant fields.
[FILTER]
    Name                modify
    Match               docker.*
    Set                 node_id ${NODE_ID}
    Set                 node_name ${NODE_NAME}
    Set                 host_name ${NODE_NAME}
    Set                 log_source docker_swarm
    Set                 environment production
 
# Re-tag only records whose logging field is exactly "enabled".
# New tag: data.<container_name>.<original tag>; the trailing "true" keeps
# the original record under its old tag as well.
[FILTER]
    Name                rewrite_tag
    Match               docker.*
    Rule                $logging ^enabled$ data.$container_name.$TAG true
 
# Clean up data.* records: only the keys listed below are kept.
[FILTER]
    Name                record_modifier
    Match               data.*

    # log payload and container identity
    Whitelist_key       log
    Whitelist_key       started
    Whitelist_key       hostname
    Whitelist_key       state
    Whitelist_key       container_id
    Whitelist_key       container_name
    Whitelist_key       logging
    Whitelist_key       logging_jobname
    Whitelist_key       stream
    Whitelist_key       node_name
    Whitelist_key       host_name
    Whitelist_key       log_source
    Whitelist_key       environment

    # docker compose
    Whitelist_key       compose_project
    Whitelist_key       compose_service

    # docker swarm
    Whitelist_key       stack_name
    Whitelist_key       service_name
    Whitelist_key       service_id
    Whitelist_key       task_name
    Whitelist_key       task_id
    Whitelist_key       node_id

    # parsed data
    Whitelist_key       parsed_data
 
# Ship the re-tagged data.* records to Loki as JSON lines.
[OUTPUT]
    Name                loki
    Match               data.*
    Host                loki
    Port                3100

    # Stream labels, each taken from a record key via a record accessor.
    # The former label_keys line was removed: its entries had no leading "$"
    # (label_keys only accepts record-accessor patterns such as
    # $container_name), and every key it listed is already defined here.
    Labels              job=$logging_jobname, node_name=$node_name, container_name=$container_name, service_name=$service_name, stack_name=$stack_name, environment=$environment

    Line_format         json
    Auto_kubernetes_labels off
    Drop_single_key     false
    Remove_keys         docker_id
 
# [OUTPUT]
#     name                stdout
#     match               data.*
#     format              json

parsers_multiline.conf

# JSON parser named "docker"; the timestamp is read from the "time" field
# and that field is kept in the record.
[PARSER]
    name        docker
    format      json
    time_key    time
    time_format %Y-%m-%dT%H:%M:%S.%LZ
    time_keep   On
 
# Generic JSON parser; the timestamp is read from the "time" field and that
# field is kept in the record.
[PARSER]
    name        json_auto
    format      json
    time_key    time
    time_format %Y-%m-%dT%H:%M:%S.%LZ
    time_keep   On
 
# GitLab JSON logs (same settings as json_auto, under a separate name).
[PARSER]
    name        gitlab_json
    format      json
    time_key    time
    time_format %Y-%m-%dT%H:%M:%S.%LZ
    time_keep   On
 
# Sidekiq JSON logs (same settings as json_auto, under a separate name).
[PARSER]
    name        sidekiq_json
    format      json
    time_key    time
    time_format %Y-%m-%dT%H:%M:%S.%LZ
    time_keep   On
 
# Nginx access logs.
# Captured fields: remote, host, user, time, method, path, code, size and,
# when present, referer and agent.
[PARSER]
    name        nginx_access
    format      regex
    regex       ^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$
    time_key    time
    time_format %d/%b/%Y:%H:%M:%S %z
    time_keep   On
 
# PostgreSQL logs.
# Expected line shape: "<date> <time> <zone> <pid> <level>: <message>";
# captured fields are time, pid, level and message.
[PARSER]
    name        postgresql
    format      regex
    regex       ^(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \S+)\s+(?<pid>\d+)\s+(?<level>\w+)\s*:\s*(?<message>.*)$
    time_key    time
    time_format %Y-%m-%d %H:%M:%S %Z
    time_keep   On
 
# Multi-line parsers for GitLab.
# First-line detector: matches a line starting with an ISO-8601 UTC timestamp
# or with a JSON object whose first field is such a "time" value.
# NOTE(review): the regex has no named capture groups and nothing in the
# visible configuration references this parser - confirm it is still needed.
[PARSER]
    name        gitlab_multiline_firstline
    format      regex
    regex       ^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z|^{\"time\":\"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z
 
# Regex-based multiline parser for GitLab logs.
# start_state: a line beginning with an ISO-8601 UTC timestamp, or with a
#              JSON object whose first field is such a "time" value.
# cont:        any following line that does NOT begin that way is appended
#              to the current record.
# NOTE(review): the visible multiline filter lists only built-in parsers;
# confirm where gitlab_multiline is meant to be used.
[MULTILINE_PARSER]
    name          gitlab_multiline
    type          regex
    flush_timeout 1000
    rule          "start_state" "^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z|^{\"time\":\"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z" "cont"
    rule          "cont" "^(?!\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z|^{\"time\":\"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z).*" "cont"

Установка

 

loki_config

 

Развертывание

 

Проверка работы

 
{job="fluent-bit"} |= "gitlab"
{container_name="gitlab"} 
{job="fluent-bit"} |~ "(?i)error|exception|fail"
{node_name="node-1"}
# Все логи GitLab
{container_name=~".*gitlab.*"} 
# Логи по компонентам
{container_name=~".*gitlab.*"} | json | component="gitaly.UnaryServerInterceptor"
# Ошибки
{container_name=~".*gitlab.*"} | json | level="error"
# Запросы с определенным correlation_id
{container_name=~".*gitlab.*"} | json | correlation_id="01KAJ30DCE4BW6JSAT7KHGZ9PX"
# Логи Sidekiq
{container_name=~".*gitlab.*"} | json | severity="INFO"