Это старая версия документа!


Обработка многострочных логов

fluent-bit.conf

# Global engine settings: flush interval, filesystem buffering, parser file
# and the built-in HTTP monitoring server.
[SERVICE]
    Flush                     1
    Daemon                    off
    Log_Level                 info

    # Filesystem-backed buffering (the tail input below sets Storage.Type filesystem).
    storage.path              /var/log/flb-storage/
    storage.sync              normal
    storage.checksum          off
    storage.max_chunks_up     128
    storage.backlog.mem_limit 10M

    # NOTE(review): the parser definitions on this page are titled
    # "parsers_multiline.conf", but only parsers.conf is loaded here -
    # confirm the file is deployed under this name.
    Parsers_File              parsers.conf

    # HTTP endpoint queried by the "curl http://localhost:2020/api/v1/metrics" check.
    HTTP_Server               on
    HTTP_Listen               0.0.0.0
    HTTP_Port                 2020
 
# Tail the json-file logs of every Docker container on this node.
[INPUT]
    Name              tail
    Path              /var/lib/docker/containers/*/*.log
    # Built-in "docker" multiline parser: decodes the json-file line and
    # re-joins messages that Docker split into partial lines.
    # The legacy options "Parser" and "Multiline On" were removed because they
    # cannot be combined with multiline.parser. The custom regex parser
    # docker_multiline was also dropped here: a tail multiline parser sees the
    # raw json-file line, not the decoded "log" text it was written for.
    multiline.parser  docker
    Refresh_Interval  10
    # With Tag_Regex set, the tag is built from the named capture; a plain "*"
    # is not expanded. The resulting tag is docker.<container id> and is still
    # matched by "docker.*" in the filters below.
    Tag               docker.<file_name>
    Tag_Regex         (?<file_name>[a-f0-9]*)-json.log
    # NOTE(review): with Storage.Type filesystem the memory limit is presumably
    # governed by storage.max_chunks_up rather than Mem_Buf_Limit - confirm.
    Mem_Buf_Limit     50MB
    Skip_Long_Lines   On
    # Offset database, so file positions survive restarts.
    DB                /var/log/flb-storage/flb_db.db
    DB.sync           normal
    Storage.Type      filesystem
    Read_from_Head    true
 
# Docker Swarm metadata: every docker.* record is passed to the Lua function
# enrich_with_docker_metadata from docker-metadata.lua.
[FILTER]
    Name    lua
    Match   docker.*
    Script  /fluent-bit/bin/docker-metadata.lua
    Call    enrich_with_docker_metadata
 
# Primary JSON parsing: try to decode the "log" field with the json_auto parser.
# The original "log" key and all other record fields are kept.
[FILTER]
    Name          parser
    Match         docker.*
    Key_Name      log
    Parser        json_auto
    Preserve_Key  true
    Reserve_Data  true
 
# Rename "log" to "message", then keep an untouched copy as "raw_message"
# for the service-specific parsers further down.
[FILTER]
    Name    modify
    Match   docker.*
    Rename  log message
    Copy    message raw_message
 
# Additional multiline handling: join stack traces found in the "message" key.
# NOTE(review): stacktrace_multiline is listed first and its start rule is
# /^\w+/, which presumably matches most lines before the language-specific
# parsers get a chance - confirm the intended order.
[FILTER]
    Name                   multiline
    Match                  docker.*
    Multiline.Key_Content  message
    Multiline.Parser       stacktrace_multiline, java_exception_multiline, ruby_exception_multiline, python_exception_multiline
 
# Copy Docker Swarm metadata to short, stable key names.
# "Copy SRC DST" only acts when SRC exists and DST does not, so the former
# self-copies (hostname, container_name, container_id, state, stream,
# image_name, image_id onto themselves) could never do anything and were
# removed. Those keys keep whatever value they already have.
[FILTER]
    name                modify
    match               docker.*

    # Basic Docker metadata
    copy                container_started started

    # Docker Swarm labels
    copy                label_com_docker_swarm_service_name service_name
    copy                label_com_docker_swarm_task_name task_name
    copy                label_com_docker_swarm_task_id task_id
    copy                label_com_docker_swarm_node_id node_id
    copy                label_com_docker_stack_namespace stack_name

    # Custom logging labels
    copy                label_logging_enabled logging_enabled
    copy                label_logging_jobname logging_jobname
 
# Default values for fields that may be missing.
# "Add" writes the key only when it is absent. The previous "Set" overwrote
# service_name and logging_jobname unconditionally, discarding the values
# copied from the Swarm / logging labels by the preceding filter.
[FILTER]
    name                modify
    match               docker.*
    Add                 service_name unknown-service
    Add                 logging_jobname unknown-job
    # ${HOSTNAME} is resolved from the environment when the config is loaded.
    Set                 node_name ${HOSTNAME}
    Set                 environment production
 
# Service-specific parsers applied to raw_message.
# One filter with several Parser entries replaces the former chain of eight
# filters: parsers are tried in the order listed and the first one that
# matches wins, as before.
# Preserve_Key keeps raw_message after a successful parse; previously the first
# matching parser removed it, although raw_message is in the allow-list of the
# record_modifier filter that prepares records for Loki.
[FILTER]
    name                parser
    match               docker.*
    key_name            raw_message
    parser              gitlab_json
    parser              gitlab_combined
    parser              nextcloud_cron
    parser              nextcloud_stacktrace
    parser              nginx_access
    parser              gitlab_registry
    parser              sidekiq
    parser              gitaly_json
    reserve_data        true
    preserve_key        true
 
# Structure the parsed fields: lift the children of the "parsed_data" map to
# the top level, prefixing each key with "parsed_".
# NOTE(review): no filter visible on this page creates a "parsed_data" key;
# unless docker-metadata.lua does, this lift has nothing to act on - confirm.
[FILTER]
    Name          nest
    Match         docker.*
    Operation     lift
    Nested_under  parsed_data
    Add_prefix    parsed_
 
# Drop any remaining top-level "parsed_data" key.
[FILTER]
    Name    modify
    Match   docker.*
    Remove  parsed_data
 
# Collect every top-level key matching "parsed_*" into a single map named "parsed".
[FILTER]
    Name        nest
    Match       docker.*
    Operation   nest
    Wildcard    parsed_*
    Nest_under  parsed
 
# Copy the important parsed fields to short top-level names.
# NOTE(review): the preceding nest filter moves every "parsed_*" key under the
# "parsed" map, so these top-level sources presumably no longer exist when this
# filter runs - confirm the intended filter order.
[FILTER]
    Name   modify
    Match  docker.*
    Copy   parsed_level level
    Copy   parsed_message log_message
    Copy   parsed_method method
    Copy   parsed_status status
    Copy   parsed_path path
    Copy   parsed_remote_addr remote_ip
    Copy   parsed_correlation_id correlation_id
    Copy   parsed_severity severity
    Copy   parsed_exception exception
    Copy   parsed_stacktrace stacktrace
 
# Re-tag records whose logging_enabled field equals "enabled" as
# logs.<logging_jobname>; only logs.* records reach the Loki output.
# Rule format: $KEY REGEX NEW_TAG KEEP.
#  - The key must be a record accessor, hence "$logging_enabled".
#  - The new tag uses the record accessor "$logging_jobname"; the previous
#    "${logging_jobname}" is environment-variable syntax, which is substituted
#    when the configuration is loaded, not per record.
[FILTER]
    name                rewrite_tag
    match               docker.*
    Rule                $logging_enabled ^enabled$ logs.$logging_jobname true
    Emitter_Name        re_emitted
 
# Clean up records for Loki: only the keys listed below are kept on logs.* records.
[FILTER]
    Name           record_modifier
    Match          logs.*

    # Message text
    Whitelist_key  message
    Whitelist_key  raw_message
    Whitelist_key  log_message

    # Container / Swarm identity
    Whitelist_key  container_name
    Whitelist_key  container_id
    Whitelist_key  service_name
    Whitelist_key  task_name
    Whitelist_key  stack_name
    Whitelist_key  node_name
    Whitelist_key  environment
    Whitelist_key  logging_jobname

    # Parsed fields
    Whitelist_key  parsed
    Whitelist_key  level
    Whitelist_key  severity
    Whitelist_key  method
    Whitelist_key  status
    Whitelist_key  path
    Whitelist_key  remote_ip
    Whitelist_key  correlation_id
    Whitelist_key  exception
    Whitelist_key  stacktrace
 
# Ship logs.* records to Loki at loki:3100. Stream labels are taken from
# record fields via record accessors; the log line itself is sent as JSON.
[OUTPUT]
    Name                    loki
    Match                   logs.*
    Host                    loki
    Port                    3100
    Labels                  job=$logging_jobname, node_name=$node_name, container_name=$container_name, service_name=$service_name, stack_name=$stack_name, environment=$environment
    Line_Format             json
    Drop_Single_Key         false
    Auto_Kubernetes_Labels  off
 
# Резервный вывод для отладки
#[OUTPUT]
#    name                stdout
#    match               logs.*
#    format              json

parsers_multiline.conf

# Base parser for Docker json-file log lines; the timestamp is read from the
# "time" field and the field is kept in the record.
[PARSER]
    Name         docker
    Format       json
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# Multiline parser for Docker logs.
# A record starts at a line containing an ISO-8601 timestamp or a JSON object.
# A line is a continuation only when it does NOT itself begin with a timestamp
# or "{". Previously "/^[^{].*/" also matched timestamp-led lines, and a second
# cont_state rule matched JSON lines, so lines that begin a new record were
# appended to the previous one. Any line that fails the continuation rule
# closes the current record and is re-evaluated against start_state.
[MULTILINE_PARSER]
    Name        docker_multiline
    Type        regex
    Rule        "start_state"  "/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z).*|({\".*\"})/" "cont_state"
    Rule        "cont_state"   "/^(?!\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z|\{).+/" "cont_state"
 
# Generic JSON parser used by the primary parser filter on the "log" field.
[PARSER]
    Name         json_auto
    Format       json
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# GitLab JSON logs.
# NOTE(review): no Time_Key / Time_Format is defined, so Time_Keep presumably
# has nothing to act on - confirm.
[PARSER]
    Name       gitlab_json
    Format     json
    Time_Keep  On
 
# GitLab combined format: "<YYYY-MM-DD_HH:MM:SS.fraction> <message>".
[PARSER]
    Name         gitlab_combined
    Format       regex
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%d_%H:%M:%S.%L
    Regex        ^(?<time>\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}\.\d+)\s+(?<message>.+)$
 
# Nextcloud cron logs: "--- Executing Cron Tasks: <timestamp> ---<log_message>".
# NOTE(review): the time format relies on %Z (time zone name) - verify that
# the timestamps are parsed as expected.
[PARSER]
    Name         nextcloud_cron
    Format       regex
    Time_Key     timestamp
    Time_Keep    On
    Time_Format  %a %b %d %H:%M:%S %Z %Y
    Regex        ^-+\s*Executing Cron Tasks:\s*(?<timestamp>.+?)\s*-+(?<log_message>.*)$
 
# Nextcloud exceptions with stack traces:
# "<Exception>: <message> in <file>:<line>" followed by "Stack trace:" and the trace.
# NOTE(review): whether "." spans newlines here decides if the stacktrace
# capture can hold the multi-line trace - confirm against real input.
[PARSER]
    Name       nextcloud_stacktrace
    Format     regex
    Time_Keep  On
    Regex      ^(?<exception>[A-Z][a-zA-Z0-9_\\]+):\s+(?<message>.*)\s+in\s+(?<file>.*):(?<line>\d+)\s*$\s*Stack trace:\s*$(?<stacktrace>.*)
 
# Nginx access log lines; captures remote_ip, timestamp, method, path, status,
# body_bytes, referrer and user_agent.
[PARSER]
    Name         nginx_access
    Format       regex
    Time_Key     timestamp
    Time_Keep    On
    Time_Format  %d/%b/%Y:%H:%M:%S %z
    Regex        ^(?<remote_ip>[^ ]*) - - \[(?<timestamp>[^\]]*)\] "(?<method>\w+) (?<path>[^ ]*) HTTP/[0-9.]+" (?<status>\d+) (?<body_bytes>\d+) "(?<referrer>[^"]*)" "(?<user_agent>[^"]*)"
 
# GitLab Registry lines of the form: time="..." level=... msg="..." <extra>.
[PARSER]
    Name         gitlab_registry
    Format       regex
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
    Regex        ^time="(?<time>[^"]+)"\s+level=(?<level>\w+)\s+msg="(?<message>[^"]+)"(?<extra>.*)$
 
# Sidekiq JSON lines that start with "severity" followed by "time"; captures
# severity, time, message and the rest of the line as "extra".
[PARSER]
    Name         sidekiq
    Format       regex
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
    Regex        ^{\s*"severity"\s*:\s*"(?<severity>\w+)"\s*,\s*"time"\s*:\s*"(?<time>[^"]+)".*?"message"\s*:\s*"(?<message>[^"]*)"(?<extra>.*)$
 
# Gitaly JSON logs; the timestamp is read from the "time" field.
[PARSER]
    Name         gitaly_json
    Format       json
    Time_Key     time
    Time_Keep    On
    Time_Format  %Y-%m-%dT%H:%M:%S.%LZ
 
# Multiline parsers for stack traces.
# Generic form: a record starts at a line beginning with a word character and
# continues while the following lines start with whitespace.
# The former third rule ("cont_state" /^\w+/ -> "start_state") was removed: a
# line that matches a cont_state rule is appended to the current record, so it
# glued the first line of the NEXT record onto the previous one. A line that
# matches no continuation rule already closes the record and is re-evaluated
# against start_state.
[MULTILINE_PARSER]
    Name        stacktrace_multiline
    Type        regex
    Rule        "start_state"   "/^\w+/"  "cont_state"
    Rule        "cont_state"    "/^[\s\t]+/"  "cont_state"
 
# Java exceptions: starts at an "...Exception" / "Error" header (or an "at"
# frame) and continues over "at ...", "... N more" and "Caused by:" lines.
# The former rule that matched the next exception header in cont_state was
# removed: it appended that header to the previous record instead of starting
# a new one. A non-matching line closes the record and is re-checked against
# start_state.
[MULTILINE_PARSER]
    Name        java_exception_multiline
    Type        regex
    Rule        "start_state"   "/^([A-Z][a-zA-Z]*Exception|Error).*|^\s+at\s+/"  "cont_state"
    Rule        "cont_state"    "/^\s+at\s+|^\s+\.\.\.\s+|^\s*Caused by:/"  "cont_state"
 
# Ruby exceptions: starts at an "...Error" header (or a "from" frame) and
# continues over "from ..." and "*.rb:<line>:in" lines.
# The former rule that matched the next "...Error" header in cont_state was
# removed: it appended that header to the previous record instead of starting
# a new one. A non-matching line closes the record and is re-checked against
# start_state.
[MULTILINE_PARSER]
    Name        ruby_exception_multiline
    Type        regex
    Rule        "start_state"   "/^[A-Z][a-zA-Z]*Error.*|^\s+from\s+/"  "cont_state"
    Rule        "cont_state"    "/^\s+from\s+|^\s+.*\.rb:\d+:in\s+/"  "cont_state"
 
# Python exceptions: starts at an "...Error" header, a "Traceback" line or a
# "File" frame and continues over "File ..." and "*.py:<line>" lines.
# The former rule that matched the next "...Error" header in cont_state was
# removed: it appended that header to the previous record instead of starting
# a new one. A non-matching line closes the record and is re-checked against
# start_state.
[MULTILINE_PARSER]
    Name        python_exception_multiline
    Type        regex
    Rule        "start_state"   "/^[A-Z][a-zA-Z]*Error.*|^Traceback.*|^\s+File\s+/"  "cont_state"
    Rule        "cont_state"    "/^\s+File\s+|^\s+.*\.py:\d+/"  "cont_state"
 
# PHP errors (Nextcloud): "PHP <level>: <message> in <file> on line <line>".
# No filter on this page references this parser.
[PARSER]
    Name       php_exception
    Format     regex
    Time_Keep  On
    Regex      ^PHP\s+(?<level>\w+):\s+(?<message>.*)\s+in\s+(?<file>.*)\s+on\s+line\s+(?<line>\d+)
 
# PDO/Doctrine exceptions: "PDOException: SQLSTATE[<code>]: <message> in <file>:<line>".
# No filter on this page references this parser.
[PARSER]
    Name       pdo_exception
    Format     regex
    Time_Keep  On
    Regex      ^PDOException:\s+SQLSTATE\[(?<sql_state>\w+)\]:\s+(?<message>.*)\s+in\s+(?<file>.*):(?<line>\d+)

Установка

 

loki_config

# Loki configuration with local filesystem storage and an in-memory ring.
# Authentication is switched off.
auth_enabled: false

server:
  # HTTP API port; the Fluent Bit loki output on this page sends to port 3100.
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  # Chunks and rules are stored under /loki on the local filesystem.
  path: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      # Embedded query-results cache, sized by max_size_mb.
      embedded_cache:
        enabled: true
        max_size_mb: 100

# NOTE(review): boltdb-shipper with schema v11 is an older index/schema
# combination - confirm it is still accepted by the Loki version in use.
schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

ruler:
  alertmanager_url: http://localhost:9093

analytics:
  reporting_enabled: false

Развертывание

 

Проверка работы

# Validate the configuration
$ docker exec -it <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf --dry-run

# View the service logs
$ docker service logs logging_fluent-bit

# Check the metrics
$ curl http://localhost:2020/api/v1/metrics | jq

# Test the parsers
# (the second line continues the pipeline started by "| \", so it must not
# carry its own "$" prompt)
$ echo '2025-11-21T02:13:34.366Z {"method":"PUT","path":"/projects","status":500}' | \
    docker exec -i <fluentbit_container> /fluent-bit/bin/fluent-bit -c /fluent-bit/etc/fluent-bit.conf -i stdin -o stdout
# LogQL query examples.
# Lines of the "fluent-bit" job that contain the text "gitlab"
{job="fluent-bit"} |= "gitlab"
 
# Streams whose container_name label is exactly "gitlab"
{container_name="gitlab"} 
 
# Lines of the "fluent-bit" job matching error/exception/fail, case-insensitively
{job="fluent-bit"} |~ "(?i)error|exception|fail"
 
# Streams from the node labelled "node-1"
{node_name="node-1"}
 
# All GitLab logs
{container_name=~".*gitlab.*"} 
 
# Logs by component
{container_name=~".*gitlab.*"} | json | component="gitaly.UnaryServerInterceptor"
 
# Errors
{container_name=~".*gitlab.*"} | json | level="error"
 
# Requests with a specific correlation_id
{container_name=~".*gitlab.*"} | json | correlation_id="01KAJ30DCE4BW6JSAT7KHGZ9PX"
 
# Sidekiq logs
{container_name=~".*gitlab.*"} | json | severity="INFO"
 
# All GitLab logs with parsed_data
{container_name=~".*gitlab.*"} | json
 
# Logs with errors
{environment="production"} | json | level="error" 
 
# Slow PostgreSQL queries
{service_name=~".*postgres.*"} | json | duration > 1000
 
# Nginx 5xx errors
{container_name=~".*nginx.*"} | json | status >= 500
 
# Sidekiq logs
{container_name=~".*sidekiq.*"} | json | severity="INFO"