Docker Swarm + Fluent Bit + Loki + Grafana

Versions: Loki 3.5.8, Fluent Bit 4.2.0, Grafana 12.1.4

Configuration

[[ loki_config ]]

auth_enabled: false

server:
  http_listen_port: 3100

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

ruler:
  alertmanager_url: http://localhost:9093

limits_config:
  retention_period: 720h
  reject_old_samples: true
  reject_old_samples_max_age: 720h
  allow_structured_metadata: true
  max_query_length: 721h
  
ingester:
  lifecycler:
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_target_size: 1048576
  chunk_retain_period: 30s

table_manager:
  retention_deletes_enabled: true
  retention_period: 720h
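
Note: with the tsdb store configured above, retention is handled by the compactor rather than the table manager, so the table_manager block is mostly a leftover from older setups. A minimal sketch of a compactor block that would make retention_period actually delete old data (the working directory path is an assumption; delete_request_store is required once retention_enabled is true):

compactor:
  working_directory: /loki/compactor
  retention_enabled: true
  delete_request_store: filesystem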

[[ fluent_bit_config ]]

[SERVICE]
    flush                     1
    log_level                 info
    daemon                    off
    storage.path              /var/log/flb-storage/
    storage.sync              normal
    storage.checksum          off
    storage.max_chunks_up     128
    storage.backlog.mem_limit 10M
    parsers_file              parsers.conf
    http_server               on
    http_listen               0.0.0.0
    http_port                 2020
 
[INPUT]
    Name              tail
    Path              /var/lib/docker/containers/*/*.log
    Parser            docker
    Refresh_Interval  10
    #Ignore_Older      1h
    Docker_Mode       On
    Tag               docker.<file_name>
    Tag_Regex         (?<file_name>[a-z0-9]*)-json.log
    Mem_Buf_Limit     50MB
    Skip_Long_Lines   On
    DB                /var/log/flb-storage/flb_db.db
    DB.sync           normal
    Storage.Type      filesystem
    Read_from_Head    true
 
[INPUT]
    Name              systemd
    Tag               host.*
    Systemd_Filter    _SYSTEMD_UNIT=docker.service
    DB                /var/log/flb-storage/flb_systemd.db
    Read_From_Tail    true
 
# Get container metadata via Lua
[FILTER]
    name                lua
    match               docker.*
    script              /fluent-bit/bin/docker-metadata.lua
    call                enrich_with_docker_metadata
 
# For debugging the script
# [OUTPUT]
#     name                stdout
#     match               docker.*
#     format              json
 
# Copy the needed fields in docker.* BEFORE data.* records are created
[FILTER]
    name                modify
    match               docker.*
    copy                docker.hostname hostname
    copy                docker.container_started started
    copy                docker.container_name container_name
    copy                docker.container_name service_name
    copy                docker.container_id container_id
    copy                docker.state state
    copy                docker.stream stream
    copy                docker.label_project project
    copy                docker.label_service service
    copy                docker.label_logging logging
    copy                docker.label_logging_jobname logging_jobname
 
[FILTER]
    name              grep
    match             docker.*
    # The log text lives in the "log" key produced by the docker parser
    Exclude           log ^$
    Exclude           log ^\s\s+
    Exclude           log ^\s*$
    Exclude           log ^(\r?\n)?$
    Exclude           log (.*fluent).+
    Exclude           container_name (.*fluent).+
 
# Create data.* ONLY if logging = enabled
[FILTER]
    name                rewrite_tag
    match               docker.*
    rule                $logging ^enabled$ data.$container_id true
 
# Clean up data.* - keep ONLY the needed fields
[FILTER]
    name                record_modifier
    match               data.*
    whitelist_key       log
    whitelist_key       started
    whitelist_key       hostname
    whitelist_key       state
    whitelist_key       labels
    whitelist_key       container_id
    whitelist_key       container_name
    whitelist_key       service_name
    whitelist_key       project
    whitelist_key       service
    whitelist_key       logging
    whitelist_key       logging_jobname
    whitelist_key       stream
 
# Add Swarm metadata
[FILTER]
    name                modify
    match               data.*
    set                 node_id ${NODE_ID}
    set                 node_name ${NODE_NAME}
    set                 host_name ${NODE_NAME}
 
# [OUTPUT]
#     name                loki
#     match               data.*
#     host                loki
#     port                3100
#     labels              job=docker_swarm, node_name=$node_name, container_id=$container_id, container_name=$container_name, service_name=$service_name, project=$project, service=$service
#     label_keys          $node_name,$container_id,$container_name,$service_name,$project,$service
#     line_format         json
#     auto_kubernetes_labels off
 
[OUTPUT]
    name                stdout
    match               data.*
    format              json
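
The stdout output above is handy while validating the pipeline; in production it is typically replaced by the commented-out loki output. Because http_server is on, Fluent Bit also exposes its monitoring API on port 2020; that port is not published in the stack file below, so it has to be queried from a container attached to the same monitoring overlay network, for example:

curl -s http://fluent-bit:2020/api/v1/uptime
curl -s http://fluent-bit:2020/api/v1/metrics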

[[ fluent_bit_parser ]]

[PARSER]
    Name        docker
    Format      json
    Time_Key    time
    Time_Format %Y-%m-%dT%H:%M:%S.%L
    Time_Keep   On
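
For reference, this parser consumes the raw json-file records that Docker writes to /var/lib/docker/containers/<id>/<id>-json.log; the log/stream/time structure below is the json-file format, while the message itself is a made-up example:

{"log":"level=info msg=\"server started\"\n","stream":"stdout","time":"2024-01-01T12:00:00.123456789Z"}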

Plugins

docker_metadata

docker-metadata.lua

-- https://github.com/fluent/fluent-bit/issues/1499 (@konstantin-kornienko)
-- A few small tweaks were made to parse Docker Swarm metadata with fluent-bit (@ziwon)
DOCKER_VAR_DIR = '/var/lib/docker/containers/'
DOCKER_CONTAINER_CONFIG_FILE = '/config.v2.json'
CACHE_TTL_SEC = 300
 
-- Key-value pairs to get metadata.
DOCKER_CONTAINER_METADATA = {
  ['docker.container_name'] = '\"Name\":\"/?(.-)\"',
  ['docker.container_image'] = '\"Image\":\"/?(.-)\"',
  ['docker.container_started'] = '\"StartedAt\":\"/?(.-)\"',
  ['docker.hostname'] = '\"Hostname\":\"/?(.-)\"',
  ['docker.environment'] = '\"Env\":%[/?(.-)%]',
  ['docker.labels'] = '\"Labels\":%{/?(.-)%}',
  ['docker.state'] = '\"State\":%{/?(.-)%}',
}
 
-- Additional metadata for Swarm
DOCKER_CONTAINER_CHILD_METADATA = {
  ['docker.environment'] = '\"/?(.-)=/?(.-)\",',
  ['docker.labels'] = '\"/?(.-)\":\"/?(.-)\",',
  ['docker.state'] = '\"/?(.-)\":\"?/?(.-)\"?,',
}
 
cache = {}
 
-- Print table in a recursive way
-- https://gist.github.com/hashmal/874792
function tprint (tbl, indent)
  if not indent then indent = 0 end
  for k, v in pairs(tbl) do
    formatting = string.rep("  ", indent) .. k .. ": "
    if type(v) == "table" then
      print(formatting)
      tprint(v, indent+1)
    else
      print(formatting .. v)
    end
  end
end
 
-- Apply regular expression map to the given string
function apply_regex_map(data_tbl, reg_tbl, func, str)
  if str then
    for key, regex in pairs(reg_tbl) do
      local value = func(str, regex)
      -- Keep a value parsed from a previous line instead of overwriting it with nil
      if value ~= nil then
        data_tbl[key] = value
      end
    end
  else
    for key, regex in pairs(reg_tbl) do
      -- Skip keys that were never matched: gmatch() errors on a nil subject
      if type(data_tbl[key]) == 'string' then
        local tbl = {}
        for k, v in func(data_tbl[key], regex) do
          tbl[k] = v
        end
        data_tbl[key] = tbl
      end
    end
  end
  return data_tbl
end
 
-- Get container id from tag
function get_container_id_from_tag(tag)
  return tag:match'^[^%.]+%.([a-f0-9]+)$'
end
 
-- Gets metadata from config.v2.json file for container
function get_container_metadata_from_disk(container_id)
  local docker_config_file = DOCKER_VAR_DIR .. container_id .. DOCKER_CONTAINER_CONFIG_FILE
  local fl = io.open(docker_config_file, 'r')
  if fl == nil then
    return nil
  end
 
  -- parse json file and create record for cache
  local data = { time = os.time() }
  local reg_match = string.match
  local reg_gmatch = string.gmatch
  for line in fl:lines() do
    data = apply_regex_map(
      data,
      DOCKER_CONTAINER_METADATA,
      reg_match,
      line
    )
    data = apply_regex_map(
      data,
      DOCKER_CONTAINER_CHILD_METADATA,
      reg_gmatch
    )
  end
  fl:close()
 
  if next(data) == nil then
    return nil
  else
    return data
  end
end
 
function set_log_level(record)
  local log_msg = record['log'] or ''
  local lower_msg = log_msg:lower()
 
  if lower_msg:find('level=error') then record['stream'] = 'error'
  elseif lower_msg:find('level=warn') then record['stream'] = 'warning'
  elseif lower_msg:find('level=info') then record['stream'] = 'info'
  elseif lower_msg:find('level=debug') then record['stream'] = 'debug'
  else record['stream'] = 'info' end
end
 
function enrich_with_docker_metadata(tag, timestamp, record)
  -- print("LUA SCRIPT CALLED! Tag: " .. (tag or "nil"))
 
  -- Get container id from tag
  local container_id = get_container_id_from_tag(tag)
 
  if not container_id then
    -- print("No container_id - skipping")
    return 0, 0, 0
  end
 
  -- Add container_id to record
  local new_record = record
  new_record['docker.container_id'] = container_id
 
  -- Check if we have fresh cache record for container
  local cached_data = cache[container_id]
  if cached_data == nil or ( os.time() - cached_data['time'] > CACHE_TTL_SEC) then
    cached_data = get_container_metadata_from_disk(container_id)
    cache[container_id] = cached_data
    new_record['source'] = 'disk'
  else
    new_record['source'] = 'cache'
  end
 
  -- Metadata found in cache or got from disk, enrich record
  if cached_data then
    for key, regex in pairs(DOCKER_CONTAINER_METADATA) do
      new_record[key] = cached_data[key]
    end
 
    for key, regex in pairs(DOCKER_CONTAINER_CHILD_METADATA) do
      new_record[key] = cached_data[key]
    end
  end
 
  -- Extracted labels
  if cached_data and cached_data['docker.labels'] then
    new_record['docker.label_project'] = cached_data['docker.labels']['com.docker.compose.project']
    new_record['docker.label_service'] = cached_data['docker.labels']['com.docker.compose.service']
    new_record['docker.label_logging'] = cached_data['docker.labels']['logging']
    new_record['docker.label_logging_jobname'] = cached_data['docker.labels']['logging_jobname']
  end
 
  -- Set log level based on log content
  set_log_level(new_record)
 
  -- print("Returning enriched record")
  return 1, timestamp, new_record
end
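
The script relies on the tail input tagging records as docker.<container_id> (see Tag and Tag_Regex in the Fluent Bit config above). A quick standalone sanity check of that contract with a plain Lua interpreter could look like this (the 64-character id is a hypothetical placeholder):

-- run from the directory containing docker-metadata.lua
dofile('docker-metadata.lua')

local sample_id = string.rep('a', 64)   -- hypothetical container id
assert(get_container_id_from_tag('docker.' .. sample_id) == sample_id)
assert(get_container_id_from_tag('host.docker.service') == nil)
print('tag parsing OK')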

Docker Compose

monitoring

version: "3.8"

x-logging: &default-logging
  driver: "json-file"
  options:
    max-size: "10m"
    max-file: "3"
    tag: "{{.Name}}/{{.ImageName}}"

x-labels: &default-labels
  logging: "enabled"
  logging_jobname: "docker_swarm"
  monitoring: "true"

services:
  loki:
    image: grafana/loki:3.5.8
    ports:
      - "3100:3100"
    configs:
      - source: loki_config
        target: /etc/loki/local-config.yaml
    volumes:
      - loki_data:/loki
    command: -config.file=/etc/loki/local-config.yaml
    networks:
      - monitoring
    deploy:
      placement:
        constraints:
          - node.role == manager
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "loki"
      component: "logging"

  fluent-bit:
    image: fluent/fluent-bit:4.2.0
    configs:
      - source: fluent_bit_config
        target: /fluent-bit/etc/fluent-bit.conf
      - source: fluent_bit_parser
        target: /fluent-bit/etc/parsers.conf
      - source: docker_metadata
        target: /fluent-bit/bin/docker-metadata.lua
    environment:
      - NODE_ID={{.Node.ID}}
      - NODE_NAME={{.Node.Hostname}}
    networks:
      - monitoring
    volumes:
      - flb_storage:/var/log/flb-storage/
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    deploy:
      mode: global
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "fluent-bit"
      component: "logging"

  grafana:
    image: grafana/grafana:12.1.4
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_SECURITY_ADMIN_USER=admin
    volumes:
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring
    deploy:
      placement:
        constraints:
          - node.role == manager
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "grafana"
      component: "monitoring"

configs:
  loki_config:
    external: true
  fluent_bit_config:
    external: true
  fluent_bit_parser:
    external: true
  docker_metadata:
    external: true

networks:
  monitoring:
    driver: overlay

volumes:
  flb_storage:
    driver: local
  loki_data:
    driver: local
  grafana_data:
    driver: local
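
All four configs in the stack are declared as external, so they must exist in Swarm before deployment. Assuming the files from the sections above are saved locally under the names shown (the file names are assumptions), deployment could look like:

docker config create loki_config loki-config.yaml
docker config create fluent_bit_config fluent-bit.conf
docker config create fluent_bit_parser parsers.conf
docker config create docker_metadata docker-metadata.lua
docker stack deploy -c docker-compose.yml monitoring

Swarm configs are immutable: to change one, create it under a new name (or remove and recreate it) and redeploy the stack.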

Embedding into docker-compose

Services whose logs should end up in Loki must carry the logging: "enabled" label: the rewrite_tag filter above only promotes records to data.* for containers with that label. Reusing the same YAML anchors in an application compose file is enough:

x-logging: &default-logging
  driver: "json-file"
  options:
    max-size: "10m"
    max-file: "3"
    tag: "{{.Name}}/{{.ImageName}}"

x-labels: &default-labels
  logging: "enabled"
  logging_jobname: "docker_swarm"
  monitoring: "true"

services:

  loki:
    logging: *default-logging
    labels:
      <<: *default-labels
      service: "loki"
      component: "logging"
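
Grafana in the monitoring stack is not wired to Loki automatically. One option is to add a Loki data source pointing at http://loki:3100 (both services share the monitoring overlay network), either through the UI or via a provisioning file mounted into /etc/grafana/provisioning/datasources/; the file itself is not part of the stack above, so this is only a sketch:

apiVersion: 1
datasources:
  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    isDefault: true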