Loading...

Parse user agent strings into structured data based on BrowserScope data with logstash

:heavy_exclamation_mark: This post is older than a year. Consider that some information might not be accurate anymore. :heavy_exclamation_mark:

The Apache HTTP Server logs user agent strings. The user agent string contains information like family, operating system, version, and device. Logstash offers a filter plugin to parse this information.

Consider the following example log entry:

66.249.76.151 - - [04/Dec/2016:02:14:01 +0100] "GET /wp/docker-behind-proxy-with-cntlm/ HTTP/1.1" 200 14386 "-" "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"

Use the following configuration for the useragent plugin in the filter section:

# Example pipeline: read Apache combined-format access log lines from stdin,
# enrich each event (timestamp, GeoIP, reverse DNS, user agent) and print the
# resulting event to stdout.
input { stdin { } }
filter {
  # Split the raw line in "message" into fields using the COMBINEDAPACHELOG
  # pattern; the later filters read the clientip, timestamp and agent fields.
  grok {
    match => { "message" => "%{COMBINEDAPACHELOG}" }
  }
  # Parse the Apache "timestamp" field (e.g. 04/Dec/2016:02:14:01 +0100).
  date {
    match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
  }
  # Look up the client IP in the GeoLite2 City database (relative path) and
  # store the result under "geoip". The two add_field entries append longitude
  # and then latitude to [geoip][coordinates], giving a two-element field.
  geoip {
    source => "clientip"
    target => "geoip"
    database => "./GeoLite2-City.mmdb"
    add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
    add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
  }
  # The coordinates were added via string interpolation, so convert them to
  # floats. Also copy the client IP into "dnsname" as input for the dns filter.
  mutate {
    convert => [ "[geoip][coordinates]", "float"]
    add_field => { "dnsname" => "%{clientip}" }
  }
  # Reverse-resolve the IP held in "dnsname" and replace it with the host name;
  # "clientip" itself is not listed here and keeps the IP address.
  dns {
    reverse => [ "dnsname" ]
    action => "replace"
  }
  # Parse the user agent string in "agent" into structured fields. No target
  # is set here; in the sample output the resulting fields (name, os, os_name,
  # major, minor, device, ...) appear at the top level of the event.
  useragent {
    source => "agent"
  }
}
# Pretty-print every event to the console using the rubydebug codec.
output { stdout { codec => "rubydebug" } }

You will receive the following output:

{
        "request" => "/wp/docker-behind-proxy-with-cntlm/",
          "agent" => "\"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"",
          "minor" => "1",
           "auth" => "-",
          "ident" => "-",
       "os_minor" => "0",
       "os_major" => "6",
          "major" => "2",
       "clientip" => "66.249.76.151",
       "@version" => "1",
           "host" => "omega",
        "dnsname" => "crawl-66-249-76-151.googlebot.com",
      "timestamp" => "04/Dec/2016:02:14:01 +0100",
          "geoip" => {
              "timezone" => "America/Los_Angeles",
                    "ip" => "66.249.76.151",
              "latitude" => 37.419200000000004,
           "coordinates" => [
            [0] -122.0574,
            [1] 37.419200000000004
        ],
        "continent_code" => "NA",
             "city_name" => "Mountain View",
         "country_code2" => "US",
          "country_name" => "United States",
              "dma_code" => 807,
         "country_code3" => "US",
           "region_name" => "California",
              "location" => [
            [0] -122.0574,
            [1] 37.419200000000004
        ],
           "postal_code" => "94043",
             "longitude" => -122.0574,
           "region_code" => "CA"
    },
             "os" => "Android 6.0.1",
           "verb" => "GET",
        "message" => "66.249.76.151 - - [04/Dec/2016:02:14:01 +0100] \"GET /wp/docker-behind-proxy-with-cntlm/ HTTP/1.1\" 200 14386 \"-\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"",
       "referrer" => "\"-\"",
     "@timestamp" => 2016-12-04T01:14:01.000Z,
       "response" => "200",
          "bytes" => "14386",
           "name" => "Googlebot",
        "os_name" => "Android",
    "httpversion" => "1.1",
         "device" => "Spider"
}
Please remember the terms for blog comments.