Regex Support

This topic provides sample regular expressions that you can use to parse and preview the contents of common types of source LOG files.

When defining object groups, after you select the files to index, ChaosSearch analyzes the files to identify their type, compression (if any), and the fields within them for indexing. For LOG format files, ChaosSearch uses regular expressions to detect the fields to be indexed. ChaosSearch has default patterns for many common types of log files. If you have new or custom applications with unique log files, you can specify the regex string to use for parsing and indexing the fields.

Within the create object group workflow, you can use the Validate link next to Formatted Preview to confirm that the regex (either a default, or your own) can parse the contents of the log file.

If the regex does not match the entire line, Validate displays the message "Regex is incomplete; Please use the regex editor to make sure the whole line is matched." to inform you that the regex needs to be edited.

The following sections offer some examples of regular expressions that have been helpful for object groups that index log files from commonly observed vendors, services, and formats. The patterns might need some customization for your log files.

📘

Regex Supported in the UI is Java Regex

For ChaosSearch to parse values correctly, your regex must escape any backslash (\) characters using the double-backslash (\\) convention. The examples in this topic use this convention.

AWS Logs

Regex Examples for AWS CloudFront Logs

^[^#\\n](?<date>(?:(?!\\t).)*)\\t(?<time>(?:(?!\\t).)*)\\t(?<edge_location>(?:(?!\\t).)*)\\t(?<sc_bytes>(?:(?!\\t).)*)\\t(?<c_ip>(?:(?!\\t).)*)\\t(?<cs_method>(?:(?!\\t).)*)\\t(?<cs_host>(?:(?!\\t).)*)\\t(?<cs_uri_stem>(?:(?!\\t).)*)\\t(?<sc_status>(?:(?!\\t).)*)\\t(?<referer>(?:(?!\\t).)*)\\t(?<user_agent>(?:(?!\\t).)*)\\t(?<uri_query>(?:(?!\\t).)*)\\t(?<cookie>(?:(?!\\t).)*)\\t(?<edge_result_type>(?:(?!\\t).)*)\\t(?<edge_request_id>(?:(?!\\t).)*)\\t(?<host_header>(?:(?!\\t).)*)\\t(?<protocol>(?:(?!\\t).)*)\\t(?<bytes>(?:(?!\\t).)*)\\t(?<time_taken>(?:(?!\\t).)*)\\t(?<forwarded_for>(?:(?!\\t).)*)\\t(?<ssl_protocol>(?:(?!\\t).)*)\\t(?<ssl_cipher>(?:(?!\\t).)*)\\t(?<edge_response_result_type>(?:(?!\\t).)*)\\t(?<protocol_version>(?:(?!\\t).)*)\\t(?<fle_status>(?:(?!\\t).)*)\\t(?<fle_encrypted_fields>(?:(?!\\t).)*)
^(?:(?:#[^\n]+)|(?:(?<date>[^\\t]+)\\t(?<time>[^\\t]+)\\t(?<edge_location>[^\\t]+)\\t(?<sc_bytes>[^\\t]+)\\t(?<c_ip>[^\\t]+)\\t(?<cs_method>[^\\t]+)\\t(?<cs_host>[^\\t]+)\\t(?<cs_uri_stem>[^\\t]+)\\t(?<cs_status>[^\\t]+)\\t(?<referer>[^\\t]+)\\t(?<user_agent>[^\\t]+)\\t(?<uri_query>[^\\t]+)\\t(?<cookie>[^\\t]+)\\t(?<edge_result_type>[^\\t]+)\\t(?<edge_result_id>[^\\t]+)\\t(?<host_header>[^\\t]+)\\t(?<protocol>[^\\t]+)\\t(?<bytes>[^\\t]+)\\t(?<time_taken>[^\\t]+)\\t(?<forwarded_for>[^\\t]+)\\t(?<ssl_protocol>[^\\t]+)\\t(?<ssl_cipher>[^\\t]+)\\t(?<edge_response_result_type>[^\\t]+)\\t(?<protocol_version>[^\\t]+)\\t(?<fle_status>[^\\t]+)\\t(?<fle_encrypted_fields>[^\n]+)))
(?:#\\V+\\n)*(?<datetime>\\d{4}.\\d{2}.\\d{2}\\t\\d{2}.\\d{2}.\\d{2})\\t(?<x_edge_location>\\S+)\\t(?<sc_bytes>\\d+)\\t(?<c_ip>\\d+.\\d+.\\d+.\\d+)\\t(?<cs_method>\\S+)\\t(?<cs_Host>\\S+)\\t(?<cs_uri_stem>\\S+)\\t(?<cs_status>\\d+)\\t(?<cs_referer>\\S+)\\t(?<cs_User_Agent>\\S+)\\t(?<cs_uri_query>\\S+)\\t(?<cs_Cookie>\\S+)\\t(?<x_edge_result_type>\\S+)\\t(?<x_edge_request_id>\\S+)\\t(?<x_host_header>\\S+)\\t(?<cs_protocol>\\S+)\\t(?<cs_bytes>\\S+)\\t(?<time_taken>\\d+?.\\d+?)\\t(?<x_forwarded_for>\\S+)\\t(?<ssl_protocol>\\S+)\\t(?<ssl_cipher>\\S+)\\t(?<x_edge_response_result_type>\\S+)\\t(?<cs_protocol_version>\\S+)\\t(?<fle_status>\\S+)\\t(?<fle_encrypted_fields>\\S+)\\t(?<c_port>\\d+)\\t(?<time_to_first_byte>\\d+?.\\d+?)\\t(?<x_edge_detailed_result_type>\\S+)\\t(?<sc_content_type>\\S+)\\t(?<sc_content_len>\\S+)\\t(?<sc_range_start>\\S+)\\t(?<sc_range_end>\\S+)

Regex for AWS S3 Bucket Logs

^(?<bucketOwner>[0-9a-zA-Z]+) (?<bucket>[^ ]+) \\D(?<timestamp>[^ ]+) (?>\\D\\d{1,}.+?) (?<remoteIP>[^ ]+) (?<requester>[^ ]+) (?<requestID>[^ ]+) (?<operation>[^ ]+) (?<key>[^ ]+) (?<requestUri>[^ ].+?\") (?<httpStatus>[^ ]+) (?<errorCode>[^ ]+) (?<bytesSent>[^ ]+) (?<totalTime>[^ ]+) (?<objectSize>[^ ]+) (?<turnAroundTime>[^ ]+) (?<referrer>[^ ]+) (?<userAgent>[^ ].+?\") (?<versionId>[^ ]+) (?<hostId>[^ ]+) (?<signatureVersion>[^ ]+) (?<cipherSuite>[^ ]+) (?<authenticationType>[^ ]+) (?<hostHeader>[^ ]+) (?<tlsVersion>[^ ]+)

Regex for AWS Route 53 DNS Logs

^(?<version>[^ ]*) (?<timestamp>[^ ]*) (?<hosted_zone_id>[^ ]*) (?<query_name>[^ ]*) (?<query_type>[^ ]*) (?<response_code>[^ ]*) (?<protocol>[^ ]*) (?<edge_location>[^ ]*) (?<resolver_ip_address>[^ ]*) (?<client_subnet>[^ ]*)

Regex for AWS ELB Logs

^(?<type>[^ ]+) (?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z) (?<elb>[^ ]+) (?<client_ip>[\\w\\.:]+):(?<client_port>\\d+) (?<backend_ip>[\\w\\.:-]+)(?::(?<backend_port>\\d+))? (?<request_processing_time>[\\d-]+(?:\\.\\d+)?) (?<backend_processing_time>[\\d-]+(?:\\.\\d+)?) (?<response_processing_time>[\\d-]+(?:\\.\\d+)?) (?<elb_status_code>\\d+|-) (?<backend_status_code>\\d+|-) (?<received_bytes>\\d+) (?<sent_bytes>\\d+) \"(?:\\-|(?<cs_method>\\w+|-) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? (?<cs_version>[\\w/\\.]+|-)\\s*)\" (?:\"\"|\"(?<user_agent>(?:(?:\\\\\")?[^\\\"]*)*)\") (?<ssl_cipher>[\\w-]+) (?<ssl_protocol>[\\w\\.-]+)(?<body>.*)

Regex Examples for AWS VPC Flow Logs

^(?:(?:version[^\\n]*log-status)|(?<version>[^\\s]*)[\\s](?<account_id>[^\\s]*)[\\s](?<interface_id>[^\\s]*)[\\s](?<srcaddr>[^\\s]*)[\\s](?<dstaddr>[^\\s]*)[\\s](?<srcport>[^\\s]*)[\\s](?<dstport>[^\\s]*)[\\s](?<protocol>[^\\s]*)[\\s](?<packets>[^\\s]*)[\\s](?<bytes>[^\\s]*)[\\s](?<start>[^\\s]*)[\\s](?<end>[^\\s]*)[\\s](?<action>[^\\s]*)[\\s](?<log_status>[^\\s]*))
^(?<version>[\\d-]+)\\s+(?<account_id>[^\\s]+)\\s+(?<interface_id>[^\\s]+)\\s+(?<srcaddr>[^\\s]+)\\s+(?<dstaddr>[^\\s]+)\\s+(?<srcport>[\\d-]+)\\s+(?<dstport>[\\d-]+)\\s+(?<protocol>[\\d-]+)\\s+(?<packets>[\\d-]+)\\s+(?<bytes>[\\d-]+)\\s+(?<start>[\\d-]+)\\s+(?<end>[\\d-]+)\\s+(?<action>(?:ACCEPT)|(?:REJECT)|(?:-))\\s+(?<log_status>(?:OK)|(?:NODATA)|(?:SKIPDATA)|(?:-))(?<body>.*)
^(?<version>[\\d-]+)\\s(?<vpc_id>[^\\s]+)\\s(?<subnet_id>[^\\s]+)\\s(?<instance_id>[^\\s]+)\\s(?<interface_id>[^\\s]+)\\s(?<account_id>[\\d-]+)\\s(?<type>[^\\s]+)\\s(?<srcaddr>[^\\s]+)\\s(?<dstaddr>[^\\s]+)\\s(?<srcport>[\\d-]+)\\s(?<dstport>[\\d-]+)\\s(?<pkt_srcaddr>[^\\s]+)\\s(?<pkt_dstaddr>[^\\s]*)\\s(?<protocol>[^\\s]+)\\s(?<bytes>[^\\s]+)\\s(?<packets>[^\\s]+)\\s(?<start>[^\\s]+)\\s(?<end>[^\\s]+)\\s(?<action>(?:ACCEPT)|(?:REJECT)|(?:-))\\s(?<tcp_flags>[^\\s]+)\\s(?<log_status>(?:OK)|(?:NODATA)|(?:SKIPDATA)|(?:-))

Regex for Fastly CDN Logs

^(?<priority>[^\\d{3}].+?>)(?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z)\\s(?<cachenode>[^\\s].+?)\\s(?<logger>[^\\\\s].+?)\\s(?<ipaddress>[\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3}].+?)\\s(?<serviceId>[^\\\\s].+?)\\s(?<placement>[^\\\\s].+?)\\s\\D(?>\\d{1,2}.\\w{1,11}.\\d{4}:\\d{2}:\\d{2}:\\d{2}\\s\\S\\d{4}.+?)\\s(?<request>[^\\\\s].+?)\\s(?<response>[\\d{3}].+?)\\s(?<bytes>[^\\s].*)

Regex for Apache2 Logs

^host:(?<host>.+?),appsvrip:(?<appsvrip>.+?),timestamp:(?<timestamp>.+?),websrv:(?<websrv>.+?),httphost:(?<httphost>.+?),request:(?<request>.+?),response:(?<response>.+?),client_ip_address:(?<client_ip_address>.+?),process_time:(?<process_time>.+?),bytes:(?<bytes>.+?),referrer:(?<referrer>.+?),user_agent:(?<user_agent>.+?),session_id:(?<session_id>.+?)

Regex Examples for Nginx Logs

^host:(?<hostname>.+?),scheme:(?<scheme>.+?),client_ip_address:(?<client_ip_address>.+?),timestamp:(?<timestamp>.+?),websrv:(?<websrv>.+?),request_method:(?<request_method>.+?),request_uri:(?<request_uri>.+?),server_protocol:(?<server_protocol>.+?),response:(?<response>.+?),bytes:(?<bytes>.+?),process_time:(?<process_time>.+?),httphost:(?<httphost>.+?),referrer:(?<referrer>.+?),user_agent:(?<user_agent>.+?),xff:(?<xff>.+?),upstream_response_time:(?<upstream_response_time>.+?),upstream_cache_status:(?<upstream_cache_status>.+?),cookies:(?<cookies>.+?),cookie_si:(?<cookie_si>.+?)
^(?<c_ip>[\\w\\.\\:\\-]+)\\s\\-\\s(?<cs_username>\\S+)\\s+\\[(?<timestamp>[^\\]]+)\\]\\s\\"(?(?=[A-Z]+\\s)(?<cs_method>\\w+)\\s(?<cs_uri_stem>[^\\s\\?]+)(?:\\?(?<cs_uri_query>.*))?\\s(?<cs_version>[\\w\\/\\.]+)"|(?:[^"]+"|"))\\s(?<sc_status>\\d+)\\s(?<sc_bytes>[\\d\\-]+)\\s"(?<cs_referer>[^"]+|)"\\s"(?<cs_user_agent>[^"]+|)"\\s"(?<cs_xfwdfor>[^\\"]+)"\\s"(?<host>[^\\"]+)"\\s(?<scheme>\\w+)\\s(?<method>[\\w\\-]+)?