Regex Support
This topic provides sample regular expressions that you can use to parse and preview the contents of common types of source files.
When defining object groups, after you select the files to index, ChaosSearch analyzes the file to identify its type, compression (if any), and the fields within the file for indexing. For LOG format files, ChaosSearch uses regular expressions to detect the fields to be indexed. ChaosSearch has default patterns for many common types of log files. If you have new or custom log files, you can specify the regex string to use for indexing the fields.
Within the create object group workflow, you can use the Validate link next to Formatted Preview to confirm that the regex (either a default, or your own) can parse the contents of the log file.
If the regex value is not complete, Validate displays a message ("Regex is incomplete; Please use the regex editor to make sure the whole line is matched.") to inform you that the regex needs to be edited.
The regex samples in this topic are helpful patterns for creating object groups that index collections of log files from the following vendors, services, and formats.
Regex Supported in the UI is Java Regex
For ChaosSearch to parse values correctly, your regex must escape any backslash (\) characters using the double-backslash (\\) convention. The examples in this topic use this convention.
AWS Logs
Regex Examples for AWS CloudFront Logs
^[^#\\n](?<date>(?:(?!\\t).)*)\\t(?<time>(?:(?!\\t).)*)\\t(?<edge_location>(?:(?!\\t).)*)\\t(?<sc_bytes>(?:(?!\\t).)*)\\t(?<c_ip>(?:(?!\\t).)*)\\t(?<cs_method>(?:(?!\\t).)*)\\t(?<cs_host>(?:(?!\\t).)*)\\t(?<cs_uri_stem>(?:(?!\\t).)*)\\t(?<sc_status>(?:(?!\\t).)*)\\t(?<referer>(?:(?!\\t).)*)\\t(?<user_agent>(?:(?!\\t).)*)\\t(?<uri_query>(?:(?!\\t).)*)\\t(?<cookie>(?:(?!\\t).)*)\\t(?<edge_result_type>(?:(?!\\t).)*)\\t(?<edge_request_id>(?:(?!\\t).)*)\\t(?<host_header>(?:(?!\\t).)*)\\t(?<protocol>(?:(?!\\t).)*)\\t(?<bytes>(?:(?!\\t).)*)\\t(?<time_taken>(?:(?!\\t).)*)\\t(?<forwarded_for>(?:(?!\\t).)*)\\t(?<ssl_protocol>(?:(?!\\t).)*)\\t(?<ssl_cipher>(?:(?!\\t).)*)\\t(?<edge_response_result_type>(?:(?!\\t).)*)\\t(?<protocol_version>(?:(?!\\t).)*)\\t(?<fle_status>(?:(?!\\t).)*)\\t(?<fle_encrypted_fields>(?:(?!\\t).)*)
^(?:(?:#[^\n]+)|(?:(?<date>[^\\t]+)\\t(?<time>[^\\t]+)\\t(?<edge_location>[^\\t]+)\\t(?<sc_bytes>[^\\t]+)\\t(?<c_ip>[^\\t]+)\\t(?<cs_method>[^\\t]+)\\t(?<cs_host>[^\\t]+)\\t(?<cs_uri_stem>[^\\t]+)\\t(?<cs_status>[^\\t]+)\\t(?<referer>[^\\t]+)\\t(?<user_agent>[^\\t]+)\\t(?<uri_query>[^\\t]+)\\t(?<cookie>[^\\t]+)\\t(?<edge_result_type>[^\\t]+)\\t(?<edge_result_id>[^\\t]+)\\t(?<host_header>[^\\t]+)\\t(?<protocol>[^\\t]+)\\t(?<bytes>[^\\t]+)\\t(?<time_taken>[^\\t]+)\\t(?<forwarded_for>[^\\t]+)\\t(?<ssl_protocol>[^\\t]+)\\t(?<ssl_cipher>[^\\t]+)\\t(?<edge_response_result_type>[^\\t]+)\\t(?<protocol_version>[^\\t]+)\\t(?<fle_status>[^\\t]+)\\t(?<fle_encrypted_fields>[^\n]+)))
Regex Example for AWS CloudTrail Logs
CloudTrail uses the following file name format for the log file objects that it delivers to an Amazon S3 bucket. The format of the file is json.gz.
AccountID_CloudTrail_RegionName_YYYYMMDDTHHmmZ_UniqueString.FileNameFormat
.*\/CloudTrail\/.*
Regex for AWS S3 Bucket Logs
^(?<bucketOwner>[0-9a-zA-Z]+) (?<bucket>[^ ]+) \\D(?<timestamp>[^ ]+) (?>\\D\\d{1,}.+?) (?<remoteIP>[^ ]+) (?<requester>[^ ]+) (?<requestID>[^ ]+) (?<operation>[^ ]+) (?<key>[^ ]+) (?<requestUri>[^ ].+?\") (?<httpStatus>[^ ]+) (?<errorCode>[^ ]+) (?<bytesSent>[^ ]+) (?<totalTime>[^ ]+) (?<objectSize>[^ ]+) (?<turnAroundTime>[^ ]+) (?<referrer>[^ ]+) (?<userAgent>[^ ].+?\") (?<versionId>[^ ]+) (?<hostId>[^ ]+) (?<signatureVersion>[^ ]+) (?<cipherSuite>[^ ]+) (?<authenticationType>[^ ]+) (?<hostHeader>[^ ]+) (?<tlsVersion>[^ ]+)
Regex for AWS Route 53 DNS Logs
^(?<version>[^ ]*) (?<timestamp>[^ ]*) (?<hosted_zone_id>[^ ]*) (?<query_name>[^ ]*) (?<query_type>[^ ]*) (?<response_code>[^ ]*) (?<protocol>[^ ]*) (?<edge_location>[^ ]*) (?<resolver_ip_address>[^ ]*) (?<client_subnet>[^ ]*)
Regex for AWS ELB Logs
^(?<type>[^ ]+) (?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z) (?<elb>[^ ]+) (?<client_ip>[\\w\\.:]+):(?<client_port>\\d+) (?<backend_ip>[\\w\\.:-]+)(?::(?<backend_port>\\d+))? (?<request_processing_time>[\\d-]+(?:\\.\\d+)?) (?<backend_processing_time>[\\d-]+(?:\\.\\d+)?) (?<response_processing_time>[\\d-]+(?:\\.\\d+)?) (?<elb_status_code>\\d+|-) (?<backend_status_code>\\d+|-) (?<received_bytes>\\d+) (?<sent_bytes>\\d+) \"(?:\\-|(?<cs_method>\\w+|-) (?<cs_uri_stem>[^ \\?]+)(?:\\?(?<cs_uri_query>[^ ]*))? (?<cs_version>[\\w/\\.]+|-)\\s*)\" (?:\"\"|\"(?<user_agent>(?:(?:\\\\\")?[^\\\"]*)*)\") (?<ssl_cipher>[\\w-]+) (?<ssl_protocol>[\\w\\.-]+)(?<body>.*)
Regex Examples for AWS VPC Flow Logs
^(?:(?:version[^\\n]*log-status)|(?<version>[^\\s]*)[\\s](?<account_id>[^\\s]*)[\\s](?<interface_id>[^\\s]*)[\\s](?<srcaddr>[^\\s]*)[\\s](?<dstaddr>[^\\s]*)[\\s](?<srcport>[^\\s]*)[\\s](?<dstport>[^\\s]*)[\\s](?<protocol>[^\\s]*)[\\s](?<packets>[^\\s]*)[\\s](?<bytes>[^\\s]*)[\\s](?<start>[^\\s]*)[\\s](?<end>[^\\s]*)[\\s](?<action>[^\\s]*)[\\s](?<log_status>[^\\s]*))
^(?<version>[\\d-]+)\\s+(?<account_id>[^\\s]+)\\s+(?<interface_id>[^\\s]+)\\s+(?<srcaddr>[^\\s]+)\\s+(?<dstaddr>[^\\s]+)\\s+(?<srcport>[\\d-]+)\\s+(?<dstport>[\\d-]+)\\s+(?<protocol>[\\d-]+)\\s+(?<packets>[\\d-]+)\\s+(?<bytes>[\\d-]+)\\s+(?<start>[\\d-]+)\\s+(?<end>[\\d-]+)\\s+(?<action>(?:ACCEPT)|(?:REJECT)|(?:-))\\s+(?<log_status>(?:OK)|(?:NODATA)|(?:SKIPDATA)|(?:-))(?<body>.*)
^(?<version>[\\d-]+)\\s(?<vpc_id>[^\\s]+)\\s(?<subnet_id>[^\\s]+)\\s(?<instance_id>[^\\s]+)\\s(?<interface_id>[^\\s]+)\\s(?<account_id>[\\d-]+)\\s(?<type>[^\\s]+)\\s(?<srcaddr>[^\\s]+)\\s(?<dstaddr>[^\\s]+)\\s(?<srcport>[\\d-]+)\\s(?<dstport>[\\d-]+)\\s(?<pkt_srcaddr>[^\\s]+)\\s(?<pkt_dstaddr>[^\\s]*)\\s(?<protocol>[^\\s]+)\\s(?<bytes>[^\\s]+)\\s(?<packets>[^\\s]+)\\s(?<start>[^\\s]+)\\s(?<end>[^\\s]+)\\s(?<action>(?:ACCEPT)|(?:REJECT)|(?:-))\\s(?<tcp_flags>[^\\s]+)\\s(?<log_status>(?:OK)|(?:NODATA)|(?:SKIPDATA)|(?:-))
Regex for Fastly CDN Logs
^(?<priority>[^\\d{3}].+?>)(?<timestamp>\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z)\\s(?<cachenode>[^\\s].+?)\\s(?<logger>[^\\\\s].+?)\\s(?<ipaddress>[\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3}].+?)\\s(?<serviceId>[^\\\\s].+?)\\s(?<placement>[^\\\\s].+?)\\s\\D(?>\\d{1,2}.\\w{1,11}.\\d{4}:\\d{2}:\\d{2}:\\d{2}\\s\\S\\d{4}.+?)\\s(?<request>[^\\\\s].+?)\\s(?<response>[\\d{3}].+?)\\s(?<bytes>[^\\s].*)
Regex for Apache2 Logs
^host:(?<host>.+?),appsvrip:(?<appsvrip>.+?),timestamp:(?<timestamp>.+?),websrv:(?<websrv>.+?),httphost:(?<httphost>.+?),request:(?<request>.+?),response:(?<response>.+?),client_ip_address:(?<client_ip_address>.+?),process_time:(?<process_time>.+?),bytes:(?<bytes>.+?),referrer:(?<referrer>.+?),user_agent:(?<user_agent>.+?),session_id:(?<session_id>.+?)
Regex Examples for Nginx Logs
^host:(?<hostname>.+?),scheme:(?<scheme>.+?),client_ip_address:(?<client_ip_address>.+?),timestamp:(?<timestamp>.+?),websrv:(?<websrv>.+?),request_method:(?<request_method>.+?),request_uri:(?<request_uri>.+?),server_protocol:(?<server_protocol>.+?),response:(?<response>.+?),bytes:(?<bytes>.+?),process_time:(?<process_time>.+?),httphost:(?<httphost>.+?),referrer:(?<referrer>.+?),user_agent:(?<user_agent>.+?),xff:(?<xff>.+?),upstream_response_time:(?<upstream_response_time>.+?),upstream_cache_status:(?<upstream_cache_status>.+?),cookies:(?<cookies>.+?),cookie_si:(?<cookie_si>.+?)
^(?<c_ip>[\\w\\.\\:\\-]+)\\s\\-\\s(?<cs_username>\\S+)\\s+\\[(?<timestamp>[^\\]]+)\\]\\s\\"(?(?=[A-Z]+\\s)(?<cs_method>\\w+)\\s(?<cs_uri_stem>[^\\s\\?]+)(?:\\?(?<cs_uri_query>.*))?\\s(?<cs_version>[\\w\\/\\.]+)"|(?:[^"]+"|"))\\s(?<sc_status>\\d+)\\s(?<sc_bytes>[\\d\\-]+)\\s"(?<cs_referer>[^"]+|)"\\s"(?<cs_user_agent>[^"]+|)"\\s"(?<cs_xfwdfor>[^\\"]+)"\\s"(?<host>[^\\"]+)"\\s(?<scheme>\\w+)\\s(?<method>[\\w\\-]+)?
Updated about 2 months ago