ChaosSearch Scroll API - Python
This section shows an example scroll API request.
The Scroll API request has been deprecated and is replaced by the Bulk Export feature.
A search request through the ChaosSearch API returns a single page of results. The scroll API can be used to retrieve large numbers of results (or even all results) from a single search request.
Scrolling is not intended for real-time user requests, but rather for processing large amounts of data.
Example Request
Fields to update:
- Input your Access-Key-ID and Secret-Access-Key from Settings (the first two arguments of the `AWS4Auth(...)` call).
- Update the Index-View-Name with the one you created in the Refinery (the `index` argument of `helpers.scan`).
- Use the scroll field to specify how often you want the scroll API to move through results (for example, 1 minute = 1m) (the `scroll` argument of `helpers.scan`).
- Use the size field to specify the number of results to return each time the scroll API makes a request (the `size` argument of `helpers.scan`).
Important Timeout and Redirect Considerations for Python Clients
In your Python client, make sure that you plan for handling timeouts and for the redirects that ChaosSearch returns as heartbeats for long-running operations. When a user submits an `_msearch` request, after two minutes of performing the query, ChaosSearch replies with a redirect. Following that redirect will "reconnect" you to the query (and if two more minutes elapse, the process repeats). The Python client must be configured to allow queries to run for longer than the 10 second default, and to follow redirects from ChaosSearch. If you observe read timeout and response timeout errors on the Python client side, you can add the `timeout` and `response_timeout` settings shown below and start with a value of `120` as a default.
from opensearchpy import OpenSearch, helpers, exceptions, RequestsHttpConnection
import json
from requests_aws4auth import AWS4Auth
# Example: page through every document of a ChaosSearch view with the scroll
# helper. Requests are SigV4-signed with your ChaosSearch API keys.
awsauth = AWS4Auth("Access-Key-ID", "Secret-Access-Key", "us-east-1", 's3')  # <---- SPECIFY your API keys HERE

# Bind the connection straight to `client`; the previous intermediate name
# `os` shadowed the standard-library module of the same name.
client = OpenSearch(
    hosts=[{'host': 'lab.chaossearch.io', 'port': 443, 'url_prefix': '/elastic', 'use_ssl': True}],
    http_auth=awsauth,
    connection_class=RequestsHttpConnection,
    # Allow long-running queries: raise both timeouts above the client's
    # 10 second default (120 is the suggested starting value).
    timeout=120,
    response_timeout=120,
    verify_certs=True,
)

# Probe the endpoint first so a connection problem is reported once, up front,
# instead of surfacing in the middle of the scroll.
try:
    client_info = client.info()
    print('OpenSearch client info:', json.dumps(client_info, indent=4))
except exceptions.ConnectionError as err:
    print('Opensearch client error:', err)
    client = None

if client is not None:
    # NOTE(review): this body-level "size" and the `size` argument passed to
    # helpers.scan() below differ (500 vs 10) -- confirm which one the
    # service honours and keep only that one.
    search_body = {
        "size": 500,
        "query": {
            "match_all": {}
        }
    }

    resp = helpers.scan(
        client,
        index='Index-View-Name',  # <---- SPECIFY ChaosSearch Refinery View HERE
        query=search_body,
        scroll='1m',  # scroll setting, e.g. 1 minute = 1m
        size=10,      # number of results returned per scroll request
    )

    # helpers.scan() is consumed lazily here, one document at a time.
    for num, doc in enumerate(resp):
        print('\n', num, '', doc)
The following sample Python script follows redirects using the `requests` library:
import json
import requests
import boto3
from lowercase_booleans import true, false
from requests_aws4auth import AWS4Auth
# Credentials are read from the "default" profile of the local boto3 session.
credentials = boto3.Session(profile_name='default').get_credentials()
region = 'us-east-1'

# Deployment host and the view to search.
host = "myuser.chaossearch.io"
view = "my-view"
es_url = "https://{}/elastic/{}/_search".format(host, view)
es_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Sort clause: ascending by @timestamp.
sort_by_timestamp = {
    "@timestamp": {
        "order": "asc",
        "unmapped_type": "boolean"
    }
}

# Range filter restricting results to a fixed two-hour window.
timestamp_window = {
    "range": {
        "@timestamp": {
            "gte": "2022-08-11T09:26:43.634Z",
            "lte": "2022-08-11T11:26:43.634Z",
            "format": "strict_date_optional_time"
        }
    }
}

# Search body. `true` comes from lowercase_booleans so that JSON-style
# boolean literals can be written as-is inside this Python dict.
es_payload = {
    "version": true,
    "size": 500,
    "sort": [sort_by_timestamp],
    "aggs": {
        "2": {
            "date_histogram": {
                "field": "@timestamp",
                "fixed_interval": "10m",
                "time_zone": "America/Los_Angeles",
                "min_doc_count": 1
            }
        }
    },
    "stored_fields": ["*"],
    "script_fields": {},
    "docvalue_fields": [
        {"field": "@timestamp", "format": "date_time"},
        {"field": "mystart_time", "format": "date_time"},
    ],
    "_source": {
        "excludes": []
    },
    "query": {
        "bool": {
            "must": [],
            "filter": [
                {"match_all": {}},
                timestamp_window,
            ],
            "should": [],
            "must_not": []
        }
    },
    "highlight": {
        "pre_tags": ["@kibana-highlighted-field@"],
        "post_tags": ["@/kibana-highlighted-field@"],
        "fields": {
            "*": {}
        },
        "fragment_size": 2147483647
    }
}
# Sign the request with the session credentials (including the session token,
# which is present for temporary credentials).
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 's3', session_token=credentials.token)

# allow_redirects=True makes requests follow the redirects ChaosSearch sends
# as heartbeats for long-running queries.
# TLS verification is left enabled: the request carries signed credentials,
# and the OpenSearch client example above also verifies certificates.
response = requests.post(
    es_url,
    json=es_payload,
    auth=awsauth,
    headers=es_headers,
    verify=True,
    allow_redirects=True,
)

# Check the status before parsing: an error response is not guaranteed to be
# JSON, and decoding it first would hide the real failure behind a parse error.
if response.status_code != 200:
    print("Request failed")
    print(response.status_code)
    print(response.text)
    raise Exception("non-200 es response code")

es_response_json = json.loads(response.text)
print(es_response_json)
print("Successful request")

# A multi-search style reply wraps each result in "responses"; a plain _search
# reply carries "hits" at the top level. Handle both so hits are always shown.
if "responses" in es_response_json:
    hits = es_response_json["responses"][0]["hits"]["hits"]
else:
    hits = es_response_json.get("hits", {}).get("hits", [])

for hit in hits:
    print(hit)
Updated about 1 month ago