Python source code examples: elasticsearch.Elasticsearch()
Example 1
def run(self):
    with self.input()['Emotion'].open('r') as fopen:
        emotions = json.load(fopen)
    es = Elasticsearch()
    for i in range(0, len(emotions), self.batch_size):
        batch = emotions[i : min(i + self.batch_size, len(emotions))]
        actions = [
            {
                '_index': self.index,
                '_type': 'text',
                '_id': '%d-%s' % (i + j, self.summary),
                '_source': batch[j],
            }
            for j in range(len(batch))
        ]
        helpers.bulk(es, actions)
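Worth noting: `helpers.bulk` already chunks its action iterable internally (its `chunk_size` defaults to 500), so the manual slicing above mainly bounds how much is materialized at once. A generator-based sketch of the same indexing, reusing the names from Example 1:

def gen_actions():
    # Lazily yield one action per document instead of building batch lists.
    for j, doc in enumerate(emotions):
        yield {
            '_index': self.index,
            '_type': 'text',
            '_id': '%d-%s' % (j, self.summary),
            '_source': doc,
        }

helpers.bulk(es, gen_actions(), chunk_size=self.batch_size)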
Example 2
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--es', type=str,
                        help='Root URL to Elasticsearch, ie http://localhost:9200 (defaults to envvar ELYZER_ES_URL or localhost:9200)',
                        action=EnvDefault,
                        required=True,
                        envvar='ELYZER_ES_URL',
                        default='http://localhost:9200')
    parser.add_argument('--index', type=str, action=EnvDefault,
                        required=True, envvar='ELYZER_INDEX',
                        help='Name of the index to find the analyzer, ie tweets (defaults to envvar ELYZER_INDEX)')
    parser.add_argument('--analyzer', type=str, action=EnvDefault, required=True,
                        envvar='ELYZER_ANALYZER',
                        help='Name of the custom analyzer, ie my_text_analyzer (defaults to envvar ELYZER_ANALYZER)')
    parser.add_argument('text', type=str,
                        help='Text to analyze, ie "mary had a little lamb"')
    return vars(parser.parse_args())
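This snippet relies on a custom `EnvDefault` argparse action that is not shown. A minimal sketch of the usual pattern (an assumption; the real elyzer implementation may differ): fall back to an environment variable, and drop the `required` flag once a value is available.

import argparse
import os

class EnvDefault(argparse.Action):
    # Hypothetical reconstruction: prefer the env var, then the default,
    # and only keep the argument required if neither supplies a value.
    def __init__(self, envvar=None, required=True, default=None, **kwargs):
        if envvar and envvar in os.environ:
            default = os.environ[envvar]
        if required and default is not None:
            required = False
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)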
Example 3
def main():
    try:
        args = parse_args()
        es = Elasticsearch(args['es'])
        stepWise(es=es,
                 text=args['text'],
                 indexName=args['index'],
                 analyzer=getAnalyzer(indexName=args['index'],
                                      analyzerName=args['analyzer'],
                                      es=es))
    except KeyboardInterrupt:
        print('Interrupted')
    except AnalyzerNotFound as e:
        print(e.error)
    except TransportError as e:
        print("Unexpected Elasticsearch Transport Exception:")
        print(e.error)
        print(e.info)
Example 4
def _add_prefix(self, *args, **kwargs):
    if args:
        index = args[0].strip()
    else:
        index = kwargs.get("index", "").strip()
    if index is None or index == "":
        raise NotImplementedError("Elasticsearch index not specified.")
    prefix = "%s_" % self.prefix.strip() if self.prefix and self.prefix.strip() != "" else ""
    ret = []
    for idx in index.split(","):
        ret.append("%s%s" % (prefix, idx))
    index = ",".join(ret)
    if args:
        return index
    else:
        return dict(kwargs, index=index)
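A quick sketch of how the prefixing behaves, using a tiny stand-in class with a hypothetical `prefix = "test"`: the positional form returns a plain string, while the keyword form returns the updated kwargs.

class PrefixDemo:
    prefix = "test"
    _add_prefix = _add_prefix  # bind the function above as a method

demo = PrefixDemo()
print(demo._add_prefix("logs,metrics"))  # -> test_logs,test_metrics
print(demo._add_prefix(index="logs"))    # -> {'index': 'test_logs'}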
Example 5
def __init__(self, client, params=None, **kwargs):
    '''
    API for performing easy bulk operations in Elasticsearch.
    :arg client: instance of official Elasticsearch Python client.
    :arg index: Default index for items which don't provide one
    :arg doc_type: Default document type for items which don't provide one
    :arg consistency: Explicit write consistency setting for the operation
    :arg refresh: Refresh the index after performing the operation
    :arg routing: Specific routing value
    :arg replication: Explicitly set the replication type (default: sync)
    :arg timeout: Explicit operation timeout
    .. Note:: all the arguments passed at the time of creating a new bulk
        operation can be overridden when :meth:`BulkOperation.execute`
        is called.
    '''
    self._client = client
    self._params = params
    self._actions = []
Example 6
def configure(parser: ArgumentParser) -> Callable:
    parser.add_argument(
        '-b', '--brokers', dest='brokers', required=True, type=str,
        help='Kafka brokers to bootstrap from as a comma separated list of <host>:<port>')
    parser.add_argument(
        '-c', '--es-clusters', dest='es_clusters', required=True, type=str,
        help='Elasticsearch servers to bootstrap from as a comma separated list of <host>:<port>')
    parser.add_argument(
        '-t', '--topic', dest='topics', required=True, type=str, nargs='+',
        help='Kafka topic(s) to read indexing requests from. Multiple topics may be provided.')
    parser.add_argument(
        '-g', '--group-id', dest='group_id', type=str, default='TODO',
        help='Kafka consumer group to join')
    parser.add_argument(
        '--prometheus-port', dest='prometheus_port', default=9170, type=int, required=False,
        help='Port to export prometheus metrics over.')
    return main
Example 7
def indices_map(clusters: List[Elasticsearch]) -> Mapping[str, Elasticsearch]:
    """Map from addressable index name to the elasticsearch client that contains it

    Index names that exist on multiple clusters are treated as existing on
    no clusters. Essentially this only tracks indices that are unique to
    the cluster they live on.
    """
    indices = cast(Dict[str, Optional[Elasticsearch]], dict())
    for elastic in clusters:
        for index_name, data in elastic.indices.get_alias().items():
            for name in [index_name] + list(data['aliases'].keys()):
                if name not in indices:
                    indices[name] = elastic
                # If an index name exists on multiple clusters we
                # pretend it doesn't exist on any of them.
                elif indices[name] != elastic:
                    indices[name] = None
    return {k: v for k, v in indices.items() if v is not None}
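The core "unique owner" reduction, sketched on plain data with strings standing in for Elasticsearch clients (the inputs are hypothetical):

owners = {}
for cluster, names in [('es1', ['a', 'b']), ('es2', ['b', 'c'])]:
    for name in names:
        if name not in owners:
            owners[name] = cluster
        elif owners[name] != cluster:
            owners[name] = None  # seen on two clusters: treat as on none
print({k: v for k, v in owners.items() if v is not None})  # {'a': 'es1', 'c': 'es2'}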
Example 8
def __init__(self, client: Elasticsearch):
    super().__init__(client)
    self.cache = CacheClient(client)
    self.store = FeatureStoreClient(client)
    self.feature = FeatureClient(client)
    self.feature_set = FeatureSetClient(client)
    self.model = ModelClient(client)

# Domain objects stored in the plugin. These offer a very simple interface for
# constructing requests and interpreting results of objects stored in the ltr
# plugin.
#
# Note that when encoding these objects to send to the plugin they are almost
# always wrapped in a single-value dict containing the type. So for example to
# add a feature to a feature store:
#
#   feature = StoredFeature('test', ['keywords'], 'mustache', {"match": {"title": "{{keywords}}"}})
#   response = ltr_client.feature.create(feature.name, {'feature': feature.to_dict()})
Example 9
def __init__(self, config=None):
    if isinstance(config, (dict, OrderedDict)):
        self.config = config
    elif isinstance(config, str):
        try:
            with open(config, "r") as fconf:
                self.config = json.load(fconf)
        except (IOError, ValueError):
            self.config = {}
    else:
        # Guard against config=None, which would otherwise leave
        # self.config unset and break the .get() calls below.
        self.config = {}
    self.username = self.config.get("username", "data_security_es_45")
    self.password = self.config.get("password", "Nb6121ca7ffe3")
    es_url = self.config.get("es_url", ['http://zsearch.alipay.com:9999'])
    if isinstance(es_url, list):
        self.es_url = es_url
    else:
        self.es_url = [es_url]
    self.es = Elasticsearch(self.es_url, http_auth=(self.username, self.password))
Example 10
def insert_record_to_ssdeep_index(ssdeep_value, sha256):
    """
    Adds a record to the ssdeep index in Elasticsearch
    :param ssdeep_value: The ssdeep hash value of the item
    :param sha256: The sha256 hash value of the item
    """
    chunksize, chunk, double_chunk = ssdeep_value.split(':')
    chunksize = int(chunksize)
    es = elasticsearch.Elasticsearch(['localhost:9200'])
    document = {'chunksize': chunksize, 'chunk': chunk, 'double_chunk': double_chunk,
                'ssdeep': ssdeep_value, 'sha256': sha256}
    es.index(index='ssdeep-index', doc_type='record', body=document)
    es.indices.refresh(index='ssdeep-index')
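The three-way split relies on the ssdeep digest format, which is always `chunksize:chunk:double_chunk`; a quick illustration with a made-up digest value:

sample = '96:s4Ud1Lj96tHHlZDrwcirXmDBS5tp:sF1LjEtHHlZDrwJ'  # hypothetical digest
chunksize, chunk, double_chunk = sample.split(':')
print(int(chunksize))   # 96: the block size ssdeep used
print(chunk)            # signature computed at the block size
print(double_chunk)     # signature computed at twice the block size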
Example 11
def __init__(
    self,
    host="localhost",
    port=443,
    path="",
    scheme="https",
    user=None,
    password=None,
    context=None,
    **kwargs,
):
    super().__init__(
        host=host,
        port=port,
        path=path,
        scheme=scheme,
        user=user,
        password=password,
        context=context,
        **kwargs,
    )
    if user and password:
        self.es = Elasticsearch(self.url, http_auth=(user, password), **self.kwargs)
    else:
        self.es = Elasticsearch(self.url, **self.kwargs)
Example 12
def __init__(
    self,
    host="localhost",
    port=9200,
    path="",
    scheme="http",
    user=None,
    password=None,
    context=None,
    **kwargs,
):
    super().__init__(
        host=host,
        port=port,
        path=path,
        scheme=scheme,
        user=user,
        password=password,
        context=context,
        **kwargs,
    )
    if user and password:
        self.es = Elasticsearch(self.url, http_auth=(user, password), **self.kwargs)
    else:
        self.es = Elasticsearch(self.url, **self.kwargs)
Example 13
def pull_to_elastic(**kwargs):
    ti = kwargs['ti']
    sentiments = ti.xcom_pull(task_ids='push_sentiment', key='sentiment')
    es = Elasticsearch()
    for i in range(0, len(sentiments), batch_size):
        batch = sentiments[i : min(i + batch_size, len(sentiments))]
        actions = [
            {
                '_index': 'test_index',
                '_type': 'text',
                '_id': '%d-text' % (j + i),
                '_source': batch[j],
            }
            for j in range(len(batch))
        ]
        helpers.bulk(es, actions)
Example 14
def create_elasticsearch_connection(data_storage=None):
    db_conn = Elasticsearch([{'host': data_storage.get("connection_uri"), 'port': 9200}])
    return db_conn
Example 15
def get_es_connection():
    """Try to connect to Elasticsearch."""
    hosts = build_es_connection_hosts()
    try:
        return Elasticsearch(hosts)
    except Exception as e:
        logger.warning('Could not contact Elasticsearch with provided configuration.')
        logger.warning(e)
        sys.exit(1)
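Note that constructing the client does not open a network connection, so the `except` above rarely fires in practice. A common follow-up (a sketch reusing the names above) is an explicit health probe:

es = Elasticsearch(hosts)
if not es.ping():  # sends HEAD /; returns False on failure instead of raising
    logger.warning('Elasticsearch did not respond to ping.')
    sys.exit(1)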
Example 16
def __init__(self, hosts, index='pyspider'):
    self.index = index
    self.es = Elasticsearch(hosts=hosts)
    self.es.indices.create(index=self.index, ignore=400)
    if not self.es.indices.get_mapping(index=self.index, doc_type=self.__type__):
        self.es.indices.put_mapping(index=self.index, doc_type=self.__type__, body={
            "_all": {"enabled": True},
            "properties": {
                "taskid": {"enabled": False},
                "project": {"type": "string", "index": "not_analyzed"},
                "url": {"enabled": False},
            }
        })
Example 17
def __init__(self, hosts, index='pyspider'):
    self.index = index
    self.es = Elasticsearch(hosts=hosts)
    self.es.indices.create(index=self.index, ignore=400)
    if not self.es.indices.get_mapping(index=self.index, doc_type=self.__type__):
        self.es.indices.put_mapping(index=self.index, doc_type=self.__type__, body={
            "_all": {"enabled": False},
            "properties": {
                "updatetime": {"type": "double"}
            }
        })
Example 18
def __init__(self, hosts, index='pyspider'):
    self.index = index
    self._changed = False
    self.es = Elasticsearch(hosts=hosts)
    self.es.indices.create(index=self.index, ignore=400)
    if not self.es.indices.get_mapping(index=self.index, doc_type=self.__type__):
        self.es.indices.put_mapping(index=self.index, doc_type=self.__type__, body={
            "_all": {"enabled": False},
            "properties": {
                "project": {"type": "string", "index": "not_analyzed"},
                "status": {"type": "byte"},
            }
        })
Example 19
def get_elasticsearch(self):
    """
    Get a connection to the Elasticsearch cluster. Currently only supports a
    single host.
    :returns: ``elasticsearch.Elasticsearch``
    """
    return Elasticsearch(hosts=self.config['es_host'], timeout=30)
Example 20
def get_elasticsearch_helper(self):
    """
    Get the helpers module for Elasticsearch. Used to bulk index documents.
    :returns: package ``elasticsearch.helpers``
    """
    return helpers
Example 21
def get_es_client(timeout=60):
    return elasticsearch.Elasticsearch(
        hosts=[{"host": ELASTICSEARCH_SERVICE_HOSTNAME, "port": ELASTICSEARCH_SERVICE_PORT}],
        timeout=timeout)
Example 22
def __init__(self, **kwargs):
    # Adapt the project's JSON (de)serializers to the interface the client expects.
    serializer = JSONSerializer()
    serializer.mimetype = "application/json"
    serializer.dumps = serializer.serialize
    serializer.loads = JSONDeserializer().deserialize
    self.prefix = kwargs.pop("prefix", "").lower()
    self.es = Elasticsearch(serializer=serializer, **kwargs)
    self.logger = logging.getLogger(__name__)
Example 23
def create_mapping(self, index, fieldname="", fieldtype="string", fieldindex=None, body=None):
    """
    Creates an Elasticsearch mapping for a single field given an index name and type name
    """
    index = self._add_prefix(index)
    self.es.indices.create(index=index, ignore=400)
    self.es.indices.put_mapping(index=index, doc_type="_doc", body=body, include_type_name=True)
    # print(f"adding mapping to {index} : {body}")
Example 24
def index_data(self, index=None, body=None, idfield=None, id=None, **kwargs):
    """
    Indexes a document or list of documents into Elasticsearch.
    If "id" is supplied then it is used as the id of the document.
    If "idfield" is supplied then that property is looked up in the
    document itself and the value found is used as the id of the document.
    """
    index = self._add_prefix(index)
    if not isinstance(body, list):
        body = [body]
    for document in body:
        if idfield is not None:
            if isinstance(document, dict):
                id = document[idfield]
            else:
                id = getattr(document, idfield)
        try:
            self.es.index(index=index, doc_type="_doc", body=document, id=id)
        except Exception as detail:
            self.logger.warning(
                "%s: WARNING: failed to index document: %s \nException detail: %s\n"
                % (datetime.now(), document, detail)
            )
            raise detail
Example 25
def get_primary_key_fields(schema_filename, csv_filename, ignore_cql_schema):
    if schema_filename is None and csv_filename is None:
        print("")
        raise ValueError("Both schema file (.cql) and data file (.csv) are missing - Exit script")
    # If we didn't get a schema file but did get a csv file, or are ignoring the cql schema (default),
    # then select the first column from the csv as the key to be used as the ES id field
    if schema_filename is None or ignore_cql_schema is True:
        print("")
        print("## No schema provided / Ignoring schema -> using column1 from csv as 'id' for Elasticsearch index")
        with open(csv_filename, "r") as f:
            reader = csv.reader(f)
            headers_row = next(reader)
        return [headers_row[0]]
    else:
        with open(schema_filename, "r") as f:
            schema_file = f.read()
        # Check for a compound PK, i.e. PRIMARY KEY ((col1,col2),col3)
        print("")
        print("## Check schema ({0}) for compound primary key to be used as index id".format(schema_filename))
        m = re.search(r"PRIMARY KEY \(\((.+?)\)", schema_file, re.I)
        if m:
            keys = m.group(1).split(",")
            return [k.strip() for k in keys]
        # We didn't find a compound PK; try checking for a regular PK, i.e. PRIMARY KEY (col1,col2,col3)
        print("")
        print("## Did not find a compound primary key, checking for regular primary key to be used as index id")
        m = re.search(r"PRIMARY KEY \((.+)\)", schema_file, re.I)
        if m:
            keys = m.group(1).split(",")
            return [keys[0]]
        return []
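A quick check of the two PRIMARY KEY regexes against hypothetical CQL snippets:

import re

compound = "CREATE TABLE t (col1 int, PRIMARY KEY ((col1, col2), col3));"
regular = "CREATE TABLE t (col1 int, PRIMARY KEY (col1, col2));"

m = re.search(r"PRIMARY KEY \(\((.+?)\)", compound, re.I)
print([k.strip() for k in m.group(1).split(",")])  # ['col1', 'col2']

m = re.search(r"PRIMARY KEY \((.+)\)", regular, re.I)
print([m.group(1).split(",")[0]])  # ['col1']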
Example 26
def __init__(self):
    es_host = {'host': self.__hostname__, 'port': self.__port__}
    self.__es_conn__ = Elasticsearch(hosts=[es_host])
    self.set_up_index()
Example 27
def set_up_index(self):
    try:
        try:
            try:
                index_exists = self.__es_conn__.indices.exists(index=__index_name__)
                if not index_exists:
                    self.create_index()
                else:
                    res = self.__es_conn__.indices.get_mapping(index=__index_name__)
                    try:
                        current_version = res[__index_name__]['mappings']['_meta']['version']
                        # Check for a missing version before comparing it, since
                        # comparing None against an int raises TypeError.
                        if current_version is None:
                            logger.error("Old Index Mapping. Manually reindex the index to persist your data.")
                            print("\n -- Old Index Mapping. Manually reindex the index to persist your data.--\n")
                            sys.exit(1)
                        elif current_version < __index_version__:
                            self.update_index(current_version)
                    except KeyError:
                        logger.error("Old Index Mapping. Manually reindex the index to persist your data.")
                        print("\n -- Old Index Mapping. Manually reindex the index to persist your data.--\n")
                        sys.exit(1)
            except ESConnectionError as e:
                logger.error("Elasticsearch is not installed or its service is not running. {0}".format(e))
                print("\n -- Elasticsearch is not installed or its service is not running.--\n", e)
                sys.exit(1)
        except NewConnectionError:
            pass
    except ConnectionRefusedError:
        pass
Example 28
def __init__(self, type, params, body=None):
    if type not in BulkOperation.BULK_ACTIONS:
        raise Exception('%s action type is not a valid Elasticsearch bulk '
                        'action type.' % type)
    if BulkOperation.BULK_ACTIONS.get(type) and body is None:
        raise Exception('%s action type expects a body as well to be a '
                        'valid bulk operation.' % type)
    self.type = type
    self.params = params
    self.body = body
Example 29
def execute(self, params=None, **kwargs):
    '''
    Executes all recorded actions using Elasticsearch's Bulk Query.
    .. Note:: The arguments passed at the time of creating a bulk client
        will be overridden with the arguments passed to this method.
    :arg index: Default index for items which don't provide one
    :arg doc_type: Default document type for items which don't provide one
    :arg consistency: Explicit write consistency setting for the operation
    :arg refresh: Refresh the index after performing the operation
    :arg routing: Specific routing value
    :arg replication: Explicitly set the replication type (default: sync)
    :arg timeout: Explicit operation timeout
    '''
    # TODO: check if the percolate, timeout and replication parameters are
    # allowed for the bulk index operation
    bulk_body = ''
    for action in self._actions:
        bulk_body += action.es_op + '\n'
    bulk_kwargs = {}
    bulk_kwargs.update(self._params or {})  # guard: _params may be None
    bulk_kwargs.update(params or {})        # guard: params defaults to None
    resp = self._client.bulk(body=bulk_body, **bulk_kwargs)
    self._actions = []
    return resp
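For reference, the `bulk_body` assembled here is newline-delimited JSON: one metadata line per action, followed by a source line when the action carries a body. A hand-rolled sketch with hypothetical index and document values:

import json

actions = [
    ({'index': {'_index': 'tweets', '_id': '1'}}, {'user': 'mary'}),
    ({'delete': {'_index': 'tweets', '_id': '2'}}, None),  # delete has no body
]
bulk_body = ''
for meta, doc in actions:
    bulk_body += json.dumps(meta) + '\n'
    if doc is not None:
        bulk_body += json.dumps(doc) + '\n'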
Example 30
def __init__(self, username=None, password=None, **kwargs):
    if username is not None and password is not None:
        kwargs['http_auth'] = username, password
    self._es = Elasticsearch(timeout=300, maxsize=4096, **kwargs)
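Taken together, these examples construct the client in a handful of recurring ways; a condensed sketch for the pre-8.x Python client (host names and credentials are hypothetical):

from elasticsearch import Elasticsearch

es = Elasticsearch()                                        # defaults to localhost:9200
es = Elasticsearch(['http://es1:9200', 'http://es2:9200'])  # explicit host list
es = Elasticsearch(hosts=[{'host': 'es1', 'port': 9200}])   # dict-style hosts
es = Elasticsearch('http://es1:9200', http_auth=('user', 'secret'))  # basic auth
es = Elasticsearch('http://es1:9200', timeout=30)           # request timeout in seconds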