!wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.0.0-linux-x86_64.tar.gz
!tar -xzf elasticsearch-7.0.0-linux-x86_64.tar.gz
!chown -R daemon:daemon elasticsearch-7.0.0
!pip install elasticsearch -q
from subprocess import Popen, PIPE, STDOUT
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import time
import pprint
es_server = Popen(
['elasticsearch-7.0.0/bin/elasticsearch'],
stdout = PIPE, stderr = STDOUT,
preexec_fn = lambda: os.setuid(1))
!curl -X GET "localhost:9200/"
def gen_data(path='/content/gdrive/My Drive/finch/es/free_chat/data/basic.txt'):
    """Yield one bulk-index action per question/answer line of a corpus file.

    Every non-blank line of the file must have the form
    ``question<SEP>answer``. Only the first ``<SEP>`` separates the two
    fields, so an answer may itself contain ``<SEP>``.

    Args:
        path: Location of the corpus file. Defaults to the path this script
            originally hard-coded.

    Yields:
        dict: ``{'_index': 'chatbot', 'question': ..., 'answer': ...}``.

    Raises:
        ValueError: If a non-blank line contains no ``<SEP>`` separator.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(path, encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.rstrip()
            if not line:
                # Blank (or whitespace-only) lines carry no data; skip them
                # instead of failing on the unpacking below.
                continue
            q, sep, a = line.partition('<SEP>')
            if not sep:
                raise ValueError(
                    'line {} of {!r} has no <SEP> separator: {!r}'.format(
                        line_no, path, line))
            yield {
                '_index': 'chatbot',
                'question': q,
                'answer': a,
            }
from google.colab import drive

# Mount Google Drive; gen_data() reads its corpus from below /content/gdrive.
drive.mount('/content/gdrive')

es = Elasticsearch()

# The server is started in the background and needs a while before it accepts
# connections. Poll for up to a minute instead of assuming it is already up,
# then report the final state as before.
deadline = time.time() + 60
while not es.ping() and time.time() < deadline:
    time.sleep(1)
print(es.ping())

# Rebuild the index from scratch so this step can be re-run: creating an index
# that already exists is an error, and loading into a kept index would store
# every document twice. ignore=[400, 404] makes the delete a no-op on the
# first run, when the index does not exist yet.
es.indices.delete(index='chatbot', ignore=[400, 404])
es.indices.create(index='chatbot')

# Declare `question` as a full-text field; it is the field searched later.
mapping = {
    'properties': {
        'question': {
            'type': 'text',
        },
    },
}
es.indices.put_mapping(body=mapping, index='chatbot')

# Stream every question/answer pair from the corpus into the index.
helpers.bulk(es, gen_data())
# Interactive evaluation: for each typed question, show the best-matching
# indexed document and ask the user whether it was right. Typing `|quit` ends
# the session and prints the accuracy summary.
correct, total = 0., 0.
while True:
    text_inp = input('Input:')
    if text_inp == '|quit':
        break
    t0 = time.time()
    dsl = {
        'query': {
            'match': {
                'question': text_inp,
            },
        },
    }
    hits = es.search(index='chatbot', body=dsl)['hits']['hits']
    if not hits:
        # Nothing matched, so there is no hits[0] to show. Record the round
        # as an unanswered (incorrect) question rather than crashing.
        print('Match:', None)
        print('%.2f sec' % (time.time() - t0))
        total += 1
        print()
        continue
    print('Match:', hits[0]['_source'])
    print('%.2f sec' % (time.time() - t0))
    feedback = input('Is the answer correct?')
    if feedback == 'yes':
        correct += 1.
    total += 1
    print()

# Guard the division: quitting before rating any answer leaves total at 0.
accuracy = correct / total if total else 0.0
print('Correct: {} | Total: {} | Accuracy: {:.3f}'.format(correct, total, accuracy))