import json
import random
import string

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Flatten, Dense
from tensorflow.keras.models import Model

# Load the intents file and flatten it into a DataFrame.
data = json.load(open('/content/sample_data/Intent.json'))
data = data["intents"]
df = pd.DataFrame(data)
df.head()

# Split into pattern (user input) and response (bot output) tables.
df_patterns = df[['text', 'intent']]
df_responses = df[['responses', 'intent']]

# Each row holds a list of patterns/responses; explode to one per row.
df_patterns = df_patterns.explode('text')
df_responses = df_responses.explode('responses')

# Lowercase and strip punctuation.
df_patterns['text'] = df_patterns['text'].apply(
    lambda x: ''.join(ch for ch in x if ch not in string.punctuation).lower())
df_responses['responses'] = df_responses['responses'].apply(
    lambda x: ''.join(ch for ch in x if ch not in string.punctuation).lower())

# Tokenize the patterns and pad them to a common length.
tokenizer = Tokenizer(num_words=2000)
tokenizer.fit_on_texts(df_patterns['text'])
sequences = tokenizer.texts_to_sequences(df_patterns['text'])
x_train = pad_sequences(sequences)

# Encode intent labels as integers.
le = LabelEncoder()
y_train = le.fit_transform(df_patterns["intent"])

input_shape = x_train.shape[1]
print(input_shape)

# Vocabulary size and number of output classes.
num_vocabulary = len(tokenizer.word_index)
print("number of unique words:", num_vocabulary)
output_length = le.classes_.shape[0]
print("output length:", output_length)

# Build the model: embedding -> LSTM -> flatten -> softmax over intents.
i = Input(shape=(input_shape,))
x = Embedding(num_vocabulary + 1, 10)(i)
x = LSTM(10, return_sequences=True)(x)
x = Flatten()(x)
x = Dense(output_length, activation="softmax")(x)
model = Model(i, x)

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
train = model.fit(x_train, y_train, epochs=200)

# Plot training accuracy and loss.
plt.plot(train.history["accuracy"], label="training set accuracy")
plt.plot(train.history["loss"], label="training set loss")
plt.legend()
plt.show()

# Simple chat loop: preprocess the user input exactly as the training
# data was preprocessed, predict an intent, and reply with a random
# response registered for that intent.
while True:
    prediction_input = input("You: ")
    prediction_input = ''.join(
        ch.lower() for ch in prediction_input if ch not in string.punctuation)
    seq = tokenizer.texts_to_sequences([prediction_input])
    seq = pad_sequences(seq, maxlen=input_shape)
    output = model.predict(seq)
    response_tag = le.inverse_transform([output.argmax()])[0]
    print("AI:", random.choice(
        df_responses[df_responses["intent"] == response_tag]["responses"].values))
    if response_tag == "GoodBye":
        break
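# Note (illustrative assumption): the script expects Intent.json to be a
# dict with an "intents" list, where each entry pairs an intent tag with
# example user phrasings and candidate replies. The key names ("intent",
# "text", "responses") are taken from the code above; the sample values
# below are made up for illustration, roughly:
#
# {
#   "intents": [
#     {
#       "intent": "Greeting",
#       "text": ["Hi", "Hello there"],
#       "responses": ["Hello!", "Hi, how can I help?"]
#     },
#     {
#       "intent": "GoodBye",
#       "text": ["Bye", "See you later"],
#       "responses": ["Goodbye!", "Talk to you soon."]
#     }
#   ]
# }
#
# Because "text" and "responses" are lists, DataFrame.explode() above
# turns each list element into its own row before training.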