import json
import random
import string

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Flatten, Dense
from tensorflow.keras.models import Model

# Load the intents file and flatten it into a DataFrame.
data = json.load(open('/content/sample_data/Intent.json'))
data = data["intents"]
df = pd.DataFrame(data)
df.head()

# Split into pattern (user input) and response (bot output) tables.
df_patterns = df[['text', 'intent']]
df_responses = df[['responses', 'intent']]

# Each row holds a list of patterns/responses; explode to one per row.
df_patterns = df_patterns.explode('text')
df_responses = df_responses.explode('responses')

# Lowercase and strip punctuation.
df_patterns['text'] = df_patterns['text'].apply(
    lambda x: ''.join(ch for ch in x if ch not in string.punctuation).lower())
df_responses['responses'] = df_responses['responses'].apply(
    lambda x: ''.join(ch for ch in x if ch not in string.punctuation).lower())

# Tokenize the patterns and pad them to a common length.
tokenizer = Tokenizer(num_words=2000)
tokenizer.fit_on_texts(df_patterns['text'])
sequences = tokenizer.texts_to_sequences(df_patterns['text'])
x_train = pad_sequences(sequences)

# Encode intent labels as integers.
le = LabelEncoder()
y_train = le.fit_transform(df_patterns["intent"])

input_shape = x_train.shape[1]
print(input_shape)

# Vocabulary size and number of output classes.
num_vocabulary = len(tokenizer.word_index)
print("number of unique words:", num_vocabulary)
output_length = le.classes_.shape[0]
print("output length:", output_length)

# Build the model: embedding -> LSTM -> flatten -> softmax over intents.
i = Input(shape=(input_shape,))
x = Embedding(num_vocabulary + 1, 10)(i)
x = LSTM(10, return_sequences=True)(x)
x = Flatten()(x)
x = Dense(output_length, activation="softmax")(x)
model = Model(i, x)

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
train = model.fit(x_train, y_train, epochs=200)

# Plot training accuracy and loss.
plt.plot(train.history["accuracy"], label="training set accuracy")
plt.plot(train.history["loss"], label="training set loss")
plt.legend()
plt.show()

# Simple chat loop: preprocess the user input exactly as the training
# data was preprocessed, predict an intent, and reply with a random
# response registered for that intent.
while True:
    prediction_input = input("You: ")
    prediction_input = ''.join(
        ch.lower() for ch in prediction_input if ch not in string.punctuation)
    seq = tokenizer.texts_to_sequences([prediction_input])
    seq = pad_sequences(seq, maxlen=input_shape)
    output = model.predict(seq)
    response_tag = le.inverse_transform([output.argmax()])[0]
    print("AI:", random.choice(
        df_responses[df_responses["intent"] == response_tag]["responses"].values))
    if response_tag == "GoodBye":
        break
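# Note (illustrative assumption): the script expects Intent.json to be a
# dict with an "intents" list, where each entry pairs an intent tag with
# example user phrasings and candidate replies. The key names ("intent",
# "text", "responses") are taken from the code above; the sample values
# below are made up for illustration, roughly:
#
# {
#   "intents": [
#     {
#       "intent": "Greeting",
#       "text": ["Hi", "Hello there"],
#       "responses": ["Hello!", "Hi, how can I help?"]
#     },
#     {
#       "intent": "GoodBye",
#       "text": ["Bye", "See you later"],
#       "responses": ["Goodbye!", "Talk to you soon."]
#     }
#   ]
# }
#
# Because "text" and "responses" are lists, DataFrame.explode() above
# turns each list element into its own row before training.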