
Use Google Colab, Google Data Studio and Google Sheets to Build Predictive Business Intelligence

(Image by Rostyslav Neskorozhenyi)

Load Data

# Install the Kaggle CLI and mount Google Drive so the notebook can read files stored there
!pip install kaggle
from google.colab import drive
drive.mount('/content/drive/')

# Switch to the working directory on Drive
import os
os.chdir("/content/drive/My Drive/Colab Notebooks")

# Download the competition data (requires a Kaggle API token)
!kaggle competitions download -c online-sales
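The Kaggle CLI only works once an API token is in place. A minimal sketch of that setup, assuming you have downloaded kaggle.json from your Kaggle account page, uploaded it to the same Drive folder, and that the competition archive is named online-sales.zip (both file locations are assumptions):

# Hypothetical setup: copy the API token to the location the Kaggle CLI expects
!mkdir -p ~/.kaggle
!cp "/content/drive/My Drive/Colab Notebooks/kaggle.json" ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# The download arrives as a zip archive; extract it next to the notebook
!unzip -o online-sales.zip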
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Load the training data and replace missing values with zeros
df = pd.read_csv("TrainingDataset.csv")
df = df.replace(np.nan, 0, regex=True)
(Figure: preview of the loaded DataFrame. Image by Rostyslav Neskorozhenyi)
from sklearn.model_selection import train_test_split

# Hold out 20% of the products for testing
df_train, df_test = train_test_split(df, test_size=0.2)
print(df_train.shape)
print(df_test.shape)
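In this dataset the first 12 columns hold the monthly sales outcomes and the remaining columns hold product features, which is the layout the slicing in "Prepare data" relies on. A quick check to confirm it:

# Confirm which columns are monthly outcomes and how many feature columns follow
print(df.columns[:12].tolist())   # expected: the 12 monthly outcome columns
print(len(df.columns) - 12)       # number of feature columns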

Exploratory Data Analysis

# Plot the monthly outcome curves of 10 randomly chosen products
import random

indexes = random.sample(range(len(df)), 10)
df_plot = pd.DataFrame()
for i in indexes:
    df_plot["product_" + str(i)] = df.iloc[i, 0:12]
df_plot.plot();
(Figure: monthly sales of 10 random products. Image by Rostyslav Neskorozhenyi)

Prepare data

# The first 12 columns are the monthly outcomes (targets), the rest are product features
y_train_real = df_train.iloc[:, 0:12].values
print(y_train_real.shape)
y_test_real = df_test.iloc[:, 0:12].values
print(y_test_real.shape)
x_train_real = df_train.iloc[:, 12:].values
x_test_real = df_test.iloc[:, 12:].values
print(x_train_real.shape)
print(x_test_real.shape)

# Scale features and targets to the [0, 1] range
from sklearn.preprocessing import MinMaxScaler

x_scaler = MinMaxScaler()
x_scaler = x_scaler.fit(x_train_real)
x_train = x_scaler.transform(x_train_real)
x_test = x_scaler.transform(x_test_real)
y_scaler = MinMaxScaler()
y_scaler = y_scaler.fit(y_train_real)
y_train = y_scaler.transform(y_train_real)
y_test = y_scaler.transform(y_test_real)
# Build one training set per forecast horizon k (k = 0 .. 11 months).
# For horizon k each sample is a sequence of k+1 time steps; every step holds
# the product features plus the outcome of the previous month (0 for the first step).
x_train_series = []
x_test_series = []
for k in range(len(y_train[0])):
    x_train_series.append(np.zeros((x_train.shape[0], k+1, x_train.shape[1]+1)))
for k in range(len(y_test[0])):
    x_test_series.append(np.zeros((x_test.shape[0], k+1, x_test.shape[1]+1)))

for k in range(len(y_train[0])):
    for i in range(len(x_train)):
        for j in range(k + 1):
            shifted_index = j - 1
            if shifted_index < 0:
                x_train_series[k][i, j] = np.append(x_train[i], 0)
            else:
                x_train_series[k][i, j] = np.append(x_train[i], y_train[i, shifted_index])

for k in range(len(y_test[0])):
    for i in range(len(x_test)):
        for j in range(k + 1):
            shifted_index = j - 1
            if shifted_index < 0:
                x_test_series[k][i, j] = np.append(x_test[i], 0)
            else:
                x_test_series[k][i, j] = np.append(x_test[i], y_test[i, shifted_index])

# The target for horizon k is the outcome of month k
y_train_series = []
y_test_series = []
for k in range(len(y_train[0])):
    y_train_series.append(np.zeros((len(y_train), 1)))
    y_test_series.append(np.zeros((len(y_test), 1)))

for k in range(len(y_train[0])):
    y_train_series[k] = y_train[:, k].reshape(-1, 1)
    y_test_series[k] = y_test[:, k].reshape(-1, 1)
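As a sanity check, the input shapes should follow the pattern (number of products, horizon + 1, number of features + 1) and the target shapes (number of products, 1). For example:

# For a 3-month horizon (k = 2) each sample is a 3-step sequence
print(x_train_series[2].shape)   # (len(df_train), 3, x_train.shape[1] + 1)
print(y_train_series[2].shape)   # (len(df_train), 1)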

Building LSTM Neural Network

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
# One LSTM layer over variable-length sequences: each time step carries the product
# features plus one extra value (the previous month's outcome), so the same model
# can be trained on every horizon length.
model = Sequential()
model.add(LSTM(256, input_shape=(None, x_train.shape[1]+1)))
model.add(Dropout(0.5))
model.add(Dense(128, activation="relu"))
model.add(Dense(1))
model.summary()
(Figure: model.summary() output)
model.compile(loss='mse', optimizer='rmsprop')

# Train the same model sequentially on every horizon, from 1 to 12 months
for i in range(len(x_train_series)):
    print(i)
    model.fit(x_train_series[i], y_train_series[i], epochs=10, validation_split=0.2)

# Evaluate on the test set for each horizon
for i in range(len(x_test_series)):
    accr = model.evaluate(x_test_series[i], y_test_series[i])
    print("Predicting outcome after {} months. MSE:".format(i + 1), accr)
(Figure: per-horizon MSE output)
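The reported MSE is in the scaled [0, 1] space. To judge the error in real sales units, invert the scaling for a given horizon with the scaler's attributes; a small sketch for the 12-month horizon:

# Error of the 12-month horizon in original (unscaled) sales units
preds = model.predict(x_test_series[11])                       # scaled predictions, shape (n, 1)
preds_real = preds[:, 0] * y_scaler.data_range_[11] + y_scaler.data_min_[11]
mae = np.abs(preds_real - y_test_real[:, 11]).mean()
print("MAE after 12 months:", mae)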

Outcome prediction function

def predictor(features, history, future):
    '''
    features: list of product features
    history: list with outcome per month
    future: int, number of months to predict outcome
    '''
    if future == 0:
        return history

    # Build a single input sequence from the features and the known history
    p_serie = np.zeros((1, len(history), len(features)+1))
    for j in range(len(history)):
        shifted_index = j - 1
        if shifted_index < 0:
            p_serie[0, j] = np.append(features, 0)
        else:
            p_serie[0, j] = np.append(features, history[shifted_index])

    # Predict the next month, append it to the history and recurse
    prediction = model.predict(p_serie)
    history.append(prediction[0][0])
    future -= 1

    return predictor(features, history, future)
# Pick a random test product, keep its first 6 months of real outcomes
# and predict the following 6 months
import random

n = random.choice(range(len(x_test)-1))
m = 6
future = 6
features = x_test[n].tolist()
history = y_test[n, 0:m].tolist()
prediction = predictor(features, history, future)

# Compare the predicted curve with the real one in the original scale
plt.plot(y_scaler.inverse_transform([prediction])[0])
plt.plot(y_scaler.inverse_transform([y_test[n, :m+future]])[0])
plt.title('Predicted and real outcome')
plt.legend(['predicted', 'real'], loc='upper left')
axes = plt.gca()
plt.show()
(Figure: predicted vs. real outcome. Image by Rostyslav Neskorozhenyi)
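To reuse the trained network outside this notebook, it can be persisted to Drive with the standard Keras saving API; a minimal sketch (the file name is arbitrary):

# Save the trained model so it can be reloaded later without retraining
model.save('/content/drive/My Drive/Colab Notebooks/sales_lstm.h5')

# from keras.models import load_model
# model = load_model('/content/drive/My Drive/Colab Notebooks/sales_lstm.h5')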

Display predictions in Google Data Studio

# Install libraries for working with Google Sheets via a service account
!pip install --upgrade oauth2client gspread

import gspread
from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('spreadkey.json', scope)
gc = gspread.authorize(credentials)
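The target spreadsheet must be shared with the service account, otherwise gspread will not be able to open it. A quick way to see the address to share with is to read it from the same spreadkey.json key file (service-account keys contain a client_email field):

# The service-account address that needs edit access to the spreadsheet
import json
with open('spreadkey.json') as f:
    print(json.load(f)['client_email'])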
# Generate predictions for three random test products and collect both the
# predicted and the real outcomes
real_data = []
predicted_data = []
for i in range(3):
    n = random.choice(range(len(x_test)-1))
    m = 6
    future = 6
    features = x_test[n].tolist()
    history = y_test[n, 0:m].tolist()
    prediction = predictor(features, history, future)
    predicted_data.append(y_scaler.inverse_transform([prediction])[0])
    real_data.append(y_scaler.inverse_transform([y_test[n, :m+future]])[0])

# Open the spreadsheet and its two worksheets
ws = gc.open('TrainingDataset2')
ws_predicted = ws.worksheet("predicted")
ws_real = ws.worksheet("real")

# One column per product, one row per month (row 1 is left for headers)
for j in range(len(real_data)):
    for i in range(len(real_data[0])):
        ws_predicted.update_cell(i+2, j+1, float(predicted_data[j][i]))
        ws_real.update_cell(i+2, j+1, float(real_data[j][i]))


for i in range(len(real_data[0])):
    # add index column
    ws_predicted.update_cell(i+2, len(real_data)+1, i)
    ws_real.update_cell(i+2, len(real_data)+1, i)

ws_real.get_all_records()[6:11]
(Figures: the data in Google Sheets and its visualization in Google Data Studio. Image by Rostyslav Neskorozhenyi)
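Writing cell by cell with update_cell issues one API request per value, which is slow and can hit rate limits; gspread also supports updating a whole range in one call. A rough sketch of the same write done in bulk for the "predicted" worksheet (same column layout as above, repeat for ws_real):

# Build rows of [product_1, product_2, product_3, index] and write them in one request
rows = [[float(predicted_data[j][i]) for j in range(len(predicted_data))] + [i]
        for i in range(len(predicted_data[0]))]
ws_predicted.update(range_name='A2', values=rows)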

Conclusion

In this walkthrough we trained an LSTM network in Google Colab to forecast monthly product sales from product features and previously observed outcomes, wrote the predictions to Google Sheets with gspread, and used the sheet as a data source for visualization in Google Data Studio.
