This notebook presents a simple Multi-Layer Perceptron model in Keras to solve the College Admissions problem
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Limit TensorFlow GPU memory usage
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config):
    pass  # init session with allow_growth
Load and show raw, unprocessed data
dataset_file = '../Datasets/college-admissions/college_admissions.csv'
df = pd.read_csv(dataset_file)
df.head()
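A quick sanity check of the raw data can also help (a hedged sketch; the column name 'admit' is taken from the preprocessing code below):
# Dataset size and class balance of the admission target
print(df.shape)
print(df['admit'].value_counts())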
Preprocess dataset
# Create dummies
temp = pd.get_dummies(df['rank'], prefix='rank')
data = pd.concat([df, temp], axis=1)
data.drop(columns='rank', inplace=True)
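# Optional sanity check (sketch): 'rank' should now be replaced by one-hot
# columns rank_1 ... rank_4 alongside admit, gre, gpa
print(data.columns.tolist())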
# Normalize
for col in ['gre', 'gpa']:
    mean, std = data[col].mean(), data[col].std()
    # data.loc[:, col] = (data[col]-mean) / std
    data[col] = (data[col]-mean) / std
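# Optional sanity check (sketch): standardized columns should have mean ~0 and std ~1
for col in ['gre', 'gpa']:
    print(col, round(data[col].mean(), 3), round(data[col].std(), 3))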
# Split off a random 10% of the data for testing (90% of rows are sampled for training)
np.random.seed(0) # for reproducibility
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.iloc[sample], data.drop(sample)
# Split into features and targets
features_train = data.drop('admit', axis=1)
targets_train = data['admit']
features_test = test_data.drop('admit', axis=1)
targets_test = test_data['admit']
# Convert to numpy
x_train = features_train.values # features train set (numpy)
y_train = targets_train.values[:,None] # targets train set (numpy)
x_test = features_test.values # features validation set (numpy)
y_test = targets_test.values[:,None] # targets validation set (numpy)
# Assert shapes came right way around
assert x_train.shape == (360, 6)
assert y_train.shape == (360, 1)
assert x_test.shape == (40, 6)
assert y_test.shape == (40, 1)
The training data looks like this
x_train[0:6].round(2)
y_train[0:6]
Model with one hidden layer and one output layer
from tensorflow.keras.layers import Dense
model = tf.keras.Sequential()
model.add(Dense(units=128, input_dim=6, activation='sigmoid'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
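# Note: the summary should report 1,025 trainable parameters:
# 6*128 + 128 = 896 for the hidden layer and 128 + 1 = 129 for the output layer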
history = model.fit(x=x_train, y=y_train, batch_size=len(x_train), epochs=500, verbose=0)
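As an optional variation (a sketch, not what is run above), the test split could be passed as validation data so Keras tracks test loss and accuracy every epoch:
# history = model.fit(x=x_train, y=y_train,
#                     validation_data=(x_test, y_test),
#                     batch_size=len(x_train), epochs=500, verbose=0)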
Show final results
loss, acc = model.evaluate(x_train, y_train, verbose=0)
print(f'Accuracy on train set: {acc:.2f}')
loss, acc = model.evaluate(x_test, y_test, verbose=0)
print(f'Accuracy on test set: {acc:.2f}')
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['acc'], label='acc', color='red')
plt.legend();
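To turn the sigmoid outputs into hard admit/reject predictions, a minimal sketch (thresholding the predicted probabilities at 0.5; the accuracy computed here should match model.evaluate above):
# Predicted admission probabilities and hard 0/1 class labels for the test set
probs = model.predict(x_test)
preds = (probs > 0.5).astype(int)
manual_acc = (preds == y_test).mean()
print(f'Manually computed test accuracy: {manual_acc:.2f}')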