import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Make NumPy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

print(f"TensorFlow version: {tf.__version__}\n")
# https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/keras/regression.ipynb#scrollTo=gFh9ne3FZ-On
# First download and import the dataset using pandas:
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(url, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)
dataset = raw_dataset.copy()
print(dataset.tail())
# The dataset contains a few unknown values:
print(dataset.isna().sum())
# Drop those rows to keep this initial tutorial simple:
dataset = dataset.dropna()
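# A minimal alternative sketch (not part of the original tutorial): the
# missing values, which are all in 'Horsepower', could instead be imputed
# with the column median to keep every row. 'imputed' is a hypothetical
# name; the rest of this file keeps using the dropped-rows 'dataset'.
imputed = raw_dataset.copy()
imputed['Horsepower'] = imputed['Horsepower'].fillna(
    imputed['Horsepower'].median())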
# Now, split the dataset into a training set and a test set.
# You will use the test set in the final evaluation of your models.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html
# Return a random sample of items from an axis of object.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
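# Added sanity check (sketch): the sampled split should be roughly 80/20,
# with no overlap between the two index sets.
assert train_dataset.index.intersection(test_dataset.index).empty
print(f"Train rows: {len(train_dataset)}, test rows: {len(test_dataset)}")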
print("Initial dataset:")
print(train_dataset)
sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')
print()
print(train_dataset.describe().transpose())
# Separate the target value—the "label"—from the features.
# This label is the value that you will train the model to predict.
train_features = train_dataset.copy()
test_features = test_dataset.copy()
train_labels = train_features.pop('MPG')
test_labels = test_features.pop('MPG')
print("\n" + "Training features (removed MPG) dataset:")
print(train_features)
# Normalization:
# In the table of statistics it's easy to see how different the ranges of each feature are:
print()
print(train_dataset.describe().transpose()[['mean', 'std']])
print()
# The Normalization layer
# The tf.keras.layers.Normalization is a clean and simple way to add feature normalization into your model.
# The first step is to create the layer:
normalizer = tf.keras.layers.Normalization(axis=-1)
# Then, fit the state of the preprocessing layer to the data by calling
# Normalization.adapt, which calculates the mean and variance of each
# feature and stores them in the layer:
normalizer.adapt(np.array(train_features))
print(normalizer.mean.numpy())
# When the layer is called, it returns the input data, with each feature independently normalized:
first = np.array(train_features[:1])
print()
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())
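# Added verification sketch: assuming the layer applies plain
# standardization, (x - mean) / sqrt(variance), its output should match a
# manual computation from the stored statistics.
manual = (first - normalizer.mean.numpy()) / np.sqrt(normalizer.variance.numpy())
print('Matches manual standardization:',
      np.allclose(manual, normalizer(first).numpy(), atol=1e-3))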
# ---- Linear regression with one variable
# Begin with a single-variable linear regression to predict 'MPG' from 'Horsepower'.
# First, create a NumPy array made of the 'Horsepower' features.
# Then, instantiate the tf.keras.layers.Normalization and fit its state to the horsepower data:
horsepower = np.array(train_features['Horsepower'])
horsepower_normalizer = layers.Normalization(input_shape=[1], axis=None)
horsepower_normalizer.adapt(horsepower)
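# Added check (sketch): the adapted mean should agree with NumPy's mean of
# the raw horsepower column.
print('Adapted mean matches np.mean:',
      np.isclose(float(np.squeeze(horsepower_normalizer.mean.numpy())),
                 horsepower.mean()))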
# Build the Keras Sequential model:
horsepower_model = tf.keras.Sequential([
    horsepower_normalizer,
    layers.Dense(units=1)
])
print()
horsepower_model.summary()
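# Added sketch: with one normalized input and a single Dense unit, this
# model is just y = w * x_norm + b. The (still random, untrained)
# parameters can be read from the Dense layer, here assumed to sit at
# index 1 of model.layers (after the normalization layer):
w, b = horsepower_model.layers[1].get_weights()
print(f"Initial weight: {float(w.squeeze()):.3f}, bias: {float(b.squeeze()):.3f}")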
# ----- This model will predict 'MPG' from 'Horsepower'.
# Run the untrained model on the first 10 'Horsepower' values.
# The output won't be good, but notice that it has the expected shape of (10, 1):
print(horsepower_model.predict(horsepower[:10]))
horsepower_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')
history = horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
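# Added sketch: report the final training and validation error (both in
# MPG, since the loss is mean absolute error).
print(f"Final loss: {hist['loss'].iloc[-1]:.3f}, "
      f"val_loss: {hist['val_loss'].iloc[-1]:.3f}")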
def plot_loss(history):
    # Start a new figure so this plot does not draw over earlier ones
    # when the file runs as a script.
    plt.figure()
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim([0, 10])
    plt.xlabel('Epoch')
    plt.ylabel('Error [MPG]')
    plt.legend()
    plt.grid(True)
plot_loss(history)
test_results = {}
test_results['horsepower_model'] = horsepower_model.evaluate(
    test_features['Horsepower'],
    test_labels, verbose=0)
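# Added sketch (mirroring a later step in the linked tutorial): collecting
# the scores in a DataFrame makes it easy to compare models side by side
# once more of them are added.
print(pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).T)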
x = tf.linspace(0.0, 250, 251)
y = horsepower_model.predict(x)
def plot_horsepower(x, y):
    # New figure, so the fit line is not drawn onto the loss plot.
    plt.figure()
    plt.scatter(train_features['Horsepower'], train_labels, label='Data')
    plt.plot(x, y, color='k', label='Predictions')
    plt.xlabel('Horsepower')
    plt.ylabel('MPG')
    plt.legend()
plot_horsepower(x, y)
# Render all of the figures created above when running as a script.
plt.show()