0. Import Library
import tensorflow as tf
from tensorflow.keras import datasets, utils
from tensorflow.keras import models, layers, activations, initializers, losses, optimizers, metrics
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress TensorFlow info/warning log messages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection, preprocessing
1. Prepare train & test data
(train_data, train_label), (test_data, test_label) = datasets.mnist.load_data()
print(train_data.shape) # number of training samples == 60000, each sample = 28px * 28px
print(test_data.shape) # number of test samples == 10000
print(train_label.shape)
print(test_label.shape)
import matplotlib.pyplot as plt
plt.imshow(train_data[0], cmap='gray') # the first of the 60000 training images
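As a quick check, the label for this first image can be printed as well (a tiny sketch; the first MNIST training label happens to be 5):
print(train_label[0]) #> 5, the digit shown above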
2. Data Preprocessing
2.1 Normalization
# Each image (28px * 28px) consists of integers between 0 and 255.
print(train_data.min()) #> 0
print(train_data.max()) #> 255
# Flatten each image from [28 rows x 28 cols] to [1 row x 784 cols].
# Rescale each pixel value from [0, 255] to [0, 1].
train_data = train_data.reshape(60000, 784) / 255.0
test_data = test_data.reshape(10000, 784) / 255.0
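The same reshape can also be written without hard-coding the sample counts, letting NumPy infer them (an equivalent sketch):
# equivalent, without hard-coding 60000 / 10000:
# train_data = train_data.reshape(-1, 28*28) / 255.0
# test_data = test_data.reshape(-1, 28*28) / 255.0
print(train_data.shape) #> (60000, 784)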
2.2 One-Hot Encoding
# The label for each image is an integer value.
train_label
# Convert each label from an integer value to a one-hot vector (using TensorFlow 2.x).
train_label = utils.to_categorical(train_label) # 0~9 -> one-hot vector
test_label = utils.to_categorical(test_label) # 0~9 -> one-hot vector
# You can confirm that the integer labels have been converted into one-hot vectors as shown below.
import pandas as pd
pd.DataFrame(train_label).head(3)
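To see what utils.to_categorical does in isolation, here is a minimal sketch on a toy label array:
utils.to_categorical([0, 1, 2])
#> array([[1., 0., 0.],
#>        [0., 1., 0.],
#>        [0., 0., 1.]], dtype=float32)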
3. Build the model & Set the criterion
1) Sequential
- Simple structure, fewer chances for errors, intuitive
2) Functional
- Slightly more complex structure, more room for errors
- Can build different network topologies (as layers are stacked, they can branch apart and merge back together; see the sketch below)
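For reference, a minimal Functional-style sketch of such a branch-and-merge topology (the layer sizes and the name func_model are just for illustration):
from tensorflow.keras import Input
inputs = Input(shape=(784,))
h = layers.Dense(units=256, activation='relu')(inputs) # shared trunk
branch_a = layers.Dense(units=128, activation='relu')(h) # one branch
branch_b = layers.Dense(units=128, activation='relu')(h) # a second branch
merged = layers.concatenate([branch_a, branch_b]) # merge the branches back together
outputs = layers.Dense(units=10, activation='softmax')(merged)
func_model = models.Model(inputs=inputs, outputs=outputs)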
model = models.Sequential() # Build up the "Sequence" of layers (Linear stack of layers)
'''1-Hidden layer'''
# number of columns in the X data (784), number of perceptrons in the hidden layer (number of parameters theta = 784*512)
model.add(layers.Dense(input_dim=28*28, units=512, activation='relu', kernel_initializer='he_uniform')) # kernel_initializer='he_normal' also works
# model.add(layers.Flatten()) # if you use Flatten, you don't need to set the input_dim parameter
'''2-Hidden layer'''
# model.add(layers.Dense(units=512, activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Dropout(0.2)) # Dropout layer (fraction of units to drop)
'''Output layer'''
model.add(layers.Dense(units=10, activation='softmax')) # (Output) Dense layer with softmax, labels 0~9 -> 10 output columns
# "Compile" the model description (configures the model for training)
model.compile(optimizer='adam',
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])
# with the string 'adam' there is no place to set the learning rate (see the sketch below)
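To control the learning rate, pass an optimizer object instead of the string (a minimal sketch; 0.001 is the tf.keras default for Adam):
model.compile(optimizer=optimizers.Adam(learning_rate=0.001), # the learning rate goes here
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])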
If you build the model with objects (functions) instead of strings, it looks like this (note: this example assumes 8 input features and a binary one-hot label, i.e., a different dataset from MNIST):
model = models.Sequential()
'''1-Hidden layer'''
model.add(layers.Dense(input_dim=8, units=256, activation=None, kernel_initializer=initializers.he_uniform()))
# model.add(layers.Flatten())
# model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.Activation('elu')) # elu or relu
'''2-Hidden layer'''
model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.Activation('elu'))
'''3-Hidden layer'''
model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.Activation('elu'))
'''4-Hidden layer'''
model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.Activation('elu'))
model.add(layers.Dropout(rate=0.5))
'''Output layer'''
model.add(layers.Dense(units=2, activation='softmax')) # one-hot vector for 0 & 1
# with model.add(layers.Dense(units=1, activation='sigmoid')) instead, compile with model.compile(loss=losses.binary_crossentropy)
model.compile(optimizer=optimizers.Adam(),
              loss=losses.categorical_crossentropy,
              metrics=[metrics.categorical_accuracy])
* BatchNormalization
'''BatchNormalization'''
model = models.Sequential()
# 1-Hidden layer
model.add(layers.Dense(input_dim=28*28, units=256, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.BatchNormalization())
model.add(layers.Activation('relu')) # layers.ELU or layers.LeakyReLU
model.add(layers.Dropout(rate=0.2))
# 2-Hidden layer
model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.BatchNormalization())
model.add(layers.Activation('relu')) # layers.ELU or layers.LeakyReLU
model.add(layers.Dropout(rate=0.2))
# Output layer
model.add(layers.Dense(units=10, activation='softmax')) # 0~9
model.compile(optimizer=optimizers.Adam(), # learning rate can be adjusted, e.g. Adam(0.01)
              loss=losses.categorical_crossentropy,
              metrics=[metrics.categorical_accuracy])
* Reference code for Regression / Multi-class Classification / Binary Classification
# Regression
model.add(layers.Dense(units=1, activation=None))
model.compile(optimizer='adam',
              loss=losses.mean_squared_error,
              metrics=['mean_squared_error'])
# Multi-class classification
model.add(layers.Dense(units=10, activation='softmax'))
model.compile(optimizer='adam',
              loss=losses.categorical_crossentropy, # <- when labels are one-hot vectors (recommended)
              # loss=losses.sparse_categorical_crossentropy, # <- when labels are integer class indices (not one-hot)
              metrics=['accuracy'])
# Binary Classification 1 (applying softmax, recommended)
model.add(layers.Dense(units=2, activation='softmax'))
model.compile(optimizer='adam',
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])
# Binary Classification 2 (applying sigmoid)
# The sigmoid output of the linear combination is treated by binary_crossentropy as the negative & positive class probabilities.
model.add(layers.Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss=losses.binary_crossentropy,
              metrics=['accuracy'])
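Note that the two binary setups expect differently shaped labels:
# softmax version: labels must be one-hot vectors of length 2, e.g. [[1., 0.], [0., 1.], ...]
# sigmoid version: labels are plain 0/1 values, e.g. [0, 1, ...]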
4. Train the model
# Fit the model on training data
history = model.fit(train_data, train_label, batch_size=100, epochs=10, validation_split=0.2, verbose=0) # validation can also be configured
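The exact metric names recorded in history depend on what was passed to compile; they can be checked directly (a quick sketch):
print(history.history.keys())
#> e.g. dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy']) when compiled with metrics=['accuracy']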
5. Test the model
import numpy as np
model.predict(test_data)
np.sum(model.predict(test_data[0:1,:])) #> 1.0 (softmax outputs sum to 1)
np.argmax(model.predict(test_data[0:1,:]), axis=1) #> array([7], dtype=int64)
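For reference, the test accuracy can also be computed by hand from the predictions (a minimal sketch, assuming test_label is still in one-hot form):
pred_class = np.argmax(model.predict(test_data), axis=1) # predicted digit for each image
true_class = np.argmax(test_label, axis=1) # true digit for each image
print(np.mean(pred_class == true_class)) # fraction correct == test accuracy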
# Evaluate the model on test data
result = model.evaluate(test_data, test_label, batch_size=100)
print('loss (cross-entropy) :', result[0])
print('test accuracy :', result[1])
* Save the trained model
# What gets saved:
# - the model's architecture
# - all of the model's weights (parameter theta)
# - the state of the optimizer (so training can be resumed immediately)
model.save('trained_model.h5') # "Save" the model
# model.save_weights('trained_model.h5') # you can also save only the weights
* Load the saved model
# from tensorflow.keras import models
# model = models.Sequential()
model = models.load_model('trained_model.h5') # "Load" the "model"
# model.load_weights('trained_model.h5') # you can also load only the weights (the model architecture must be built first)
result = model.evaluate(test_data, test_label)
print('loss (cross-entropy) :', result[0])
print('test accuracy :', result[1])
6. Visualize the result
# The keys below match the metric passed to compile; with metrics=['accuracy'], use 'val_accuracy' / 'accuracy' instead.
val_acc = history.history['val_categorical_accuracy']
acc = history.history['categorical_accuracy']
import numpy as np
import matplotlib.pyplot as plt
x_len = np.arange(len(acc))
plt.plot(x_len, acc, marker='.', c='blue', label="Train-set Acc.")
plt.plot(x_len, val_acc, marker='.', c='red', label="Validation-set Acc.")
plt.legend(loc='lower right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.show()
val_loss = history.history['val_loss']
loss = history.history['loss']
x_len = np.arange(len(loss))
plt.plot(x_len, loss, marker='.', c='blue', label="Train-set Loss")
plt.plot(x_len, val_loss, marker='.', c='red', label="Validation-set Loss")
plt.legend(loc='upper right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.show()