
0. Import Library

import tensorflow as tf
from tensorflow.keras import datasets, utils
from tensorflow.keras import models, layers, activations, initializers, losses, optimizers, metrics

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'   # suppress TensorFlow INFO and WARNING log messages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection, preprocessing

 


1. Prepare train & test data

(train_data, train_label), (test_data, test_label) = datasets.mnist.load_data()
print(train_data.shape)   # number of training samples == 60000; each sample is a 28px * 28px image
print(test_data.shape)    # number of test samples == 10000
print(train_label.shape)
print(test_label.shape)

import matplotlib.pyplot as plt
plt.imshow(train_data[0], cmap='gray')   # the first of the 60000 training images
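# For reference, the matching label can be shown as the title (train_label is still a plain integer at this point):
plt.title('label: {}'.format(train_label[0]))   #> label: 5
plt.show()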

 


2. Data Preprocessing

2.1 Normalization

# Each image (28px * 28px) consists of integer pixel values between 0 and 255.
print(train_data.min())   #> 0
print(train_data.max())   #> 255

# Flatten each image from [28 rows x 28 columns] to [1 row x 784 columns].
# Rescale the pixel values in each image from [0, 255] to [0, 1].
train_data = train_data.reshape(60000, 784) / 255.0
test_data = test_data.reshape(10000, 784) / 255.0
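
# A quick sanity check that the reshape and rescaling behaved as expected:
print(train_data.shape)   #> (60000, 784)
print(train_data.min())   #> 0.0
print(train_data.max())   #> 1.0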

 

2.2 One-Hot Encoding

# The label for each image is an integer value.
train_label

# Convert each label from an integer value to a one-hot vector. (Using TensorFlow 2.x)
train_label = utils.to_categorical(train_label)   # 0~9 -> one-hot vector
test_label = utils.to_categorical(test_label)   # 0~9 -> one-hot vector

# You can confirm that the original integer labels have been converted to one-hot vectors as shown below.
import pandas as pd
pd.DataFrame(train_label).head(3)
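
# Conversely, np.argmax recovers the integer label from a one-hot vector:
import numpy as np
print(np.argmax(train_label[0]))   #> 5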

 


3. Build the model & Set the criterion

1) Sequential

- Simple structure, fewer chances for error, intuitive

 

2) Functional

- Slightly more complex structure, more room for error

- Can express different network topologies (layers can branch apart mid-stack and merge back together), as in the sketch below
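
As a minimal sketch of the Functional API (not used in the rest of this post), layers can branch apart and merge back together like this:

from tensorflow.keras import layers, models

inputs = layers.Input(shape=(784,))
x = layers.Dense(256, activation='relu')(inputs)
branch_a = layers.Dense(64, activation='relu')(x)   # one branch
branch_b = layers.Dense(64, activation='relu')(x)   # a parallel branch
merged = layers.concatenate([branch_a, branch_b])   # the branches merge back together
outputs = layers.Dense(10, activation='softmax')(merged)
model = models.Model(inputs=inputs, outputs=outputs)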

 

model = models.Sequential() # Build up the "Sequence" of layers (Linear stack of layers)

'''1-Hidden layer'''
# number of input columns (784) and number of perceptrons in the hidden layer (number of parameters theta = 784*512)
model.add(layers.Dense(input_dim=28*28, units=512, activation='relu', kernel_initializer='he_uniform'))  # kernel_initializer='he_normal' also works
# model.add(layers.Flatten())   # with a Flatten layer first, the input_dim parameter is not needed

'''2-Hidden layer'''
# model.add(layers.Dense(units=512, activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Dropout(0.2)) # Dropout layer (fraction of units to drop)

'''Output layer'''
model.add(layers.Dense(units=10, activation='softmax'))   # (Output) Dense layer with softmax; digits 0~9 -> 10 output columns

# "Compile" the model description (Configures the model for training) : 엮다
model.compile(optimizer='adam', 
              loss=losses.categorical_crossentropy,
              metrics=['accuracy']) 
# learning rate 들어갈 자리가 없음
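
# A sketch of setting the learning rate by passing an optimizer object instead of the string
# (0.001 is Adam's default):
model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
              loss=losses.categorical_crossentropy,
              metrics=['accuracy'])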

 

If you build the model with function objects instead of strings:

model = models.Sequential() 

'''1-Hidden layer'''
model.add(layers.Dense(input_dim=8, units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
# model.add(layers.Flatten())
# model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.Activation('elu')) # elu or relu

'''2-Hidden layer'''
model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.Activation('elu')) 

'''3-Hidden layer'''
model.add(layers.Dense(units=512, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.Activation('elu'))

'''4-Hidden layer'''
model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.Activation('elu')) 
model.add(layers.Dropout(rate=0.5))

'''Output layer'''
model.add(layers.Dense(units=2, activation='softmax')) # One-hot vector for 0 & 1
# with model.add(layers.Dense(units=1, activation='sigmoid')) instead, compile with model.compile(loss=losses.binary_crossentropy)

model.compile(optimizer=optimizers.Adam(), 
              loss=losses.categorical_crossentropy, 
              metrics=[metrics.categorical_accuracy])
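
# Either way, model.summary() prints the layer stack with output shapes and parameter counts,
# which is handy for checking the architecture:
model.summary()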

 

* BatchNormalization

'''BatchNormalization'''
model = models.Sequential() 

# 1-Hidden layer
model.add(layers.Dense(input_dim=28*28, units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.BatchNormalization())
model.add(layers.Activation('relu')) # layers.ELU or layers.LeakyReLU
model.add(layers.Dropout(rate=0.2))

# 2-Hidden layer
model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform())) 
model.add(layers.BatchNormalization())
model.add(layers.Activation('relu')) # layers.ELU or layers.LeakyReLU
model.add(layers.Dropout(rate=0.2))

# Output layer
model.add(layers.Dense(units=10, activation='softmax')) # 0~9 


model.compile(optimizer=optimizers.Adam(),  # the learning rate can be adjusted, e.g. Adam(0.01)
              loss=losses.categorical_crossentropy, 
              metrics=[metrics.categorical_accuracy])
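
Note the ordering used here: Dense -> BatchNormalization -> Activation -> Dropout. Placing BatchNormalization after the activation is also common in practice; which works better is an empirical question. The alternative ordering would be:

model.add(layers.Dense(units=256, activation=None, kernel_initializer=initializers.he_uniform()))
model.add(layers.Activation('relu'))
model.add(layers.BatchNormalization())   # BatchNormalization after the activation instead of before
model.add(layers.Dropout(rate=0.2))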

 

 

* Reference code: Regression / Multi-class Classification / Binary Classification

# Regression
model.add(layers.Dense(units=1, activation=None))
model.compile(optimizer='adam',
              loss=losses.mean_squared_error,
              metrics=['mean_squared_error']) 


# Multi-class classification
model.add(layers.Dense(units=10, activation='softmax'))
model.compile(optimizer='adam',
              loss=losses.categorical_crossentropy,          # <- when the labels are one-hot vectors (recommended)
              # loss=losses.sparse_categorical_crossentropy, # <- when the labels are plain integers; only one loss may be passed
              metrics=['accuracy']) 
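
# A sketch, assuming the to_categorical step above was skipped and the labels stayed as integers:
model.compile(optimizer='adam',
              loss=losses.sparse_categorical_crossentropy,
              metrics=['accuracy'])
# model.fit(train_data, train_label, ...)   # train_label: integers 0~9, not one-hot vectors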


# Binary Classification 1 (applying Softmax, recommended)
model.add(layers.Dense(units=2, activation='softmax'))
model.compile(optimizer='adam',
              loss=losses.categorical_crossentropy,
              metrics=['accuracy']) 


# Binary Classification 2 (applying Sigmoid)
# Applying the sigmoid function to the linear combination gives the positive-class probability, and binary_crossentropy treats it as the negative & positive class probabilities.
model.add(layers.Dense(units=1, activation='sigmoid')) 
model.compile(optimizer='adam',
              loss=losses.binary_crossentropy, 
              metrics=['accuracy'])
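
# In the sigmoid case the model emits one positive-class probability per sample,
# so a (hypothetical) prediction step thresholds at 0.5:
probs = model.predict(new_data)           # new_data is a placeholder for unseen samples
preds = (probs > 0.5).astype('int32')     # class 1 when P(label == 1) exceeds 0.5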

 


4. Train the model

# Fit the model on training data
history = model.fit(train_data, train_label, batch_size=100, epochs=10, validation_split=0.2, verbose=0)   # a validation set can also be configured
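
# Instead of validation_split, an explicit validation set and early stopping can be configured.
# A sketch, assuming a held-out (val_data, val_label) pair exists:
from tensorflow.keras import callbacks

early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(train_data, train_label, batch_size=100, epochs=10,
                    validation_data=(val_data, val_label),
                    callbacks=[early_stop], verbose=0)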

 


5. Test the model

import numpy as np
model.predict(test_data)
np.sum(model.predict(test_data[0:1,:]))   #> 1.0 (softmax outputs sum to 1)
np.argmax(model.predict(test_data[0:1,:]), axis=1)   #> array([7], dtype=int64)
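
# As a cross-check of evaluate() below, accuracy can also be computed by hand:
pred_classes = np.argmax(model.predict(test_data), axis=1)   # predicted digit per image
true_classes = np.argmax(test_label, axis=1)                 # recover integers from the one-hot labels
print((pred_classes == true_classes).mean())                 # fraction correct == test accuracy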

# Evaluate the model on test data
result = model.evaluate(test_data, test_label, batch_size=100)
print('loss (cross-entropy) :', result[0])
print('test accuracy :', result[1])

 

* Save the trained model

# What gets saved:
# - the model's architecture
# - all of the model's weights (parameter theta)
# - the state of the optimizer (so training can resume right where it left off)

model.save('trained_model.h5') # "Save" the model
# model.save_weights('trained_model.h5') # the weights alone can also be saved separately
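
# In TensorFlow 2.x, saving without the .h5 extension uses the SavedModel directory format instead:
# model.save('trained_model')   # creates a directory rather than a single .h5 file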

 

* Load the saved model

# from tensorflow.keras import models
# model = models.Sequential()

model = models.load_model('trained_model.h5') # "Load" the model
# model.load_weights('trained_model.h5') # the weights alone can also be loaded separately


result = model.evaluate(test_data, test_label)
print('loss (cross-entropy) :', result[0])
print('test accuracy :', result[1])

 


6. Visualize the result

# the history keys follow the metric passed to compile(); with metrics=['accuracy'] they are 'accuracy' / 'val_accuracy'
val_acc = history.history['val_categorical_accuracy']
acc = history.history['categorical_accuracy']

import numpy as np
import matplotlib.pyplot as plt

x_len = np.arange(len(acc))
plt.plot(x_len, acc, marker='.', c='blue', label="Train-set Acc.")
plt.plot(x_len, val_acc, marker='.', c='red', label="Validation-set Acc.")

plt.legend(loc='lower right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('Accuracy')
plt.show()

 

val_loss = history.history['val_loss']
loss = history.history['loss']

import numpy as np
import matplotlib.pyplot as plt

x_len = np.arange(len(loss))
plt.plot(x_len, loss, marker='.', c='blue', label="Train-set Loss")
plt.plot(x_len, val_loss, marker='.', c='red', label="Validation-set Loss")

plt.legend(loc='upper right')
plt.grid()
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.show()