
3rd ML Month - Compare optimizer of efficientNet


Package

In [1]:
import gc
import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from keras import backend as K
# progress bar for loops
from tqdm import tqdm_notebook
# cross-validation utilities
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
# model libraries
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, GlobalAveragePooling2D
from keras import layers
from keras.optimizers import Adam, RMSprop, SGD, Nadam
# suppress warning messages
warnings.filterwarnings(action='ignore')
# list the input subdirectories
print(os.listdir("../input"))
Using TensorFlow backend.
['car-crop', '2019-3rd-ml-month-with-kakr']
In [2]:
# install the efficientnet package (Keras implementation of EfficientNet)
!pip install git+https://github.com/qubvel/efficientnet
from efficientnet import EfficientNetB3
Collecting git+https://github.com/qubvel/efficientnet
  Cloning https://github.com/qubvel/efficientnet to /tmp/pip-req-build-4pdycl7v
  Running command git clone -q https://github.com/qubvel/efficientnet /tmp/pip-req-build-4pdycl7v
Building wheels for collected packages: efficientnet
  Building wheel for efficientnet (setup.py) ... done
  Stored in directory: /tmp/pip-ephem-wheel-cache-npug02wz/wheels/64/60/2e/30ebaa76ed1626e86bfb0cc0579b737fdb7d9ff8cb9522663a
Successfully built efficientnet
Installing collected packages: efficientnet
Successfully installed efficientnet-0.0.4

File Directory Setting

In [3]:
#crop data directory
DATA_PATH = '../input/car-crop'
os.listdir(DATA_PATH)
Out[3]:
['train_crop', 'test_crop']
In [4]:
#original data directory
DATA_PATH2 = '../input/2019-3rd-ml-month-with-kakr'
os.listdir(DATA_PATH2)
Out[4]:
['test.csv',
 'test',
 'train',
 'train.csv',
 'class.csv',
 'sample_submission.csv']
In [5]:
# image folder paths
TRAIN_IMG_PATH = os.path.join(DATA_PATH, 'train_crop')
TEST_IMG_PATH = os.path.join(DATA_PATH, 'test_crop')

# CSV file paths
df_train = pd.read_csv(os.path.join(DATA_PATH2, 'train.csv'))
df_test = pd.read_csv(os.path.join(DATA_PATH2, 'test.csv'))
df_class = pd.read_csv(os.path.join(DATA_PATH2, 'class.csv'))

train/test data Split

In [6]:
df_train["class"] = df_train["class"].astype('str')

df_train = df_train[['img_file', 'class']]
df_test = df_test[['img_file']]

its = np.arange(df_train.shape[0])
train_idx, val_idx = train_test_split(its, train_size = 0.8, random_state=42)

X_train = df_train.iloc[train_idx, :]
X_val = df_train.iloc[val_idx, :]

print(X_train.shape)
print(X_val.shape)
print(df_test.shape)
(7992, 2)
(1998, 2)
(6150, 1)
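Note that the split above is purely random. With 196 classes, a stratified split keeps the class proportions similar between train and validation; a possible alternative using sklearn's stratify option (a sketch, not what was run here):

# stratified variant of the same split (optional)
train_idx, val_idx = train_test_split(
    its,
    train_size=0.8,
    random_state=42,
    stratify=df_train['class']   # stratify on the label column
)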

Parameter

In [7]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))
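These metrics are computed batch-wise with Keras backend ops, so they only approximate the epoch-level F1 that sklearn's f1_score would report over the full validation set. A quick sanity check on a toy batch (a sketch; the values are illustrative only):

# toy batch: two samples, three classes; one of the two true labels is recovered -> F1 ~ 0.5
y_true = K.constant(np.array([[0., 1., 0.], [1., 0., 0.]]))
y_pred = K.constant(np.array([[0.1, 0.8, 0.1], [0.2, 0.7, 0.1]]))
print(K.eval(f1_m(y_true, y_pred)))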
In [8]:
# Parameter
img_size = (299, 299)
image_size = 299
nb_train_samples = len(X_train)
nb_validation_samples = len(X_val)
nb_test_samples = len(df_test)
epochs = 20
batch_size = 32

# Define Generator config
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    zoom_range=0.2,
    fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)
In [9]:
#generator
train_generator = train_datagen.flow_from_dataframe(
    dataframe=X_train, 
    directory='../input/car-crop/train_crop',
    x_col = 'img_file',
    y_col = 'class',
    target_size = img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=batch_size,
    seed=42
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=X_val, 
    directory='../input/car-crop/train_crop',
    x_col = 'img_file',
    y_col = 'class',
    target_size = img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=False,
    seed=42
)
Found 7992 validated image filenames belonging to 196 classes.
Found 1998 validated image filenames belonging to 196 classes.
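If you want to sanity-check the augmentation before training, one generated batch can be previewed with matplotlib (optional sketch; the titles are class indices from train_generator.class_indices):

# preview a few augmented training images
images, labels = next(train_generator)
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, img, label in zip(axes, images, labels):
    ax.imshow(img)                  # already rescaled to [0, 1]
    ax.set_title(int(np.argmax(label)))
    ax.axis('off')
plt.show()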

Model

In [10]:
def get_steps(num_samples, batch_size):
    if (num_samples % batch_size) > 0 :
        return (num_samples // batch_size) + 1
    else :
        return num_samples // batch_size
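For example, with 7992 training images and batch_size 32 this returns 250 (249 full batches plus one partial batch), which matches the 250 steps per epoch in the training logs below:

print(get_steps(7992, 32))  # 250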
In [11]:
%%time
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# model checkpoint directory
MODEL_SAVE_FOLDER_PATH = './model/'
if not os.path.exists(MODEL_SAVE_FOLDER_PATH):
    os.mkdir(MODEL_SAVE_FOLDER_PATH)

model_path = MODEL_SAVE_FOLDER_PATH + '{epoch:02d}-{val_loss:.4f}.hdf5'

patient = 2
callbacks_list = [
    EarlyStopping(
        # monitor the validation loss
        monitor='val_loss',
        # stop training if val_loss does not improve for `patient` epochs
        patience=patient,
        # val_loss should decrease, so use mode='min'
        mode='min',
        # verbosity level
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        # halve the learning rate when triggered
        factor=0.5,
        # same criterion as above, with half the patience
        patience=patient / 2,
        # lower bound on the learning rate
        min_lr=0.00001,
        verbose=1,
        mode='min'
    ) ]
gc.collect()
CPU times: user 116 ms, sys: 4 ms, total: 120 ms
Wall time: 116 ms
Out[11]:
381
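model_path is defined above but no ModelCheckpoint is attached, so the runs below only keep their in-memory weights. If you also want the best weights on disk, a checkpoint callback could be appended (optional sketch; it would apply to every run that reuses callbacks_list):

callbacks_list.append(
    ModelCheckpoint(filepath=model_path,
                    monitor='val_loss',
                    save_best_only=True,
                    verbose=1)
)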
In [12]:
#model
def get_model():
    base_model = EfficientNetB3(weights='imagenet', include_top=False,
                                input_shape=(299, 299, 3))

    model = Sequential()
    model.add(base_model)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(2048, activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(196, activation='softmax'))
    #model.summary()

    return model
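To inspect the network once before the optimizer runs (optional; the exact parameter count depends on the efficientnet package version):

tmp_model = get_model()
tmp_model.summary()
del tmp_model
gc.collect()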

Optimizer 1: RMSprop

In [13]:
#compile
model_rmsprop = get_model()
model_rmsprop.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc',f1_m])
hist_rmsprop = model_rmsprop.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b3_imagenet_1000_notop.h5
43974656/43966704 [==============================] - 1s 0us/step
Epoch 1/20
250/250 [==============================] - 290s 1s/step - loss: 3.5051 - acc: 0.2196 - f1_m: 0.1509 - val_loss: 1.9829 - val_acc: 0.4630 - val_f1_m: 0.4226
Epoch 2/20
250/250 [==============================] - 248s 993ms/step - loss: 1.3776 - acc: 0.6072 - f1_m: 0.5945 - val_loss: 1.4108 - val_acc: 0.6371 - val_f1_m: 0.6503
Epoch 3/20
250/250 [==============================] - 249s 995ms/step - loss: 0.9035 - acc: 0.7302 - f1_m: 0.7368 - val_loss: 0.9871 - val_acc: 0.7472 - val_f1_m: 0.7539
Epoch 4/20
250/250 [==============================] - 248s 993ms/step - loss: 0.6861 - acc: 0.7971 - f1_m: 0.8006 - val_loss: 0.9284 - val_acc: 0.7658 - val_f1_m: 0.7778
Epoch 5/20
250/250 [==============================] - 250s 1s/step - loss: 0.5730 - acc: 0.8353 - f1_m: 0.8366 - val_loss: 0.7969 - val_acc: 0.8163 - val_f1_m: 0.8232
Epoch 6/20
250/250 [==============================] - 253s 1s/step - loss: 0.4692 - acc: 0.8576 - f1_m: 0.8580 - val_loss: 0.8536 - val_acc: 0.8198 - val_f1_m: 0.8241

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 7/20
250/250 [==============================] - 248s 991ms/step - loss: 0.2281 - acc: 0.9255 - f1_m: 0.9264 - val_loss: 0.5314 - val_acc: 0.8789 - val_f1_m: 0.8827
Epoch 8/20
250/250 [==============================] - 246s 985ms/step - loss: 0.1798 - acc: 0.9421 - f1_m: 0.9430 - val_loss: 0.5439 - val_acc: 0.8839 - val_f1_m: 0.8874

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 9/20
250/250 [==============================] - 246s 982ms/step - loss: 0.1038 - acc: 0.9646 - f1_m: 0.9656 - val_loss: 0.4406 - val_acc: 0.9079 - val_f1_m: 0.9102
Epoch 10/20
250/250 [==============================] - 245s 978ms/step - loss: 0.0694 - acc: 0.9764 - f1_m: 0.9770 - val_loss: 0.4560 - val_acc: 0.9049 - val_f1_m: 0.9065

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 11/20
250/250 [==============================] - 247s 990ms/step - loss: 0.0472 - acc: 0.9844 - f1_m: 0.9846 - val_loss: 0.4593 - val_acc: 0.9079 - val_f1_m: 0.9106

Epoch 00011: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 00011: early stopping

Optimizer 2: Adam

In [14]:
#compile
model_adam = get_model()
model_adam.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['acc',f1_m])
hist_adam = model_adam.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 281s 1s/step - loss: 3.7481 - acc: 0.1823 - f1_m: 0.1136 - val_loss: 2.7741 - val_acc: 0.3639 - val_f1_m: 0.3564
Epoch 2/20
250/250 [==============================] - 245s 981ms/step - loss: 1.6301 - acc: 0.5540 - f1_m: 0.5289 - val_loss: 1.2303 - val_acc: 0.6612 - val_f1_m: 0.6570
Epoch 3/20
250/250 [==============================] - 245s 980ms/step - loss: 0.9075 - acc: 0.7375 - f1_m: 0.7402 - val_loss: 0.8972 - val_acc: 0.7487 - val_f1_m: 0.7541
Epoch 4/20
250/250 [==============================] - 244s 977ms/step - loss: 0.6960 - acc: 0.7932 - f1_m: 0.7889 - val_loss: 0.8714 - val_acc: 0.7733 - val_f1_m: 0.7783
Epoch 5/20
250/250 [==============================] - 245s 980ms/step - loss: 0.5387 - acc: 0.8333 - f1_m: 0.8360 - val_loss: 0.7838 - val_acc: 0.7943 - val_f1_m: 0.7996
Epoch 6/20
250/250 [==============================] - 244s 978ms/step - loss: 0.4708 - acc: 0.8572 - f1_m: 0.8600 - val_loss: 0.7524 - val_acc: 0.8248 - val_f1_m: 0.8348
Epoch 7/20
250/250 [==============================] - 245s 979ms/step - loss: 0.3988 - acc: 0.8737 - f1_m: 0.8768 - val_loss: 0.7650 - val_acc: 0.8058 - val_f1_m: 0.8189

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 8/20
250/250 [==============================] - 244s 975ms/step - loss: 0.1856 - acc: 0.9407 - f1_m: 0.9418 - val_loss: 0.4081 - val_acc: 0.8959 - val_f1_m: 0.8962
Epoch 9/20
250/250 [==============================] - 244s 976ms/step - loss: 0.1304 - acc: 0.9559 - f1_m: 0.9562 - val_loss: 0.4419 - val_acc: 0.8944 - val_f1_m: 0.8960

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 10/20
250/250 [==============================] - 246s 982ms/step - loss: 0.0780 - acc: 0.9755 - f1_m: 0.9751 - val_loss: 0.4020 - val_acc: 0.9104 - val_f1_m: 0.9131
Epoch 11/20
250/250 [==============================] - 245s 980ms/step - loss: 0.0489 - acc: 0.9842 - f1_m: 0.9840 - val_loss: 0.3835 - val_acc: 0.9134 - val_f1_m: 0.9190
Epoch 12/20
250/250 [==============================] - 244s 978ms/step - loss: 0.0464 - acc: 0.9843 - f1_m: 0.9842 - val_loss: 0.3998 - val_acc: 0.9129 - val_f1_m: 0.9157

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 13/20
250/250 [==============================] - 246s 986ms/step - loss: 0.0377 - acc: 0.9872 - f1_m: 0.9872 - val_loss: 0.3781 - val_acc: 0.9199 - val_f1_m: 0.9232
Epoch 14/20
250/250 [==============================] - 251s 1s/step - loss: 0.0297 - acc: 0.9906 - f1_m: 0.9910 - val_loss: 0.3760 - val_acc: 0.9219 - val_f1_m: 0.9257
Epoch 15/20
250/250 [==============================] - 251s 1s/step - loss: 0.0297 - acc: 0.9903 - f1_m: 0.9904 - val_loss: 0.3899 - val_acc: 0.9219 - val_f1_m: 0.9240

Epoch 00015: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 16/20
250/250 [==============================] - 252s 1s/step - loss: 0.0249 - acc: 0.9915 - f1_m: 0.9917 - val_loss: 0.3785 - val_acc: 0.9259 - val_f1_m: 0.9273

Epoch 00016: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 00016: early stopping

Optimizer 3: Nadam

In [15]:
#compile
model_nadam = get_model()
model_nadam.compile(loss='categorical_crossentropy', optimizer=Nadam(), metrics=['acc',f1_m])
hist_nadam = model_nadam.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 288s 1s/step - loss: 4.1294 - acc: 0.1234 - f1_m: 0.0572 - val_loss: 4.2228 - val_acc: 0.1732 - val_f1_m: 0.1424
Epoch 2/20
250/250 [==============================] - 242s 967ms/step - loss: 2.2012 - acc: 0.4195 - f1_m: 0.3679 - val_loss: 2.6808 - val_acc: 0.4960 - val_f1_m: 0.4767
Epoch 3/20
250/250 [==============================] - 242s 969ms/step - loss: 1.4434 - acc: 0.5975 - f1_m: 0.5878 - val_loss: 1.2144 - val_acc: 0.6787 - val_f1_m: 0.6876
Epoch 4/20
250/250 [==============================] - 242s 970ms/step - loss: 1.0976 - acc: 0.6841 - f1_m: 0.6825 - val_loss: 1.1604 - val_acc: 0.6827 - val_f1_m: 0.6945
Epoch 5/20
250/250 [==============================] - 241s 963ms/step - loss: 0.8958 - acc: 0.7365 - f1_m: 0.7412 - val_loss: 0.9532 - val_acc: 0.7412 - val_f1_m: 0.7506
Epoch 6/20
250/250 [==============================] - 241s 963ms/step - loss: 0.7421 - acc: 0.7752 - f1_m: 0.7774 - val_loss: 0.8828 - val_acc: 0.7548 - val_f1_m: 0.7629
Epoch 7/20
250/250 [==============================] - 242s 966ms/step - loss: 0.6870 - acc: 0.7912 - f1_m: 0.7934 - val_loss: 0.8472 - val_acc: 0.7723 - val_f1_m: 0.7799
Epoch 8/20
250/250 [==============================] - 241s 964ms/step - loss: 0.6115 - acc: 0.8132 - f1_m: 0.8144 - val_loss: 0.8297 - val_acc: 0.7783 - val_f1_m: 0.7875
Epoch 9/20
250/250 [==============================] - 240s 961ms/step - loss: 0.5497 - acc: 0.8315 - f1_m: 0.8336 - val_loss: 0.7878 - val_acc: 0.8023 - val_f1_m: 0.8098
Epoch 10/20
250/250 [==============================] - 241s 962ms/step - loss: 0.5024 - acc: 0.8471 - f1_m: 0.8473 - val_loss: 0.8300 - val_acc: 0.7803 - val_f1_m: 0.7876

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0010000000474974513.
Epoch 11/20
250/250 [==============================] - 240s 960ms/step - loss: 0.2669 - acc: 0.9133 - f1_m: 0.9142 - val_loss: 0.4663 - val_acc: 0.8869 - val_f1_m: 0.8897
Epoch 12/20
250/250 [==============================] - 241s 964ms/step - loss: 0.1816 - acc: 0.9398 - f1_m: 0.9399 - val_loss: 0.4990 - val_acc: 0.8709 - val_f1_m: 0.8717

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 13/20
250/250 [==============================] - 241s 965ms/step - loss: 0.1196 - acc: 0.9607 - f1_m: 0.9615 - val_loss: 0.4290 - val_acc: 0.8919 - val_f1_m: 0.8933
Epoch 14/20
250/250 [==============================] - 242s 967ms/step - loss: 0.0917 - acc: 0.9695 - f1_m: 0.9706 - val_loss: 0.4254 - val_acc: 0.8999 - val_f1_m: 0.9043
Epoch 15/20
250/250 [==============================] - 240s 960ms/step - loss: 0.0724 - acc: 0.9757 - f1_m: 0.9762 - val_loss: 0.4568 - val_acc: 0.9009 - val_f1_m: 0.9025

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 16/20
250/250 [==============================] - 240s 960ms/step - loss: 0.0701 - acc: 0.9778 - f1_m: 0.9771 - val_loss: 0.4242 - val_acc: 0.9124 - val_f1_m: 0.9149
Epoch 17/20
250/250 [==============================] - 239s 957ms/step - loss: 0.0569 - acc: 0.9799 - f1_m: 0.9799 - val_loss: 0.4275 - val_acc: 0.9054 - val_f1_m: 0.9080

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/20
250/250 [==============================] - 239s 957ms/step - loss: 0.0563 - acc: 0.9806 - f1_m: 0.9807 - val_loss: 0.4138 - val_acc: 0.9129 - val_f1_m: 0.9157
Epoch 19/20
250/250 [==============================] - 240s 960ms/step - loss: 0.0394 - acc: 0.9864 - f1_m: 0.9864 - val_loss: 0.4088 - val_acc: 0.9119 - val_f1_m: 0.9135
Epoch 20/20
250/250 [==============================] - 239s 956ms/step - loss: 0.0407 - acc: 0.9867 - f1_m: 0.9867 - val_loss: 0.4164 - val_acc: 0.9134 - val_f1_m: 0.9143

Epoch 00020: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Optimizer 4: SGD

In [16]:
#compile
model_sgd = get_model()
model_sgd.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc',f1_m])
hist_sgd = model_sgd.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 272s 1s/step - loss: 5.2815 - acc: 0.0070 - f1_m: 0.0000e+00 - val_loss: 5.2558 - val_acc: 0.0100 - val_f1_m: 0.0000e+00
Epoch 2/20
250/250 [==============================] - 237s 947ms/step - loss: 5.2364 - acc: 0.0134 - f1_m: 0.0000e+00 - val_loss: 5.2147 - val_acc: 0.0195 - val_f1_m: 0.0000e+00
Epoch 3/20
250/250 [==============================] - 236s 945ms/step - loss: 5.1866 - acc: 0.0198 - f1_m: 0.0000e+00 - val_loss: 5.1626 - val_acc: 0.0425 - val_f1_m: 0.0000e+00
Epoch 4/20
250/250 [==============================] - 237s 948ms/step - loss: 5.1253 - acc: 0.0367 - f1_m: 0.0000e+00 - val_loss: 5.0876 - val_acc: 0.0571 - val_f1_m: 0.0000e+00
Epoch 5/20
250/250 [==============================] - 236s 945ms/step - loss: 5.0311 - acc: 0.0557 - f1_m: 0.0000e+00 - val_loss: 4.9584 - val_acc: 0.0771 - val_f1_m: 0.0000e+00
Epoch 6/20
250/250 [==============================] - 236s 945ms/step - loss: 4.8801 - acc: 0.0801 - f1_m: 0.0000e+00 - val_loss: 4.7433 - val_acc: 0.0946 - val_f1_m: 0.0000e+00
Epoch 7/20
250/250 [==============================] - 236s 944ms/step - loss: 4.6674 - acc: 0.1102 - f1_m: 0.0000e+00 - val_loss: 4.5005 - val_acc: 0.1296 - val_f1_m: 0.0000e+00
Epoch 8/20
250/250 [==============================] - 237s 949ms/step - loss: 4.4327 - acc: 0.1355 - f1_m: 7.2728e-04 - val_loss: 4.2099 - val_acc: 0.1702 - val_f1_m: 0.0000e+00
Epoch 9/20
250/250 [==============================] - 238s 952ms/step - loss: 4.1743 - acc: 0.1830 - f1_m: 4.8485e-04 - val_loss: 3.9078 - val_acc: 0.2157 - val_f1_m: 0.0067
Epoch 10/20
250/250 [==============================] - 237s 948ms/step - loss: 3.8609 - acc: 0.2226 - f1_m: 0.0057 - val_loss: 3.5655 - val_acc: 0.2688 - val_f1_m: 0.0125
Epoch 11/20
250/250 [==============================] - 265s 1s/step - loss: 3.5310 - acc: 0.2820 - f1_m: 0.0152 - val_loss: 3.2231 - val_acc: 0.3268 - val_f1_m: 0.0258
Epoch 12/20
250/250 [==============================] - 269s 1s/step - loss: 3.2108 - acc: 0.3311 - f1_m: 0.0350 - val_loss: 2.8546 - val_acc: 0.4124 - val_f1_m: 0.0495
Epoch 13/20
250/250 [==============================] - 265s 1s/step - loss: 2.8602 - acc: 0.3960 - f1_m: 0.0636 - val_loss: 2.5056 - val_acc: 0.4700 - val_f1_m: 0.1007
Epoch 14/20
250/250 [==============================] - 246s 984ms/step - loss: 2.5567 - acc: 0.4545 - f1_m: 0.1146 - val_loss: 2.2381 - val_acc: 0.5215 - val_f1_m: 0.1474
Epoch 15/20
250/250 [==============================] - 303s 1s/step - loss: 2.2699 - acc: 0.4998 - f1_m: 0.1789 - val_loss: 1.9367 - val_acc: 0.5701 - val_f1_m: 0.2627
Epoch 16/20
250/250 [==============================] - 245s 981ms/step - loss: 2.0120 - acc: 0.5417 - f1_m: 0.2576 - val_loss: 1.7187 - val_acc: 0.6081 - val_f1_m: 0.3514
Epoch 17/20
250/250 [==============================] - 244s 976ms/step - loss: 1.7865 - acc: 0.5907 - f1_m: 0.3439 - val_loss: 1.5097 - val_acc: 0.6411 - val_f1_m: 0.4685
Epoch 18/20
250/250 [==============================] - 241s 965ms/step - loss: 1.6084 - acc: 0.6243 - f1_m: 0.4144 - val_loss: 1.3484 - val_acc: 0.6792 - val_f1_m: 0.5331
Epoch 19/20
250/250 [==============================] - 245s 978ms/step - loss: 1.4419 - acc: 0.6577 - f1_m: 0.5014 - val_loss: 1.2364 - val_acc: 0.6937 - val_f1_m: 0.5887
Epoch 20/20
250/250 [==============================] - 244s 976ms/step - loss: 1.3128 - acc: 0.6885 - f1_m: 0.5429 - val_loss: 1.0948 - val_acc: 0.7192 - val_f1_m: 0.6461

Optimizer 5: SGD + Nesterov

In [17]:
#compile
model_sgdnes = get_model()
model_sgdnes.compile(loss='categorical_crossentropy', optimizer=SGD(nesterov=True), metrics=['acc',f1_m])
hist_sgdnes = model_sgdnes.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 287s 1s/step - loss: 5.2796 - acc: 0.0066 - f1_m: 0.0000e+00 - val_loss: 5.2519 - val_acc: 0.0060 - val_f1_m: 0.0000e+00
Epoch 2/20
250/250 [==============================] - 257s 1s/step - loss: 5.2392 - acc: 0.0112 - f1_m: 0.0000e+00 - val_loss: 5.2110 - val_acc: 0.0195 - val_f1_m: 0.0000e+00
Epoch 3/20
250/250 [==============================] - 253s 1s/step - loss: 5.1897 - acc: 0.0218 - f1_m: 0.0000e+00 - val_loss: 5.1554 - val_acc: 0.0385 - val_f1_m: 0.0000e+00
Epoch 4/20
250/250 [==============================] - 250s 1s/step - loss: 5.1149 - acc: 0.0385 - f1_m: 0.0000e+00 - val_loss: 5.0658 - val_acc: 0.0591 - val_f1_m: 0.0000e+00
Epoch 5/20
250/250 [==============================] - 246s 983ms/step - loss: 5.0154 - acc: 0.0540 - f1_m: 0.0000e+00 - val_loss: 4.9082 - val_acc: 0.0711 - val_f1_m: 0.0000e+00
Epoch 6/20
250/250 [==============================] - 245s 978ms/step - loss: 4.8579 - acc: 0.0828 - f1_m: 0.0000e+00 - val_loss: 4.7131 - val_acc: 0.1206 - val_f1_m: 0.0000e+00
Epoch 7/20
250/250 [==============================] - 246s 984ms/step - loss: 4.6758 - acc: 0.1168 - f1_m: 0.0000e+00 - val_loss: 4.4955 - val_acc: 0.1491 - val_f1_m: 9.7067e-04
Epoch 8/20
250/250 [==============================] - 248s 991ms/step - loss: 4.4442 - acc: 0.1498 - f1_m: 7.2728e-04 - val_loss: 4.2417 - val_acc: 0.1807 - val_f1_m: 0.0019
Epoch 9/20
250/250 [==============================] - 244s 977ms/step - loss: 4.1850 - acc: 0.1889 - f1_m: 0.0032 - val_loss: 3.9157 - val_acc: 0.2162 - val_f1_m: 0.0087
Epoch 10/20
250/250 [==============================] - 244s 978ms/step - loss: 3.8912 - acc: 0.2210 - f1_m: 0.0082 - val_loss: 3.5765 - val_acc: 0.2613 - val_f1_m: 0.0222
Epoch 11/20
250/250 [==============================] - 245s 979ms/step - loss: 3.5922 - acc: 0.2692 - f1_m: 0.0211 - val_loss: 3.2315 - val_acc: 0.3473 - val_f1_m: 0.0398
Epoch 12/20
250/250 [==============================] - 244s 975ms/step - loss: 3.2267 - acc: 0.3395 - f1_m: 0.0428 - val_loss: 2.8834 - val_acc: 0.3809 - val_f1_m: 0.0714
Epoch 13/20
250/250 [==============================] - 241s 962ms/step - loss: 2.9179 - acc: 0.3876 - f1_m: 0.0712 - val_loss: 2.5499 - val_acc: 0.4665 - val_f1_m: 0.1178
Epoch 14/20
250/250 [==============================] - 240s 962ms/step - loss: 2.6022 - acc: 0.4465 - f1_m: 0.1123 - val_loss: 2.2473 - val_acc: 0.4995 - val_f1_m: 0.1788
Epoch 15/20
250/250 [==============================] - 242s 967ms/step - loss: 2.2877 - acc: 0.5069 - f1_m: 0.1890 - val_loss: 1.9782 - val_acc: 0.5651 - val_f1_m: 0.2664
Epoch 16/20
250/250 [==============================] - 239s 956ms/step - loss: 2.0582 - acc: 0.5426 - f1_m: 0.2587 - val_loss: 1.7474 - val_acc: 0.6166 - val_f1_m: 0.3598
Epoch 17/20
250/250 [==============================] - 238s 953ms/step - loss: 1.8387 - acc: 0.5868 - f1_m: 0.3370 - val_loss: 1.5692 - val_acc: 0.6261 - val_f1_m: 0.4423
Epoch 18/20
250/250 [==============================] - 238s 953ms/step - loss: 1.6553 - acc: 0.6237 - f1_m: 0.4035 - val_loss: 1.3760 - val_acc: 0.6867 - val_f1_m: 0.5301
Epoch 19/20
250/250 [==============================] - 243s 973ms/step - loss: 1.4744 - acc: 0.6533 - f1_m: 0.4755 - val_loss: 1.2595 - val_acc: 0.6947 - val_f1_m: 0.5777
Epoch 20/20
250/250 [==============================] - 238s 950ms/step - loss: 1.3349 - acc: 0.6892 - f1_m: 0.5419 - val_loss: 1.1540 - val_acc: 0.7132 - val_f1_m: 0.6220

Optimizer 6: SGD with momentum=0.9

In [18]:
#compile
model_sgdmo = get_model()
model_sgdmo.compile(loss='categorical_crossentropy', optimizer=SGD(momentum=0.9), metrics=['acc',f1_m])
hist_sgdmo = model_sgdmo.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 283s 1s/step - loss: 5.0416 - acc: 0.0401 - f1_m: 2.4243e-04 - val_loss: 4.3300 - val_acc: 0.1086 - val_f1_m: 0.0048
Epoch 2/20
250/250 [==============================] - 239s 956ms/step - loss: 3.3459 - acc: 0.2551 - f1_m: 0.0784 - val_loss: 2.0053 - val_acc: 0.4870 - val_f1_m: 0.3179
Epoch 3/20
250/250 [==============================] - 239s 956ms/step - loss: 1.6440 - acc: 0.5638 - f1_m: 0.4653 - val_loss: 1.0241 - val_acc: 0.7127 - val_f1_m: 0.6995
Epoch 4/20
250/250 [==============================] - 240s 959ms/step - loss: 0.9140 - acc: 0.7432 - f1_m: 0.7190 - val_loss: 0.7338 - val_acc: 0.7863 - val_f1_m: 0.7882
Epoch 5/20
250/250 [==============================] - 240s 960ms/step - loss: 0.5982 - acc: 0.8249 - f1_m: 0.8143 - val_loss: 0.6174 - val_acc: 0.8153 - val_f1_m: 0.8172
Epoch 6/20
250/250 [==============================] - 241s 963ms/step - loss: 0.4158 - acc: 0.8783 - f1_m: 0.8722 - val_loss: 0.5371 - val_acc: 0.8393 - val_f1_m: 0.8425
Epoch 7/20
250/250 [==============================] - 243s 972ms/step - loss: 0.3157 - acc: 0.9051 - f1_m: 0.9025 - val_loss: 0.4755 - val_acc: 0.8624 - val_f1_m: 0.8650
Epoch 8/20
250/250 [==============================] - 247s 990ms/step - loss: 0.2449 - acc: 0.9260 - f1_m: 0.9221 - val_loss: 0.4640 - val_acc: 0.8654 - val_f1_m: 0.8708
Epoch 9/20
250/250 [==============================] - 246s 983ms/step - loss: 0.1910 - acc: 0.9423 - f1_m: 0.9406 - val_loss: 0.4344 - val_acc: 0.8689 - val_f1_m: 0.8755
Epoch 10/20
250/250 [==============================] - 246s 985ms/step - loss: 0.1569 - acc: 0.9554 - f1_m: 0.9538 - val_loss: 0.4055 - val_acc: 0.8849 - val_f1_m: 0.8893
Epoch 11/20
250/250 [==============================] - 248s 993ms/step - loss: 0.1352 - acc: 0.9590 - f1_m: 0.9587 - val_loss: 0.4028 - val_acc: 0.8854 - val_f1_m: 0.8894
Epoch 12/20
250/250 [==============================] - 248s 994ms/step - loss: 0.1191 - acc: 0.9658 - f1_m: 0.9635 - val_loss: 0.4213 - val_acc: 0.8949 - val_f1_m: 0.8963

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 13/20
250/250 [==============================] - 246s 984ms/step - loss: 0.0820 - acc: 0.9773 - f1_m: 0.9770 - val_loss: 0.3905 - val_acc: 0.8969 - val_f1_m: 0.8996
Epoch 14/20
250/250 [==============================] - 249s 996ms/step - loss: 0.0658 - acc: 0.9811 - f1_m: 0.9813 - val_loss: 0.3680 - val_acc: 0.9054 - val_f1_m: 0.9064
Epoch 15/20
250/250 [==============================] - 249s 995ms/step - loss: 0.0617 - acc: 0.9840 - f1_m: 0.9834 - val_loss: 0.3761 - val_acc: 0.9109 - val_f1_m: 0.9116

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 16/20
250/250 [==============================] - 249s 996ms/step - loss: 0.0509 - acc: 0.9876 - f1_m: 0.9875 - val_loss: 0.3563 - val_acc: 0.9084 - val_f1_m: 0.9099
Epoch 17/20
250/250 [==============================] - 249s 996ms/step - loss: 0.0487 - acc: 0.9862 - f1_m: 0.9865 - val_loss: 0.3637 - val_acc: 0.9119 - val_f1_m: 0.9157

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 18/20
250/250 [==============================] - 249s 997ms/step - loss: 0.0405 - acc: 0.9903 - f1_m: 0.9903 - val_loss: 0.3531 - val_acc: 0.9084 - val_f1_m: 0.9126
Epoch 19/20
250/250 [==============================] - 246s 982ms/step - loss: 0.0383 - acc: 0.9907 - f1_m: 0.9902 - val_loss: 0.3529 - val_acc: 0.9119 - val_f1_m: 0.9143
Epoch 20/20
250/250 [==============================] - 248s 993ms/step - loss: 0.0402 - acc: 0.9912 - f1_m: 0.9902 - val_loss: 0.3530 - val_acc: 0.9129 - val_f1_m: 0.9152

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.

Optimizer 7: SGD + Nesterov with momentum=0.9

In [19]:
#compile
model_sgdmones = get_model()
model_sgdmones.compile(loss='categorical_crossentropy', optimizer=SGD(momentum=0.9, nesterov=True), metrics=['acc',f1_m])
hist_sgdmones  = model_sgdmones.fit_generator(
    train_generator,
    steps_per_epoch = get_steps(nb_train_samples, batch_size),
    epochs=epochs,
    validation_data = validation_generator,
    validation_steps = get_steps(nb_validation_samples, batch_size),
    callbacks = callbacks_list
)
Epoch 1/20
250/250 [==============================] - 295s 1s/step - loss: 5.0288 - acc: 0.0442 - f1_m: 0.0000e+00 - val_loss: 4.3071 - val_acc: 0.1191 - val_f1_m: 0.0029
Epoch 2/20
250/250 [==============================] - 243s 974ms/step - loss: 3.2497 - acc: 0.2771 - f1_m: 0.0977 - val_loss: 1.9375 - val_acc: 0.5000 - val_f1_m: 0.3884
Epoch 3/20
250/250 [==============================] - 243s 971ms/step - loss: 1.6001 - acc: 0.5817 - f1_m: 0.4869 - val_loss: 1.0060 - val_acc: 0.7187 - val_f1_m: 0.6957
Epoch 4/20
250/250 [==============================] - 242s 969ms/step - loss: 0.8932 - acc: 0.7499 - f1_m: 0.7255 - val_loss: 0.7660 - val_acc: 0.7633 - val_f1_m: 0.7622
Epoch 5/20
250/250 [==============================] - 242s 968ms/step - loss: 0.5922 - acc: 0.8276 - f1_m: 0.8198 - val_loss: 0.5647 - val_acc: 0.8388 - val_f1_m: 0.8374
Epoch 6/20
250/250 [==============================] - 244s 976ms/step - loss: 0.4195 - acc: 0.8742 - f1_m: 0.8700 - val_loss: 0.4960 - val_acc: 0.8504 - val_f1_m: 0.8542
Epoch 7/20
250/250 [==============================] - 246s 984ms/step - loss: 0.3148 - acc: 0.9049 - f1_m: 0.9016 - val_loss: 0.4507 - val_acc: 0.8654 - val_f1_m: 0.8686
Epoch 8/20
250/250 [==============================] - 246s 985ms/step - loss: 0.2435 - acc: 0.9255 - f1_m: 0.9200 - val_loss: 0.4225 - val_acc: 0.8759 - val_f1_m: 0.8775
Epoch 9/20
250/250 [==============================] - 241s 965ms/step - loss: 0.1873 - acc: 0.9448 - f1_m: 0.9420 - val_loss: 0.4541 - val_acc: 0.8724 - val_f1_m: 0.8738

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 10/20
250/250 [==============================] - 242s 966ms/step - loss: 0.1308 - acc: 0.9635 - f1_m: 0.9620 - val_loss: 0.3869 - val_acc: 0.8914 - val_f1_m: 0.8962
Epoch 11/20
250/250 [==============================] - 242s 966ms/step - loss: 0.1028 - acc: 0.9702 - f1_m: 0.9688 - val_loss: 0.3656 - val_acc: 0.8984 - val_f1_m: 0.8987
Epoch 12/20
250/250 [==============================] - 246s 983ms/step - loss: 0.0974 - acc: 0.9715 - f1_m: 0.9713 - val_loss: 0.3748 - val_acc: 0.8959 - val_f1_m: 0.8994

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 13/20
250/250 [==============================] - 244s 975ms/step - loss: 0.0729 - acc: 0.9827 - f1_m: 0.9819 - val_loss: 0.3537 - val_acc: 0.9109 - val_f1_m: 0.9140
Epoch 14/20
250/250 [==============================] - 243s 973ms/step - loss: 0.0718 - acc: 0.9801 - f1_m: 0.9798 - val_loss: 0.3420 - val_acc: 0.9064 - val_f1_m: 0.9116
Epoch 15/20
250/250 [==============================] - 243s 972ms/step - loss: 0.0595 - acc: 0.9857 - f1_m: 0.9845 - val_loss: 0.3502 - val_acc: 0.9094 - val_f1_m: 0.9162

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 16/20
250/250 [==============================] - 242s 968ms/step - loss: 0.0550 - acc: 0.9859 - f1_m: 0.9858 - val_loss: 0.3450 - val_acc: 0.9079 - val_f1_m: 0.9125

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 00016: early stopping

acc / loss Plot

train acc

In [20]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['acc'])  
plt.plot(hist_adam.history['acc'])  
plt.plot(hist_nadam.history['acc']) 
plt.plot(hist_sgd.history['acc']) 
plt.plot(hist_sgdnes.history['acc']) 
plt.plot(hist_sgdmo.history['acc'])
plt.plot(hist_sgdmones.history['acc'])
plt.title('train. accuracy')  
plt.ylabel('accuracy')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='lower right')  

plt.show()

train loss

In [21]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['loss'])  
plt.plot(hist_adam.history['loss'])  
plt.plot(hist_nadam.history['loss']) 
plt.plot(hist_sgd.history['loss']) 
plt.plot(hist_sgdnes.history['loss']) 
plt.plot(hist_sgdmo.history['loss'])
plt.plot(hist_sgdmones.history['loss'])
plt.title('train. loss')  
plt.ylabel('loss')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')  

plt.show()

valid acc

In [22]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['val_acc'])
plt.plot(hist_adam.history['val_acc'])
plt.plot(hist_nadam.history['val_acc'])
plt.plot(hist_sgd.history['val_acc'])
plt.plot(hist_sgdnes.history['val_acc'])
plt.plot(hist_sgdmo.history['val_acc'])
plt.plot(hist_sgdmones.history['val_acc'])

plt.title('valid. accuracy')  
plt.ylabel('accuracy')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='lower right')  

plt.show()

valid loss

In [23]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['val_loss'])  
plt.plot(hist_adam.history['val_loss'])  
plt.plot(hist_nadam.history['val_loss']) 
plt.plot(hist_sgd.history['val_loss']) 
plt.plot(hist_sgdnes.history['val_loss']) 
plt.plot(hist_sgdmo.history['val_loss'])
plt.plot(hist_sgdmones.history['val_loss'])
plt.title('valid. loss')  
plt.ylabel('loss')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')  

plt.show()

train f1 score

In [24]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['f1_m'])  
plt.plot(hist_adam.history['f1_m'])  
plt.plot(hist_nadam.history['f1_m']) 
plt.plot(hist_sgd.history['f1_m']) 
plt.plot(hist_sgdnes.history['f1_m']) 
plt.plot(hist_sgdmo.history['f1_m'])
plt.plot(hist_sgdmones.history['f1_m'])
plt.title('train. f1_score')  
plt.ylabel('f1_score')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')  

plt.show()

valid f1 score

In [25]:
plt.figure(figsize=(10, 6))  

plt.plot(hist_rmsprop.history['val_f1_m'])  
plt.plot(hist_adam.history['val_f1_m'])  
plt.plot(hist_nadam.history['val_f1_m']) 
plt.plot(hist_sgd.history['val_f1_m']) 
plt.plot(hist_sgdnes.history['val_f1_m']) 
plt.plot(hist_sgdmo.history['val_f1_m'])
plt.plot(hist_sgdmones.history['val_f1_m'])
plt.title('valid. f1_score')  
plt.ylabel('f1_score')  
plt.xlabel('epoch')  
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')  

plt.show()

Conclusion

  • 'sgd' and 'sgd+nesterov' converge too slowly, so they do not look like suitable optimizers for this competition.
  • 'rmsprop' and 'adam' reach high accuracy in a comparatively short time; a small summary of each run is sketched below.
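The best validation accuracy reached by each run can be printed directly from the history objects collected above (a sketch):

histories = {'rmsprop': hist_rmsprop, 'adam': hist_adam, 'nadam': hist_nadam,
             'sgd': hist_sgd, 'sgd+nesterov': hist_sgdnes,
             'sgd+momentum': hist_sgdmo, 'sgd+nesterov+momentum': hist_sgdmones}
for name, hist in histories.items():
    print('{:24s} best val_acc = {:.4f}'.format(name, max(hist.history['val_acc'])))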
In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))