3rd ML Month - Comparing Optimizers for EfficientNet¶
Reference¶
- https://shaoanlu.wordpress.com/2017/05/29/sgd-all-which-one-is-the-best-optimizer-dogs-vs-cats-toy-experiment/
- EarlyStopping is configured because of the kernel time limit, so the optimizers are not compared over the same number of epochs
Package¶
In [1]:
import gc
import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from keras import backend as K
# progress bar for loops
from tqdm import tqdm_notebook
# cross-validation
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
# model-related imports
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, GlobalAveragePooling2D
from keras import layers
from keras.optimizers import Adam, RMSprop, SGD, Nadam
# suppress warning messages
warnings.filterwarnings(action='ignore')
# list the input subdirectories
print(os.listdir("../input"))
In [2]:
# install the EfficientNet Keras implementation
!pip install git+https://github.com/qubvel/efficientnet
from efficientnet import EfficientNetB3
File Directory Setting¶
In [3]:
# cropped-image data directory
DATA_PATH = '../input/car-crop'
os.listdir(DATA_PATH)
Out[3]:
In [4]:
#original data directory
DATA_PATH2 = '../input/2019-3rd-ml-month-with-kakr'
os.listdir(DATA_PATH2)
Out[4]:
In [5]:
# image folder paths
TRAIN_IMG_PATH = os.path.join(DATA_PATH, 'train')
TEST_IMG_PATH = os.path.join(DATA_PATH, 'test')
# CSV file paths
df_train = pd.read_csv(os.path.join(DATA_PATH2, 'train.csv'))
df_test = pd.read_csv(os.path.join(DATA_PATH2, 'test.csv'))
df_class = pd.read_csv(os.path.join(DATA_PATH2, 'class.csv'))
Train/Validation Data Split¶
In [6]:
df_train["class"] = df_train["class"].astype('str')
df_train = df_train[['img_file', 'class']]
df_test = df_test[['img_file']]
its = np.arange(df_train.shape[0])
train_idx, val_idx = train_test_split(its, train_size = 0.8, random_state=42)
X_train = df_train.iloc[train_idx, :]
X_val = df_train.iloc[val_idx, :]
print(X_train.shape)
print(X_val.shape)
print(df_test.shape)
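Note that StratifiedKFold is imported above but the split here is unstratified. With 196 classes, a stratified split keeps the class proportions identical across the two sets; a minimal sketch (same data and seed, only the stratify argument added):
train_idx, val_idx = train_test_split(its, train_size=0.8, random_state=42,
                                      stratify=df_train['class'])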
Parameter¶
In [7]:
def recall_m(y_true, y_pred):
    # batch-level recall: TP / (TP + FN), with rounded one-hot predictions
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    # batch-level precision: TP / (TP + FP)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    # harmonic mean of batch-level precision and recall
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
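These metrics round the clipped predictions and aggregate counts over the whole batch, so f1_m is a batch-level F1 that Keras then averages across batches. A quick sanity-check sketch on toy one-hot tensors (illustrative constants, not competition data):
y_true = K.constant([[0., 1.], [1., 0.], [0., 1.]])
y_pred = K.constant([[0.1, 0.9], [0.8, 0.2], [0.7, 0.3]])
# two of the three rows are predicted correctly: precision = recall = 2/3, F1 ≈ 0.667
print(K.eval(f1_m(y_true, y_pred)))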
In [8]:
# Parameter
img_size = (299, 299)
image_size = 299
nb_train_samples = len(X_train)
nb_validation_samples = len(X_val)
nb_test_samples = len(df_test)
epochs = 20
batch_size = 32
# Define Generator config
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True,
vertical_flip=False,
zoom_range=0.2,
fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)
In [9]:
#generator
train_generator = train_datagen.flow_from_dataframe(
dataframe=X_train,
directory='../input/car-crop/train_crop',
x_col = 'img_file',
y_col = 'class',
target_size = img_size,
color_mode='rgb',
class_mode='categorical',
batch_size=batch_size,
seed=42
)
validation_generator = val_datagen.flow_from_dataframe(
dataframe=X_val,
directory='../input/car-crop/train_crop',
x_col = 'img_file',
y_col = 'class',
target_size = img_size,
color_mode='rgb',
class_mode='categorical',
batch_size=batch_size,
shuffle=False,
seed=42
)
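Before training, it can help to eyeball what the augmentation actually produces; an optional sketch that draws one augmented batch from train_generator:
batch_x, batch_y = next(train_generator)
fig, axes = plt.subplots(1, 4, figsize=(12, 3))
for ax, img in zip(axes, batch_x[:4]):
    ax.imshow(img)  # pixels already rescaled to [0, 1]
    ax.axis('off')
plt.show()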
Model¶
In [10]:
def get_steps(num_samples, batch_size):
    if (num_samples % batch_size) > 0:
        return (num_samples // batch_size) + 1
    else:
        return num_samples // batch_size
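A worked example of the step count: a partial final batch adds one extra step.
assert get_steps(100, 32) == 4  # 3 full batches + 1 partial batch
assert get_steps(96, 32) == 3   # exact multiple, no extra step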
In [11]:
%%time
# model checkpoint path
MODEL_SAVE_FOLDER_PATH = './model/'
if not os.path.exists(MODEL_SAVE_FOLDER_PATH):
    os.mkdir(MODEL_SAVE_FOLDER_PATH)
model_path = MODEL_SAVE_FOLDER_PATH + '{epoch:02d}-{val_loss:.4f}.hdf5'
patient = 2
callbacks_list = [
    EarlyStopping(
        # monitor the validation loss
        monitor='val_loss',
        # stop training when it has not improved for `patient` epochs
        patience=patient,
        # val_loss should decrease, so the criterion mode is 'min'
        mode='min',
        # how much logging detail to show
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        # halve the learning rate when the callback fires
        factor=0.5,
        # as above, but wait only one epoch
        patience=patient // 2,
        # lower bound on the learning rate
        min_lr=0.00001,
        verbose=1,
        mode='min'
    )]
gc.collect()
Out[11]:
In [12]:
# model definition
def get_model():
    # ImageNet-pretrained EfficientNetB3 backbone without its classification head
    base_model = EfficientNetB3(weights='imagenet', include_top=False,
                                input_shape=(299, 299, 3))
    model = Sequential()
    model.add(base_model)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(2048, activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(196, activation='softmax'))
    #model.summary()
    return model
Optimizer 1: RMSprop¶
In [13]:
# compile & train
model_rmsprop = get_model()
model_rmsprop.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc',f1_m])
hist_rmsprop = model_rmsprop.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 2: Adam¶
In [14]:
# compile & train
model_adam = get_model()
model_adam.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['acc',f1_m])
hist_adam = model_adam.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 3: Nadam¶
In [15]:
# compile & train
model_nadam = get_model()
model_nadam.compile(loss='categorical_crossentropy', optimizer=Nadam(), metrics=['acc',f1_m])
hist_nadam = model_nadam.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 4: SGD¶
In [16]:
# compile & train
model_sgd = get_model()
model_sgd.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc',f1_m])
hist_sgd = model_sgd.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 5: SGD + Nesterov¶
In [17]:
# compile & train
model_sgdnes = get_model()
model_sgdnes.compile(loss='categorical_crossentropy', optimizer=SGD(nesterov=True), metrics=['acc',f1_m])
hist_sgdnes = model_sgdnes.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 6: SGD with momentum=0.9¶
In [18]:
# compile & train
model_sgdmo = get_model()
model_sgdmo.compile(loss='categorical_crossentropy', optimizer=SGD(momentum=0.9), metrics=['acc',f1_m])
hist_sgdmo = model_sgdmo.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
Optimizer 7: SGD + Nesterov with momentum=0.9¶
In [19]:
# compile & train
model_sgdmones = get_model()
model_sgdmones.compile(loss='categorical_crossentropy', optimizer=SGD(momentum=0.9, nesterov=True), metrics=['acc',f1_m])
hist_sgdmones = model_sgdmones.fit_generator(
train_generator,
steps_per_epoch = get_steps(nb_train_samples, batch_size),
epochs=epochs,
validation_data = validation_generator,
validation_steps = get_steps(nb_validation_samples, batch_size),
callbacks = callbacks_list
)
acc / loss Plot¶
train acc¶
In [20]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['acc'])
plt.plot(hist_adam.history['acc'])
plt.plot(hist_nadam.history['acc'])
plt.plot(hist_sgd.history['acc'])
plt.plot(hist_sgdnes.history['acc'])
plt.plot(hist_sgdmo.history['acc'])
plt.plot(hist_sgdmones.history['acc'])
plt.title('train. accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='lower right')
plt.show()
train loss¶
In [21]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['loss'])
plt.plot(hist_adam.history['loss'])
plt.plot(hist_nadam.history['loss'])
plt.plot(hist_sgd.history['loss'])
plt.plot(hist_sgdnes.history['loss'])
plt.plot(hist_sgdmo.history['loss'])
plt.plot(hist_sgdmones.history['loss'])
plt.title('train. loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')
plt.show()
valid acc¶
In [22]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['val_acc'])
plt.plot(hist_adam.history['val_acc'])
plt.plot(hist_nadam.history['val_acc'])
plt.plot(hist_sgd.history['val_acc'])
plt.plot(hist_sgdnes.history['val_acc'])
plt.plot(hist_sgdmo.history['val_acc'])
plt.plot(hist_sgdmones.history['val_acc'])
plt.title('valid. accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='lower right')
plt.show()
valid loss¶
In [23]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['val_loss'])
plt.plot(hist_adam.history['val_loss'])
plt.plot(hist_nadam.history['val_loss'])
plt.plot(hist_sgd.history['val_loss'])
plt.plot(hist_sgdnes.history['val_loss'])
plt.plot(hist_sgdmo.history['val_loss'])
plt.plot(hist_sgdmones.history['val_loss'])
plt.title('valid. loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')
plt.show()
train f1 score¶
In [24]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['f1_m'])
plt.plot(hist_adam.history['f1_m'])
plt.plot(hist_nadam.history['f1_m'])
plt.plot(hist_sgd.history['f1_m'])
plt.plot(hist_sgdnes.history['f1_m'])
plt.plot(hist_sgdmo.history['f1_m'])
plt.plot(hist_sgdmones.history['f1_m'])
plt.title('train. f1_score')
plt.ylabel('f1_score')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')
plt.show()
valid f1 score¶
In [25]:
plt.figure(figsize=(10, 6))
plt.plot(hist_rmsprop.history['val_f1_m'])
plt.plot(hist_adam.history['val_f1_m'])
plt.plot(hist_nadam.history['val_f1_m'])
plt.plot(hist_sgd.history['val_f1_m'])
plt.plot(hist_sgdnes.history['val_f1_m'])
plt.plot(hist_sgdmo.history['val_f1_m'])
plt.plot(hist_sgdmones.history['val_f1_m'])
plt.title('valid. f1_score')
plt.ylabel('f1_score')
plt.xlabel('epoch')
plt.legend(['rmsprop', 'adam', 'nadam', 'sgd', 'sgd+nesterov', 'sgd+momentum', 'sgd+nesterov+momentum'], loc='upper right')
plt.show()
Conclusion¶
- 'sgd' and 'sgd+nesterov' converge too slowly, so they do not seem to be suitable optimizers for this competition.
- 'rmsprop' and 'adam' reach high accuracy in comparatively little time.
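Since EarlyStopping ends each run at a different epoch, a per-optimizer table makes the comparison concrete; a minimal sketch built from the history objects above:
hists = {'rmsprop': hist_rmsprop, 'adam': hist_adam, 'nadam': hist_nadam,
         'sgd': hist_sgd, 'sgd+nesterov': hist_sgdnes,
         'sgd+momentum': hist_sgdmo, 'sgd+nesterov+momentum': hist_sgdmones}
summary = pd.DataFrame({name: {'best_val_acc': max(h.history['val_acc']),
                               'epochs_run': len(h.history['val_acc'])}
                        for name, h in hists.items()}).T
print(summary.sort_values('best_val_acc', ascending=False))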
In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))