Fine-Tuning InceptionV3

ImageNetで学習済みのInceptionV3をCaltech101にFine-Tuningする。

サイトに従ってFine-Tuning

参考 http://ronny.rest/blog/post_2017_10_13_tf_transfer_learning/

やってみる。

Inception V3の学習済みモデルは、いつも通り https://github.com/tensorflow/models/tree/master/research/slim に掲載されている http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz を使用する。

入出力

入力: Caltech101
出力:101
バッチサイズ:32

結果

できた。20エポック学習した時点で val accuracy は約0.965になった。

Initializing from Pretrained Weights
INFO:tensorflow:Restoring parameters from ./models/inception_v3.ckpt
----------------------------------------------
EPOCH 1/20 

100%|██████████| 216/216 [01:45<00:00,  2.41it/s]
100%|██████████| 54/54 [00:07<00:00,  8.15it/s]

    step:   53  loss: 0.3671 val accuracy: 0.8681
----------------------------------------------
EPOCH 2/20 

100%|██████████| 216/216 [01:29<00:00,  2.41it/s]
100%|██████████| 54/54 [00:06<00:00,  8.13it/s]

    step:   53  loss: 0.2679 val accuracy: 0.9236

(中略: EPOCH 3〜18)

----------------------------------------------
EPOCH 19/20 

100%|██████████| 216/216 [01:31<00:00,  2.34it/s]
100%|██████████| 54/54 [00:06<00:00,  7.97it/s]

    step:   53  loss: 0.2040 val accuracy: 0.9659
----------------------------------------------
EPOCH 20/20 

100%|██████████| 216/216 [01:31<00:00,  2.37it/s]
100%|██████████| 54/54 [00:06<00:00,  8.05it/s]

    step:   53  loss: 0.2002 val accuracy: 0.9653

ソースコード

参考サイトのコードをCaltech101用に変更したもの

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import os,time
# GPU selection is configured through environment variables *before*
# TensorFlow is imported below: devices are ordered by PCI bus ID and only
# device "0" is made visible to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
#from __future__ import print_function, division
import loadimg
from tqdm import tqdm
import matplotlib.pyplot as plt

# Wall-clock reference; the elapsed time is printed at the end of the script.
start = time.time()


# In[2]:


# Path template of the cached dataset arrays: ./models/data<name>.npy
np_aryname = './models/data{0}.npy'
# True  -> rebuild the arrays from the raw image directory via loadimg and
#          write them to the cache.
# False -> read the previously cached arrays.
SAVE = False

if SAVE:
    X_train, Y_train, X_test, Y_test, number_of_classes = loadimg.loadimg(
        '/home/tokunn/caltech101')

    np.save(np_aryname.format('X_train'), X_train)
    np.save(np_aryname.format('Y_train'), Y_train)
    np.save(np_aryname.format('X_test'), X_test)
    np.save(np_aryname.format('Y_test'), Y_test)
    np.save(np_aryname.format('number_of_classes'), number_of_classes)

else: # LOAD
    X_train = np.load(np_aryname.format('X_train'))
    Y_train = np.load(np_aryname.format('Y_train'))
    X_test = np.load(np_aryname.format('X_test'))
    Y_test = np.load(np_aryname.format('Y_test'))
    number_of_classes = np.load(np_aryname.format('number_of_classes'))

# np.load hands the class count back as a 0-d ndarray, whereas the SAVE branch
# keeps whatever loadimg returned. Normalise to a plain int so that both
# branches pass the same type on to the model definition.
number_of_classes = int(number_of_classes)

print("X_train", X_train.shape)
print("Y_train", Y_train.shape)
print("X_test", X_test.shape)
print("Y_test", Y_test.shape)
print("Number of Classes", number_of_classes)


# In[3]:


# CHECKPOINTS
# Pretrained Inception V3 weights that training starts from, and the file the
# fine-tuned model is written to after every epoch.
PRETRAINED_SNAPSHOT_FILE = "./models/inception_v3.ckpt"
SNAPSHOT_FILE = "./models/snapshot.ckpt"

# Directory for the tensorboard files - used to visualise the graph
TENSORBOARD_DIR = "logs"

# IMAGE SETTINGS
IMG_HEIGHT = 299                  # Height required by inception V3
IMG_WIDTH = 299                   # Width required by inception V3
N_CHANNELS = 3                    # Number of channels required by inception V3
N_CLASSES = number_of_classes     # Size of the classification output


# In[4]:


# Build the complete model inside its own graph: input placeholders,
# preprocessing, the Inception V3 body, loss, optimizer, accuracy metric,
# the two savers and a TensorBoard writer.
graph = tf.Graph()
with graph.as_default():
    # INPUTS
    with tf.name_scope("inputs") as scope:
        input_dims = (None, IMG_HEIGHT, IMG_WIDTH, N_CHANNELS)
        tf_X = tf.placeholder(tf.float32, shape=input_dims, name="X")
        # One integer class label per image
        tf_Y = tf.placeholder(tf.int32, shape=[None], name="Y")
        # Learning rate handed to the optimizer; 0.001 unless fed explicitly
        tf_alpha = tf.placeholder_with_default(0.001, shape=None, name="alpha")
        # Inference mode unless True is fed explicitly
        tf_is_training = tf.placeholder_with_default(False, shape=None, name="is_training")

    # PREPROCESSING STEPS
    with tf.name_scope("preprocess") as scope:
        # NOTE(review): assumes the raw pixel values are in 0..255 - confirm
        # against what loadimg produces.
        scaled_inputs = tf.div(tf_X, 255., name="rescaled_inputs")

    # BODY
    arg_scope = tf.contrib.slim.nets.inception.inception_v3_arg_scope()
    with tf.contrib.framework.arg_scope(arg_scope):
        tf_logits, end_points = tf.contrib.slim.nets.inception.inception_v3(
            scaled_inputs,
            num_classes=N_CLASSES,
            is_training=tf_is_training,
            dropout_keep_prob=0.8)

    # PREDICTIONS - index of the largest logit for each image
    tf_preds = tf.to_int32(tf.argmax(tf_logits, axis=-1), name="preds")

    # LOSS - Sums all losses (even Regularization losses)
    with tf.variable_scope('loss') as scope:
        unrolled_labels = tf.reshape(tf_Y, (-1,))
        # The return value is not kept; the total is collected by
        # get_total_loss() on the next line.
        tf.losses.sparse_softmax_cross_entropy(labels=unrolled_labels,
                                               logits=tf_logits)
        tf_loss = tf.losses.get_total_loss()

    # OPTIMIZATION - Also updates batchnorm operations automatically
    # NOTE(review): the 'opt' scope presumably also keeps the optimizer's own
    # variables out of the "InceptionV3" scope that the pretrained saver
    # below selects - confirm before renaming or removing it.
    with tf.variable_scope('opt') as scope:
        tf_optimizer = tf.train.AdamOptimizer(tf_alpha, name="optimizer")
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # for batchnorm
        with tf.control_dependencies(update_ops):
            tf_train_op = tf_optimizer.minimize(tf_loss, name="train_op")

    # Evaluation - fraction of images whose top-scoring class equals the label
    with tf.variable_scope('eval') as scope:
        y = tf.nn.softmax(tf_logits, name='softmax')
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(y, 1), tf.cast(tf_Y, tf.int64)), tf.float32)
        )

    # PRETRAINED SAVER SETTINGS
    # Lists of scopes of weights to include/exclude from pretrained snapshot
    # NOTE(review): the two logits heads are presumably excluded because
    # their size depends on N_CLASSES and so differs from the pretrained
    # checkpoint - confirm.
    pretrained_include = ["InceptionV3"]
    pretrained_exclude = ["InceptionV3/AuxLogits", "InceptionV3/Logits"]

    # PRETRAINED SAVER - For loading pretrained weights on the first run
    pretrained_vars = tf.contrib.framework.get_variables_to_restore(
        include=pretrained_include,
        exclude=pretrained_exclude)
    tf_pretrained_saver = tf.train.Saver(pretrained_vars, name="pretrained_saver")

    # MAIN SAVER - For saving/restoring your complete model
    tf_saver = tf.train.Saver(name="saver")

    # TENSORBOARD - To visualize the architecture
    with tf.variable_scope('tensorboard') as scope:
        tf_summary_writer = tf.summary.FileWriter(TENSORBOARD_DIR, graph=graph)
        # Constant placeholder summary; nothing in this block evaluates it
        tf_dummy_summary = tf.summary.scalar(name="dummy", tensor=1)


# In[5]:


def initialize_vars(session, resume=False):
    """Give every variable of the graph a value inside ``session``.

    Args:
        session: The ``tf.Session`` whose variables are initialised.
        resume: If true and a checkpoint exists at ``SNAPSHOT_FILE``, the
            complete model is restored from it. Defaults to ``False``,
            which always starts from the pretrained weights - the behaviour
            that used to be hard-coded through ``if False:``.
    """
    if resume and tf.train.checkpoint_exists(SNAPSHOT_FILE):
        print(" Loading from Main Checkpoint")
        tf_saver.restore(session, SNAPSHOT_FILE)
        return

    print("Initializing from Pretrained Weights")
    # Run the initializer first so that variables the pretrained saver does
    # not cover still get a value; the restore then overwrites the variables
    # that the pretrained saver does cover.
    session.run(tf.global_variables_initializer())
    tf_pretrained_saver.restore(session, PRETRAINED_SNAPSHOT_FILE)


# In[ ]:


# Fine-tune for a fixed number of epochs. Each epoch runs one pass over the
# training batches, one pass over the test batches to measure accuracy, adds
# one curve to the matplotlib figure and writes a snapshot.
with tf.Session(graph=graph) as sess:
    n_epochs = 20
    batch_size = 32 # small batch size so inception v3 can be run on laptops
    # Floor division: samples beyond the last full batch are never visited.
    steps_per_epoch = len(X_train)//batch_size
    steps_per_epoch_val = len(X_test)//batch_size
    if steps_per_epoch == 0:
        # Without a single training step `loss` would still be unbound when
        # the epoch summary is printed; fail early with a clear message.
        raise ValueError(
            "Need at least {} training samples, got {}".format(
                batch_size, len(X_train)))

    initialize_vars(session=sess)

    for epoch in range(n_epochs):
        print("----------------------------------------------", flush=True)
        print("EPOCH {}/{}".format(epoch+1, n_epochs), flush=True, end=' ')

        # TRAINING PASS - one optimizer step per full batch, in storage order
        for step in tqdm(range(steps_per_epoch)):
            batch = slice(batch_size*step, batch_size*(step+1))
            feed_dict = {tf_X: X_train[batch],
                         tf_Y: Y_train[batch],
                         tf_alpha: 0.0001,
                         tf_is_training: True}
            loss, _ = sess.run([tf_loss, tf_train_op], feed_dict=feed_dict)

        # VALIDATION PASS - accuracy of every full test batch
        val_accuracy = []
        for step in tqdm(range(steps_per_epoch_val)):
            batch = slice(batch_size*step, batch_size*(step+1))
            # The accuracy op does not use the learning rate, so it is not fed.
            feed_dict = {tf_X: X_test[batch],
                         tf_Y: Y_test[batch],
                         tf_is_training: False}
            val_accuracy.append(sess.run(accuracy, feed_dict=feed_dict))

        # PRINT FEEDBACK - once per epoch. `step` is whatever the last loop
        # left behind (normally the final validation batch index) and `loss`
        # belongs to the final training batch. All batches have the same
        # size, so the plain mean is the accuracy over the visited samples.
        total_val_accuracy = np.average(np.asarray(val_accuracy))
        print("\tstep: {: 4d}  loss: {:0.4f} val accuracy: {:0.4f}".format(
                    step, loss, total_val_accuracy))

        # Add this epoch's logits for the first test image to the figure.
        # Labels are not needed to compute logits, so tf_Y is not fed.
        plt.plot(sess.run(tf_logits, feed_dict={
            tf_X: [X_test[0]],
            tf_is_training: False
        })[0])

        # SAVE SNAPSHOT - after each epoch
        tf_saver.save(sess, SNAPSHOT_FILE)

    # Flush and close the TensorBoard event file so the graph written when
    # the writer was created is on disk once training has finished.
    tf_summary_writer.close()


# In[ ]:


# Report the wall-clock seconds that have passed since `start` was recorded.
end = time.time()
elapsed_seconds = end - start
print("Time : {0}".format(elapsed_seconds))


# In[ ]:


# Display the figure that received one plt.plot curve per epoch in the
# training loop.
plt.show()