Home

A11: Cats vs Dogs [Extra credit]

This extra credit assignment helps you practice using the Keras library for deep learning. Your task is to modify and improve a convolutional neural net for separating pictures of cats from dogs, using images from the Kaggle competition.

Because we have little time for this assignment, your task will be relatively easy. You are asked only to try some variations of example Keras code shown in class to produce a better classifier.

This assignment is extra credit. Your score on this assignment will replace your lowest assignment score, assuming your score on this assignment is higher.

Task

Python code, also available on delenn: /home/jeckroth/csci431-public/catsdogs/catsdogs.py.

# adapted from https://www.kaggle.com/jeffd23/dogs-vs-cats-redux-kernels-edition/catdognet-keras-convnet-starter

import os, cv2, random, re
import numpy as np

from keras.models import Sequential, load_model
from keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop, SGD, Nadam
from keras.utils import np_utils
from keras import initializations

# next three funcs for sorting file lists numerically
# from: http://stackoverflow.com/a/4623518
def tryint(s):
    """ Return int(s) when s can be converted to an integer, otherwise
        return s unchanged.
        "23" -> 23, "z" -> "z"
    """
    try:
        return int(s)
    except (ValueError, TypeError):
        # only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) should propagate
        return s

def alphanum_key(s):
    """ Build a sort key from a string by splitting it into text chunks
        and digit runs, with each digit run converted by tryint.
        "z23a" -> ["z", 23, "a"]
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))

def sort_nicely(l):
    """ Return a new list with the items of l in "natural" order, i.e.
        embedded numbers compare numerically (see alphanum_key).
    """
    ordered = list(l)
    ordered.sort(key=alphanum_key)
    return ordered

# directories (relative to the working directory) that hold the training
# and test images; the trailing slash matters because file names are
# appended with plain string concatenation
TRAIN_DIR = 'train/'
TEST_DIR = 'test1/'

# every image is resized to ROWS x COLS by read_image(); CHANNELS is 3
# because images are read with cv2.IMREAD_COLOR
ROWS = 64
COLS = 64
CHANNELS = 3

def read_image(file_path):
    """ Load one image in colour and resize it to the network input size.

        file_path: path of the image file to read.
        Returns the resized image array produced by cv2.resize.
        Raises IOError when the file cannot be read as an image.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR) #cv2.IMREAD_GRAYSCALE
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather
        # than with an opaque error inside cv2.resize
        raise IOError("Could not read image: {}".format(file_path))
    # NOTE: the (ROWS, COLS) order is what prep_data()'s transpose expects;
    # do not swap it without changing prep_data as well
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)

def prep_data(images):
    """ Read every image file in `images` into one uint8 array of shape
        (len(images), CHANNELS, ROWS, COLS).

        images: list of image file paths.
        Prints a progress line every 250 images.
    """
    total = len(images)
    batch = np.empty((total, CHANNELS, ROWS, COLS), dtype=np.uint8)

    for idx, path in enumerate(images):
        # reverse the axes of the image returned by read_image so the
        # channel axis comes first
        batch[idx] = np.transpose(read_image(path))
        if idx % 250 == 0:
            print('Processed {} of {}'.format(idx, total))

    return batch

# The test file list is built unconditionally: save_predictions() needs its
# length even when the pixel data comes from the .npy cache.
test_images = sort_nicely([TEST_DIR+i for i in os.listdir(TEST_DIR)])

# Build the arrays from the image files only when the cache is incomplete;
# otherwise load the cached .npy files.
if not (os.path.isfile("train.npy") and os.path.isfile("test.npy") and os.path.isfile("labels.npy")):
    # list and sort the training directory once, then split by class name
    train_files = sort_nicely(os.listdir(TRAIN_DIR))
    train_dogs = [TRAIN_DIR+i for i in train_files if 'dog' in i]
    train_cats = [TRAIN_DIR+i for i in train_files if 'cat' in i]

    # choose how many images to train; set this smaller to get quicker results
    # (the limit applies to each class separately)
    MAX_IMAGES = 25000
    train_images = train_dogs[0:MAX_IMAGES] + train_cats[0:MAX_IMAGES]
    random.shuffle(train_images)

    train = prep_data(train_images)
    test = prep_data(test_images)

    # 1=dog, 0=cat, in the same (shuffled) order as train_images; stored as
    # an array so `labels` has the same type as in the cached branch below
    labels = np.array([1 if 'dog' in i else 0 for i in train_images])

    print("Saving...")
    np.save("labels.npy", labels)
    np.save("train.npy", train)
    np.save("test.npy", test)
else:
    print("Loading data...")
    labels = np.load("labels.npy")
    train = np.load("train.npy")
    test = np.load("test.npy")

print("Train shape: {}".format(train.shape))
print("Test shape: {}".format(test.shape))


# Training configuration used by make_model() when it compiles the network.
# Exactly one optimizer line should be active; the commented-out lines are
# alternatives to experiment with.
#optimizer = SGD(lr=0.01, decay=0.0, momentum=0.9)
optimizer = RMSprop(lr=1e-4)
#optimizer = Nadam()
# loss for the single sigmoid output unit (1=dog, 0=cat)
objective = 'binary_crossentropy'

# good idea to initialize relu units to a number very close to 0.0, say -0.01 to 0.01
def my_init(shape, name=None):
    """ Weight initializer: keras' uniform initialization with scale 0.01.

        shape, name: forwarded unchanged to initializations.uniform.
    """
    weights = initializations.uniform(shape, scale=0.01, name=name)
    return weights
# Expose the function as an attribute of keras' initializations module
# (workaround from https://github.com/fchollet/keras/issues/3868;
# presumably so a saved model can find 'my_init' by name when reloaded).
initializations.my_init = my_init

# next lines required for visualizing convolutions later; https://github.com/fchollet/keras/issues/2310
# K is the keras backend module: show_convolutions() uses K.function and
# deprocess_image() uses K.image_dim_ordering().
import keras.backend as K
K.learning_phase()

def make_model():
    """ Build and compile the cats-vs-dogs convolutional network.

        Architecture: four stages (32, 64, 128, 256 filters), each made of
        two 3x3 'same'-padded relu convolutions followed by 2x2 max
        pooling; then Flatten, Dense(256) and Dense(32) (each followed by
        25% dropout) and a single sigmoid output unit (1=dog, 0=cat).

        Compiles with the module-level `objective` and `optimizer`, prints
        the model summary, and returns the compiled Sequential model.
    """

    model = Sequential()

    # dim_ordering="th" selects the Theano-style, channels-first layout
    # (channels, rows, cols), which is how prep_data() stores the images.
    # The TensorFlow-style, channels-last layout would be "tf".
    conv_args = dict(border_mode='same', activation='relu', dim_ordering="th", init=my_init)

    # first stage: only the very first layer declares the input shape
    model.add(Convolution2D(32, 3, 3, input_shape=(CHANNELS, ROWS, COLS), **conv_args))
    model.add(Convolution2D(32, 3, 3, **conv_args))
    model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))

    # remaining stages: conv, conv, pool with a doubling filter count
    for nb_filter in (64, 128, 256):
        model.add(Convolution2D(nb_filter, 3, 3, **conv_args))
        model.add(Convolution2D(nb_filter, 3, 3, **conv_args))
        model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering="th"))

    # classifier head
    model.add(Flatten())
    model.add(Dense(256, activation='relu', init=my_init))
    model.add(Dropout(0.25))
    model.add(Dense(32, activation='relu', init=my_init))
    model.add(Dropout(0.25))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

    # print some info about the network
    model.summary()

    return model

# training hyperparameters, passed to model.fit() by run_model()
nb_epoch = 20
batch_size = 32

def run_model():
    """ Train the module-level `model` on `train`/`labels`, then return
        its predictions for `test`.

        20% of the training data is held out for validation
        (validation_split=0.2) and shuffle=False is passed to fit.
    """
    fit_options = dict(batch_size=batch_size, nb_epoch=nb_epoch,
                       validation_split=0.2, shuffle=False)
    # training step
    model.fit(train, labels, **fit_options)
    # prediction step
    return model.predict(test)

def save_predictions(predictions):
    """ Write predictions.csv with one "id,label" row per test image.

        predictions: 2-D array; predictions[i, 0] is the model output for
        the i-th entry of test_images. An output >= 0.5 is written as
        label 1 (dog), anything else as label 0 (cat). Ids are the
        1-based position in test_images. The first 50 decisions are also
        printed.
    """
    # note, 1=dog, 0=cat
    with open("predictions.csv", "w") as out:
        out.write("id,label\n")
        for i in range(len(test_images)):
            image_id = i + 1
            dog_score = predictions[i, 0]
            verbose = i < 50
            if dog_score >= 0.5:
                if verbose:
                    print('Image {}: I am {:.2%} sure this is a Dog'.format(image_id, dog_score))
                label = 1
            else:
                if verbose:
                    print('Image {}: I am {:.2%} sure this is a Cat'.format(image_id, 1-dog_score))
                label = 0
            out.write("{},{}\n".format(image_id, label))

# Train from scratch only when no saved model exists; otherwise reuse the
# saved one (no new predictions.csv is written in that case).
if not os.path.isfile("model.h5"):
    model = make_model()
    predictions = run_model()
    # save the trained model before writing the CSV, so a failure while
    # writing predictions does not throw away the training run
    print("Saving model")
    model.save("model.h5")
    save_predictions(predictions)
else:
    print("Loading model...")
    model = load_model("model.h5")

# http://stackoverflow.com/questions/39280813/visualization-of-convolutional-layer-in-keras-model
# util function to convert a tensor into a valid image
def deprocess_image(x):
    """ Convert a tensor (e.g. one feature map) into a uint8 image array.

        x: numeric array; it is not modified.
        The values are centred on 0, scaled to a standard deviation of
        roughly 0.1, shifted to 0.5, clipped to [0, 1] and mapped to
        0..255. A 3-axis input is moved to channels-last when the backend
        uses 'th' ordering; a 2-D input (single feature map, as passed by
        show_convolutions) is returned with its shape unchanged.
        Returns a new uint8 array.
    """
    # work on a new array so the caller's data is left untouched
    x = np.asarray(x)

    # normalize tensor: center on 0., ensure std is 0.1
    # (the 1e-5 guards against division by zero for a constant input)
    x = (x - x.mean()) / (x.std() + 1e-5) * 0.1

    # clip to [0, 1]
    x = np.clip(x + 0.5, 0, 1)

    # convert to RGB array
    x = x * 255
    # the (1, 2, 0) transpose is only defined for 3-axis tensors; checking
    # ndim first keeps 2-D feature maps from raising here
    if x.ndim == 3 and K.image_dim_ordering() == 'th':
        x = x.transpose((1, 2, 0))
    return np.clip(x, 0, 255).astype('uint8')

def show_convolutions():
    """ Write the feature maps of the first 12 layers (the convolution and
        pooling layers built by make_model) as PNG files for three test
        images.

        Uses the module-level `model` and `test`. For each chosen test
        image it writes convolutions/conv__<image>.png (the input image)
        and convolutions/conv_<image>_<layer>_<map>.png for every feature
        map; all three numbers are 1-based.
    """
    layercount = 12

    # cv2.imwrite does not create missing directories, so make sure the
    # output directory exists before writing anything
    if not os.path.isdir("convolutions"):
        os.makedirs("convolutions")

    # one backend function per layer, mapping the network input to that
    # layer's output
    layerfuncs = [K.function([model.layers[0].input], [model.layers[layeridx].output])
                  for layeridx in range(layercount)]

    print(layerfuncs)

    for i in [0, 400, 12499]:
        cv2.imwrite("convolutions/conv__{}.png".format(i+1), test[i].T)
        for layeridx, layerfunc in enumerate(layerfuncs):
            # test[i:i+1] keeps the batch axis (a batch of one image)
            layer = layerfunc([test[i:i+1]])
            print(np.shape(layer))
            # go through each of the layer's convolutions
            for j, conv in enumerate(layer[0][0], 1):
                img = deprocess_image(conv)
                cv2.imwrite("convolutions/conv_{}_{}_{}.png".format(i+1,layeridx+1,j), img)

# show_convolutions()

Deliverables

Your improved catsdogs.py and the resulting predictions.csv.

Grading rubric

You will receive full credit if your model performs better than the provided starter model on the official Kaggle leaderboard.

CSCI 431 material by Joshua Eckroth is licensed under a Creative Commons Attribution-ShareAlike 3.0 Unported License. Source code for this website available at GitHub.