Caffe (1-3) Convert MNIST dataset to bmp and test LeNet
1. Convert MNIST dataset to bmp
MNIST is a grayscale (1-channel) dataset. The program below demonstrates how to read the pixel intensities of the images from the binary file and save each image as a bmp file.
#!/usr/bin/python3
import os
import struct

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
# Convert every image of an MNIST IDX3 image file into its own bmp file.
# filename = "../../data/mnist/train-images-idx3-ubyte"
filename = "data/mnist/train-images-idx3-ubyte"
output_dir = "bmp"

# Read the whole file at once; the context manager closes the handle even
# if reading fails.
with open(filename, 'rb') as binfile:
    buf = binfile.read()

index = 0
# The header is 4 big-endian unsigned ints: magic number, number of images,
# rows per image and columns per image.
header_format = '>IIII'
magic, numImages, numRows, numColumns = struct.unpack_from(header_format, buf, index)
index += struct.calcsize(header_format)  # move the cursor past the header

# Each image is numRows * numColumns unsigned chars (28 * 28 = 784 for MNIST).
# Taking the size from the header avoids hard-coding the image dimensions.
image_format = '>%dB' % (numRows * numColumns)
image_size = struct.calcsize(image_format)

# Image.save() does not create missing directories, so make sure it exists.
os.makedirs(output_dir, exist_ok=True)

for image in range(numImages):
    im = struct.unpack_from(image_format, buf, index)
    index += image_size  # move the cursor to the next image
    # build a 2-D uint8 array holding the pixel intensities
    im = np.array(im, dtype='uint8').reshape(numRows, numColumns)
    # # display the image
    # plt.imshow(im, cmap='gray')
    # plt.show()
    im = Image.fromarray(im)
    im.save(os.path.join(output_dir, "train_%s.bmp" % image), "bmp")
2. Query the classifier
We need to define another model file called "lenet.prototxt", which describes the network used for the query (inference) process.
The `input_param` line of the "data" layer defines the dimensions of the input. The first number, 64, means a batch of 64 images. However, here we test the model with just a single image.
# Network definition used to query the classifier; images are fed in
# through an Input layer.
name: "LeNet"
# Input layer: produces the blob "data" consumed by conv1.
layer {
name: "data"
type: "Input"
top: "data"
# Shape: 64 images per batch, 1 channel (grayscale), 28 x 28 pixels.
# NOTE(review): the query script passes a single image although the
# batch dimension here is 64.
input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}
# First convolution: reads "data", writes "conv1" with 20 output channels
# using a 5x5 kernel and stride 1.
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
# Learning-rate multipliers for this layer's two parameter blobs
# (presumably weights first, then bias, per Caffe convention - confirm).
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
# Initializers for the parameters; presumably irrelevant once trained
# weights are loaded from a .caffemodel - confirm.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# First pooling: max-pools "conv1" with a 2x2 window and stride 2,
# writing "pool1".
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# Second convolution: reads "pool1", writes "conv2" with 50 output channels
# using a 5x5 kernel and stride 1.
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
# Learning-rate multipliers for this layer's two parameter blobs
# (presumably weights first, then bias, per Caffe convention - confirm).
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
# Initializers for the parameters; presumably irrelevant once trained
# weights are loaded from a .caffemodel - confirm.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# Second pooling: max-pools "conv2" with a 2x2 window and stride 2,
# writing "pool2".
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# First fully connected (InnerProduct) layer: reads "pool2" and writes
# "ip1" with 500 outputs.
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
# Learning-rate multipliers for this layer's two parameter blobs
# (presumably weights first, then bias, per Caffe convention - confirm).
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
# Initializers for the parameters; presumably irrelevant once trained
# weights are loaded from a .caffemodel - confirm.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# ReLU activation applied to "ip1"; bottom and top are the same blob, so
# the result overwrites "ip1" in place.
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
# Second fully connected (InnerProduct) layer: reads "ip1" and writes
# "ip2" with 10 outputs (presumably one score per digit class 0-9).
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
# Learning-rate multipliers for this layer's two parameter blobs
# (presumably weights first, then bias, per Caffe convention - confirm).
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 10
# Initializers for the parameters; presumably irrelevant once trained
# weights are loaded from a .caffemodel - confirm.
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# Output layer: applies Softmax to the 10 values of "ip2" and writes the
# result to "prob".
layer {
name: "prob"
type: "Softmax"
bottom: "ip2"
top: "prob"
}
Write a Python script to query the classifier and print the result.
#!/usr/bin/python3
import caffe
# Classify a single bmp image with the trained LeNet model.
# NOTE(review): the conversion script shown earlier writes "train_*.bmp";
# "test_0.bmp" presumably comes from running it on the MNIST test file - confirm.
IMAGE = "bmp/test_0.bmp"
MODEL_FILE = "lenet.prototxt"  # this is different from the training model
PRETRAINED = "lenet_iter_10000.caffemodel"

# Select GPU mode before the network is created and run: setting it after
# predict() has already returned cannot influence that prediction.
caffe.set_mode_gpu()

# color=False loads the image as grayscale, matching the 1-channel input.
input_image = caffe.io.load_image(IMAGE, color=False)

net = caffe.Classifier(MODEL_FILE, PRETRAINED)
# oversample=False: classify the image as given instead of averaging over crops.
prediction = net.predict([input_image], oversample=False)

# prediction[0] holds the scores for the single input image; the index of
# the largest score is the predicted class.
print('predicted class:', prediction[0].argmax())