Caffe (1-3) Convert MNIST dataset to bmp and test LeNet

1. Convert MNIST dataset to bmp

MNIST is a grayscale (1-channel) dataset. The program below demonstrates how to read the pixel intensities of the images from the binary file and save each image as a bmp file.

#!/usr/bin/python3
import os
import struct

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Convert every image of an MNIST IDX3 image file into an individual bmp file.
#
# File layout (all header integers are big-endian unsigned 32-bit):
#   magic number, number of images, rows per image, columns per image,
#   followed by rows*columns unsigned bytes per image.

# filename = "../../data/mnist/train-images-idx3-ubyte"
filename = "data/mnist/train-images-idx3-ubyte"
output_dir = "bmp"

# Magic number of an IDX3 (unsigned byte, 3-dimensional) image file: 0x00000803.
IDX3_MAGIC = 2051

# Read the whole file at once; the `with` block guarantees the handle is closed.
with open(filename, 'rb') as binfile:
    buf = binfile.read()

index = 0
# read 4 unsigned int with big-endian format
magic, numImages, numRows, numColumns = struct.unpack_from('>IIII', buf, index)
index += struct.calcsize('>IIII')  # move the cursor past the header

# Fail early on a wrong file (e.g. the label file) instead of writing garbage images.
if magic != IDX3_MAGIC:
    raise ValueError("%s is not an IDX3 image file (magic number %d)" % (filename, magic))

# One image is numRows*numColumns unsigned chars (28*28=784 for MNIST).
# The size comes from the header rather than being hard-coded, and the
# format is compiled once instead of being re-parsed for every image.
image_struct = struct.Struct('>%dB' % (numRows * numColumns))

# Image.save does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

for image in range(numImages):
    im = image_struct.unpack_from(buf, index)
    index += image_struct.size  # move the cursor to the next image

    # create a np array holding the image as a numRows x numColumns grid
    im = np.array(im, dtype='uint8').reshape(numRows, numColumns)

    # To preview an image while debugging, uncomment:
    # plt.imshow(im, cmap='gray')
    # plt.show()

    im = Image.fromarray(im)
    im.save(os.path.join(output_dir, "train_%s.bmp" % image), "bmp")


2. Query the classifier

We need to define another model file called "lenet.prototxt", which describes the network used for the query (inference) process.

The `input_param` line in the "data" layer defines the dimensions of the input. The first number, 64, means 64 images per batch. However, we test the model with just a single image.

# LeNet definition used for querying the trained model (not for training).
name: "LeNet"
# Input layer: declares the shape of the "data" blob that the caller fills in.
layer {
  name: "data"
  type: "Input"
  top: "data"
  # Shape: 64 images per batch, 1 channel (grayscale), 28 x 28 pixels.
  input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}
# First convolution: reads "data" and produces 20 feature maps with a 5x5 kernel, stride 1.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Learning-rate multipliers for the layer's two parameter blobs
  # (presumably weights, then bias - confirm against the Caffe docs).
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    # Initialisation fillers; NOTE(review): presumably irrelevant once the
    # pretrained .caffemodel weights are loaded - confirm.
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# First pooling layer: max pooling over "conv1" with a 2x2 window and stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# Second convolution: reads "pool1" and produces 50 feature maps with a 5x5 kernel, stride 1.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  # Learning-rate multipliers for the layer's two parameter blobs
  # (presumably weights, then bias - confirm against the Caffe docs).
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    # Initialisation fillers; NOTE(review): presumably irrelevant once the
    # pretrained .caffemodel weights are loaded - confirm.
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Second pooling layer: max pooling over "conv2" with a 2x2 window and stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# First fully connected (InnerProduct) layer: maps "pool2" to 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  # Learning-rate multipliers for the layer's two parameter blobs
  # (presumably weights, then bias - confirm against the Caffe docs).
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    # Initialisation fillers; NOTE(review): presumably irrelevant once the
    # pretrained .caffemodel weights are loaded - confirm.
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# ReLU activation on "ip1"; bottom and top are the same blob, so the result
# is written back under the name "ip1".
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# Second fully connected (InnerProduct) layer: maps "ip1" to 10 outputs
# (presumably one score per digit class 0-9 - confirm against the training net).
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  # Learning-rate multipliers for the layer's two parameter blobs
  # (presumably weights, then bias - confirm against the Caffe docs).
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    # Initialisation fillers; NOTE(review): presumably irrelevant once the
    # pretrained .caffemodel weights are loaded - confirm.
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Output layer: applies Softmax to "ip2" and exposes the result as "prob",
# the blob the query script reads its prediction from.
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}

Write a Python script to query the result.

#!/usr/bin/python3
import caffe

# Classify a single MNIST bmp image with the trained LeNet model.

# NOTE(review): presumably produced by converting the MNIST test set the same
# way as the training set, with a "test_" prefix - confirm the file exists.
IMAGE = "bmp/test_0.bmp"
MODEL_FILE = "lenet.prototxt"  # this is different from the training model
PRETRAINED = "lenet_iter_10000.caffemodel"

# Select the compute device BEFORE the net is built and run; calling it after
# predict() (as the original script did) cannot affect the prediction.
caffe.set_mode_gpu()

net = caffe.Classifier(MODEL_FILE, PRETRAINED)

# color=False loads the image as a single-channel (grayscale) image,
# matching the 1-channel input declared in the model file.
input_image = caffe.io.load_image(IMAGE, color=False)

# oversample=False classifies the image as given instead of averaging over crops/mirrors.
prediction = net.predict([input_image], oversample=False)

# prediction[0] holds the per-class scores of the first (only) image;
# the index of the largest score is the predicted class.
print('predicted class:', prediction[0].argmax())


留言

熱門文章