diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/__init__.py b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_solver.prototxt b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_solver.prototxt new file mode 100644 index 0000000000..d76ba8c7f1 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_solver.prototxt @@ -0,0 +1,24 @@ +# The train/test net protocol buffer definition +train_net: "ipcai_train.prototxt" +test_net: "ipcai_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# With the test batch size of 50 set in ipcai_test.prototxt and 100 test iterations, +# each test pass covers 5,000 test samples. +test_iter: 100 +# Carry out testing every 500 training iterations. +test_interval: 500 +# The base learning rate, momentum and the weight decay of the network. 
+base_lr: 0.01 +momentum: 0.0 +weight_decay: 0.0005 +# The learning rate policy +lr_policy: "inv" +gamma: 0.0001 +power: 0.75 +# Display every 1000 iterations +display: 1000 +# The maximum number of iterations +max_iter: 10000 +# snapshot intermediate results +snapshot: 5000 +snapshot_prefix: "snapshot" diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_test.prototxt b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_test.prototxt new file mode 100644 index 0000000000..8bdd2313aa --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_test.prototxt @@ -0,0 +1,77 @@ +layer { + name: "data" + type: "HDF5Data" + top: "data" + top: "label" + hdf5_data_param { + source: "ipcai_test_hdf5.h5_list.txt" + batch_size: 50 + } +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "data" + top: "fc1" + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "fc1" + top: "fc1" +} +layer { + name: "fc2" + type: "InnerProduct" + bottom: "fc1" + top: "fc2" + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "fc2" + top: "fc2" +} +layer { + name: "score" + type: "InnerProduct" + bottom: "fc2" + top: "score" + inner_product_param { + num_output: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "loss" + type: "EuclideanLoss" + bottom: "score" + bottom: "label" + top: "loss" +} diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_train.prototxt b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_train.prototxt new file mode 100644 index 0000000000..7367414c4a --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/ipcai_train.prototxt @@ 
-0,0 +1,77 @@ +layer { + name: "data" + type: "HDF5Data" + top: "data" + top: "label" + hdf5_data_param { + source: "ipcai_train_hdf5.h5_list.txt" + batch_size: 100 + } +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "data" + top: "fc1" + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "fc1" + top: "fc1" +} +layer { + name: "fc2" + type: "InnerProduct" + bottom: "fc1" + top: "fc2" + inner_product_param { + num_output: 100 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "fc2" + top: "fc2" +} +layer { + name: "score" + type: "InnerProduct" + bottom: "fc2" + top: "score" + inner_product_param { + num_output: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.1 + } + } +} +layer { + name: "loss" + type: "EuclideanLoss" + bottom: "score" + bottom: "label" + top: "loss" +} diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_hdf5_database.py b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_hdf5_database.py new file mode 100644 index 0000000000..cf8115835a --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_hdf5_database.py @@ -0,0 +1,31 @@ +import h5py, os +import caffe +import numpy as np +import pandas as pd + +from regression.preprocessing import preprocess + + +def create_hdf5(path_to_simulation_results, hdf5_name): + + df = pd.read_csv(path_to_simulation_results, header=[0, 1]) + + X, y = preprocess(df, snr=10.) + y = y.values + + with h5py.File(hdf5_name,'w') as H: + H.create_dataset('data', data=X ) # features are stored under the name 'data', the first top blob of the HDF5Data layer in the prototxt files + H.create_dataset('label', data=y ) # targets are stored under the name 'label', the second top blob of the HDF5Data layer
+ with open(hdf5_name + '_list.txt','w') as L: + L.write(hdf5_name) # the list file names the single .h5 file written above; the prototxt HDF5Data layers use this list file as their source + + +data_root = "/media/wirkert/data/Data/2016_02_02_IPCAI/results/intermediate" + +TRAIN_IMAGES = os.path.join(data_root, + "ipcai_revision_colon_mean_scattering_train_all_spectrocam.txt") +TEST_IMAGES = os.path.join(data_root, + "ipcai_revision_colon_mean_scattering_test_all_spectrocam.txt") + +create_hdf5(TRAIN_IMAGES, "ipcai_train_hdf5.h5") +create_hdf5(TEST_IMAGES, "ipcai_test_hdf5.h5") diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_lmdb_database.py b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_lmdb_database.py new file mode 100644 index 0000000000..ad0160c8ce --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_create_lmdb_database.py @@ -0,0 +1,52 @@ + +import os + +import pandas as pd +import lmdb +import caffe + +from regression.preprocessing import preprocess + + +def create_lmdb(path_to_simulation_results, lmdb_name): + + df = pd.read_csv(path_to_simulation_results, header=[0, 1]) + + X, y = preprocess(df, snr=10.) + y = y.values * 1000 # the label is truncated with int() when each Datum is filled below; presumably the scaling keeps three decimals - TODO confirm + + # We need to prepare the database for the size. We'll set it 10 times + # greater than what we theoretically need. There is little drawback to + # setting this too big. If you still run into problems after raising + # this, you might want to try saving fewer entries in a single + # transaction. 
+ map_size = X.nbytes * 10 + + env = lmdb.open(lmdb_name, map_size=map_size) + + with env.begin(write=True) as txn: + # a single write transaction stores every row of X as one serialized Datum + for i in range(X.shape[0]): + datum = caffe.proto.caffe_pb2.Datum() + datum.channels = X.shape[1] + datum.height = 1 + datum.width = 1 + datum.data = X[i].tobytes() # or .tostring() if numpy < 1.9; NOTE(review): writes the raw bytes of X[i] in its current dtype - confirm the reader expects that dtype + datum.label = int(y[i]) # truncated to an integer; y was multiplied by 1000 above + str_id = '{:08}'.format(i) + + # The encode is only essential in Python 3 + txn.put(str_id.encode('ascii'), datum.SerializeToString()) + + +data_root = "/media/wirkert/data/Data/2016_02_02_IPCAI/results/intermediate" + +TRAIN_IMAGES = os.path.join(data_root, + "ipcai_revision_colon_mean_scattering_train_all_spectrocam.txt") +TEST_IMAGES = os.path.join(data_root, + "ipcai_revision_colon_mean_scattering_test_all_spectrocam.txt") + +create_lmdb(TRAIN_IMAGES, "ipcai_train_lmdb") +create_lmdb(TEST_IMAGES, "ipcai_test_lmdb") + + diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_train_caffe.py b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_train_caffe.py new file mode 100644 index 0000000000..56f3216002 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai_to_caffe/script_train_caffe.py @@ -0,0 +1,83 @@ +from pylab import * + +import caffe +from caffe import layers as L, params as P + + +def ipcai(database, batch_size): + # fully connected regression net: HDF5 input, two 100-unit InnerProduct+ReLU layers, a 1-unit score layer and a Euclidean loss + n = caffe.NetSpec() + + n.data, n.label = L.HDF5Data(batch_size=batch_size, source=database, ntop=2) + + n.fc1 = L.InnerProduct(n.data, num_output=100, weight_filler=dict(type='xavier'), + bias_filler=dict(type='constant', value=0.1)) + n.relu1 = L.ReLU(n.fc1, in_place=True) + n.fc2 = L.InnerProduct(n.relu1, num_output=100, weight_filler=dict(type='xavier'), + bias_filler=dict(type='constant', value=0.1)) + n.relu2 = L.ReLU(n.fc2, in_place=True) + n.score = L.InnerProduct(n.relu2, num_output=1, weight_filler=dict(type='xavier'), 
+ bias_filler=dict(type='constant', value=0.1)) + n.loss = L.EuclideanLoss(n.score, n.label) + + return n.to_proto() + +with open('ipcai_train.prototxt', 'w') as f: + f.write(str(ipcai('ipcai_train_hdf5.h5_list.txt', 100))) + +with open('ipcai_test.prototxt', 'w') as f: + f.write(str(ipcai('ipcai_test_hdf5.h5_list.txt', 50))) + +caffe.set_device(0) +caffe.set_mode_gpu() + +### load the solver and create train and test nets +solver = None # ignore this workaround for lmdb data (can't instantiate two solvers on the same data) +solver = caffe.SGDSolver('ipcai_solver.prototxt') + +# print the name and shape of every blob in the training net +print [(k, v.data.shape) for k, v in solver.net.blobs.items()] + +# just print the weight sizes (we'll omit the biases) +print [(k, v[0].data.shape) for k, v in solver.net.params.items()] + +solver.net.forward() # train net +print solver.test_nets[0].forward() # test net (there can be more than one) + +niter = 100000 # NOTE(review): ipcai_solver.prototxt sets max_iter: 10000, while this loop steps the solver itself 100000 times - confirm which limit is intended +test_interval = 1000 +# losses will also be stored in the log +train_loss = zeros(niter) +test_acc = zeros(int(np.ceil(niter / test_interval))) # one slot per full test run; despite the name it holds a scaled mean absolute error (see the test loop below) +output = zeros((niter, 8, 10)) # NOTE(review): the score layer has num_output=1, so the trailing axis of 10 is presumably a leftover and each stored slice is broadcast across it - TODO confirm + +# the main solver loop +for it in range(niter): + solver.step(1) # SGD by Caffe + + # store the train loss + train_loss[it] = solver.net.blobs['loss'].data + + # store the output on the first test batch + # (start the forward pass at fc1 to avoid loading new data) + solver.test_nets[0].forward(start='fc1') + output[it] = solver.test_nets[0].blobs['score'].data[:8] + + # run a full test every so often + # (Caffe can also do this for us and write to a log, but we show here + # how to do it directly in Python, where more complicated things are easier.) + if it % test_interval == 0: + print 'Iteration', it, 'testing...' + mean = 0. 
+ for i in range(100): + solver.test_nets[0].forward() + mean += np.sum(np.abs(np.squeeze(solver.test_nets[0].blobs['score'].data) + - solver.test_nets[0].blobs['label'].data)) + mean = mean / 5000 + test_acc[it // test_interval] = mean * 100. # mean absolute error x 100; the 5000 above is 100 test batches x test batch size 50. NOTE(review): if the label blob is (50, 1) the subtraction above broadcasts to (50, 50) - confirm the label shape + +print "final testing accuracy: ", test_acc[-1] + + +print solver.test_nets[0].blobs['score'].data +print solver.test_nets[0].blobs['label'].data \ No newline at end of file