[TOC]
# 第四課
1. 編寫自定義層
2. 訓練過程可視化
# 視頻
## 下載
鏈接: https://pan.baidu.com/s/1NyvieeskIhq8F3kQqgWR7w 密碼: qhky
## 在線觀看(強烈推薦)
1080 高清無碼帶字幕
https://youtu.be/fnTlICNbPag
# 準備文件
這是手寫訓練數據集

# 自定義層
編寫自定義層就不用lmdb啦,研究學習更方便。
# 解析數據集mnist
首先我們要解析它。
http://yann.lecun.com/exdb/mnist/

這個網站說明了mnist的數據結構是怎麼樣的,以及要如何讀取出來。

# 訓練過程可視化
有了自定義層,那么卷積的結果我們就能夠看到了。雖然越往后面的過程我們人類基本是看不懂了。

# 源代碼
```
#include <cv.h>
#include <highgui.h>
#include <cc_utils.h>
#include <Windows.h>
using namespace cv;
using namespace std;
using namespace cc;
#pragma comment(lib, "libcaffe.lib")
//解析訓練的數據集label
//Parse an MNIST label file (idx1-ubyte format) into a vector of labels.
//Layout per http://yann.lecun.com/exdb/mnist/ : 4-byte magic number,
//4-byte big-endian item count, then one unsigned byte per label.
//Returns an empty vector if the file cannot be opened or the header is short;
//a truncated body yields only the labels actually read.
std::vector<unsigned char> parseTrainingSetLabel(const char* file){
std::vector<unsigned char> out;
FILE* f = std::fopen(file, "rb");
if (f){
unsigned int val[2];
if (std::fread(val, 1, sizeof(val), f) == sizeof(val)){
//The count is stored big-endian; byte-swap it for little-endian hosts.
//Unsigned arithmetic keeps every shift well defined (the original signed
//form shifted into the sign bit, which is undefined behavior).
unsigned int num = val[1];
num = ((num & 0xFFu) << 24) | ((num & 0xFF00u) << 8) | ((num & 0xFF0000u) >> 8) | ((num & 0xFF000000u) >> 24);
out.resize(num);
if (num > 0){
size_t got = std::fread(&out[0], 1, out.size(), f);
out.resize(got);   //drop any tail the file didn't actually contain
}
}
std::fclose(f);
}
return out;
}
vector<Mat> parseTrainingSetImage(const char* file){
vector<Mat> out;
FILE* f = fopen(file, "rb");
if (f){
int val[4];
fread(val, 1, sizeof(val), f);
int num = val[1];
num = ((num & 0xFF) << 24) | ((num & 0xFF00) << 8) | ((num & 0xFF0000) >> 8) | ((num & 0xFF000000) >> 24);
Mat temp(28, 28, CV_8U);
for (int i = 0; i < num; ++i){
fread(temp.data, 1, 28 * 28, f);
out.emplace_back(temp.clone());
}
fclose(f);
}
return out;
}
//Custom data layer that feeds MNIST images and labels to the network,
//replacing the stock lmdb-backed data layer so raw files can be used directly.
class MnistDataLayer : public DataLayer{
public:
SETUP_LAYERFUNC(MnistDataLayer);
virtual ~MnistDataLayer(){
//Stop the asynchronous batch-loading thread before members are destroyed.
stopBatchLoader();
}
//Fills one batch: top[0] receives the images, top[1] the labels.
//Called by the framework's batch-loader thread (via the DataLayer base).
virtual void loadBatch(Blob** top, int numTop){
Blob* image = top[0];
Blob* label = top[1];
float* label_ptr = label->mutable_cpu_data();
for (int i = 0; i < batch_size_; ++i){
//item_inds_ is a shuffled index table, so samples come out in random order.
int ind = item_inds_[item_cursor_];
Mat img = images_[ind].clone();
int lab = labels_[ind];
//Write the label straight into caffe's blob.
*label_ptr++ = lab;
//Convert to float and scale by 1/256 (0.00390625) into roughly [0, 1).
img.convertTo(img, CV_32F, 0.00390625);
image->setDataRGB(i, img);
item_cursor_++;
if (item_cursor_ == images_.size()){
//One epoch consumed: rewind the cursor and reshuffle the index table.
item_cursor_ = 0;
std::random_shuffle(item_inds_.begin(), item_inds_.end());
}
}
}
//Loads the dataset split for the current phase, builds the shuffled index
//table, parses param_str (batch_size, width, height) and shapes the outputs.
virtual void setup(const char* name, const char* type, const char* param_str, int phase, Blob** bottom, int numBottom, Blob** top, int numTop){
this->phase_ = phase;
//Pick the train or test split; file names are the standard MNIST ones,
//resolved against the current working directory (set in main).
if (this->phase_ == PhaseTrain){
labels_ = parseTrainingSetLabel("train-labels-idx1-ubyte");
images_ = parseTrainingSetImage("train-images-idx3-ubyte");
}
else{
labels_ = parseTrainingSetLabel("t10k-labels-idx1-ubyte");
images_ = parseTrainingSetImage("t10k-images-idx3-ubyte");
}
item_cursor_ = 0;
item_inds_.resize(images_.size());
//Fill item_inds_ with 0..N-1, then shuffle so iteration order is random,
//e.g. {0,1,2,...} may become {3,2,0,5,...}.
for (int i = 0; i < item_inds_.size(); ++i)
item_inds_[i] = i;
std::random_shuffle(item_inds_.begin(), item_inds_.end());
//Parse the layer parameters and set the output shapes.
auto params = parseParamStr(param_str);
this->batch_size_ = getParamInt(params, "batch_size");
int width = getParamInt(params, "width");
int height = getParamInt(params, "height");
CV_Assert(this->batch_size_ > 0);
top[0]->Reshape(batch_size_, 1, height, width);
top[1]->Reshape(batch_size_, 1, 1, 1);
//Let the base class finish setup (starts its batch loader).
//NOTE: __super is an MSVC extension meaning DataLayer here.
__super::setup(name, type, param_str, phase, bottom, numBottom, top, numTop);
}
virtual void reshape(Blob** bottom, int numBottom, Blob** top, int numTop){
}
private:
int item_cursor_;       //position in item_inds_ of the next sample to emit
vector<int> item_inds_; //shuffled sample indices
int batch_size_;        //samples per batch, from param_str
vector<Mat> images_;    //decoded 28x28 MNIST images
vector<unsigned char> labels_; //one label byte per image
int phase_;             //PhaseTrain or the test phase
};
//Debug layer that displays the feature maps of its bottom blob in an OpenCV
//window while training runs, so intermediate convolution outputs can be seen.
class VisualLayer : public AbstractCustomLayer{
public:
SETUP_LAYERFUNC(VisualLayer);
virtual void setup(const char* name, const char* type, const char* param_str, int phase, Blob** bottom, int numBottom, Blob** top, int numTop){
}
virtual void forward(Blob** bottom, int numBottom, Blob** top, int numTop){
Blob* conv2 = bottom[0];
//Tile all channels side by side into one height x (width*channels) image.
//NOTE(review): only data starting at the blob's base pointer is read, so
//presumably just the first item of the batch is visualized - confirm.
Mat mat(conv2->height(), conv2->width()*conv2->channel(), CV_32F);
float* ptr = conv2->mutable_cpu_data();
for (int i = 0; i < conv2->channel(); ++i){
//Wrap each channel's plane in a borrowed Mat header and copy it into its slot.
Mat(conv2->height(), conv2->width(), CV_32F, ptr).copyTo(mat(Rect(conv2->width()*i, 0, conv2->width(), conv2->height())));
ptr += conv2->width() * conv2->height();
}
//Enlarge 10x so the small feature maps are visible on screen.
resize(mat, mat, mat.size() * 10);
imshow("ip2", mat);
waitKey(1); //1 ms pump so the window refreshes without blocking training
}
virtual void reshape(Blob** bottom, int numBottom, Blob** top, int numTop){
}
};
//Entry point: registers the two custom layers and runs the lenet solver.
//Fixed: `void main()` is not standard C++; main must return int.
int main(){
#if 0
//Standalone check of the dataset parsers (disabled).
auto train_label = parseTrainingSetLabel("train-labels-idx1-ubyte");
auto train_images = parseTrainingSetImage("train-images-idx3-ubyte");
auto test_label = parseTrainingSetLabel("t10k-labels-idx1-ubyte");
auto test_images = parseTrainingSetImage("t10k-images-idx3-ubyte");
#endif
//Machine-specific data directory: the MNIST files and the prototxt are
//resolved relative to it. Adjust this path to your own checkout.
SetCurrentDirectoryA("F:/research/1.CCDL/第四課-手寫自定義層-mnist-lenet/data");
installRegister();
//Make the custom layers visible to the prototxt by their class names.
INSTALL_LAYER(MnistDataLayer);
INSTALL_LAYER(VisualLayer);
WPtr<Solver> solver = loadSolverFromPrototxt("lenet_solver.prototxt");
solver->Solve();
return 0;
}
```
- cc框架
- 視頻課程
- 第一課:CC框架簡介
- 第二課:相關基本概念
- 第三課:訓練驗證碼識別
- 第四課:圖像識別、分類、人臉識別
- 第五課:CNN+LSTM少樣本高精度驗證碼識別
- 第六課:總結和展望
- 快速入門
- 常用碼表-label-map
- 目標檢測入門
- 人臉檢測入門
- 重要說明必看
- 雜項
- CC框架使用須知
- 機器學習速成課程 谷歌
- cc4.0視頻教程
- 前言
- cc4.0介紹
- 第一課筆記
- yzm-lstm.cpp 帶注釋
- 第二課筆記
- 眾籌結果公示
- 第三課筆記
- 目標檢測算法之SSD
- 第四課筆記
- 其他資訊
- YOLO3
- 資料
- 性能計算
- 筆記
- cc4.0訓練流程圖
- cc4.0 alexnet 訓練 mnist
- cc4.0 googlenet 訓練 mnist
- cc4.0 加層
- tensorflow -> caffemodel課題筆記
- getnetShape
- RPN
- 記錄
- 數據集
- mnist
- Cifar10
- mtcnn樣本轉ssd樣本易語言代碼