https://blog.csdn.net/chennbnbnb/article/details/97660084
```cpp
/*
@Author: CHH
@Version: V1.0
@Email: chh_is_dog@163.com
@Date: 2019-7-29
@Purpose: handwritten digit recognition with a backpropagation neural network
@Usage:
1. The compiled program looks for the four MNIST files in its own directory
2. Dataset download: http://yann.lecun.com/exdb/mnist/
3. In DEBUG mode only a small subset of the training samples is used
4. All output goes to stdout
@Model:
1. Activation function: sigmoid
2. Loss function: cross-entropy
3. Training: backpropagation + steepest gradient descent
4. Learning rate = 0.5
5. Three layers of neurons, fully connected:
a. input layer: 28*28 neurons
b. hidden layer: 20 neurons
c. output layer: 10 neurons
@Program structure: four procedural modules
1. A matrix type and basic matrix operations
2. Reading and cleaning the MNIST data
3. Forward propagation -> backpropagation -> gradient descent
4. Model evaluation
*/
#include <iostream>
#include <cstdio>
#include <vector>
#include <algorithm>
#include <cmath>
#include <fstream>
#include <cstdlib>
#include <ctime>
#include <cstring>
using namespace std;
#define DEBUG
const int IMAGE_SIZE = 28 * 28;
const int LABEL_SIZE = 1;
const int OUT_SIZE = 10;
const double INF = 1.7e308;
const double EPS = 1e-6;
const double E = 2.718281828459;
#ifdef DEBUG
const int NUM_TRAIN = 100;
const int NUM_TEST = 10;
#else
const int NUM_TRAIN = 60000;
const int NUM_TEST = 10000;
#endif
// Matrix: a dense matrix stored as nested vectors (row-major)
typedef vector<vector<double>> Matrix;
// Matrix constructors: allocate row x col, optionally filled with val
void construct(Matrix &mat, int row, int col)
{
mat.resize(row);
for (int i = 0; i < row; i++)
mat[i].resize(col);
}
void construct(Matrix &mat, int row, int col, double val)
{
mat.resize(row);
for (int i = 0; i < row; i++)
mat[i].resize(col, val);
}
// Matrix addition
inline const Matrix addition(const Matrix &a, const Matrix &b)
{
if (a.size() != b.size() || a[0].size() != b[0].size())
throw "ERROR: Matrix addition format wrong";
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(a[0].size());
for (int i = 0; i < a.size(); i++)
for (int j = 0; j < a[i].size(); j++)
res[i][j] = a[i][j] + b[i][j];
return res;
}
// Matrix subtraction
inline const Matrix subtract(const Matrix &a, const Matrix &b)
{
if (a.size() != b.size() || a[0].size() != b[0].size())
throw "ERROR: Matrix subtract format wrong";
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(a[0].size());
for (int i = 0; i < a.size(); i++)
for (int j = 0; j < a[i].size(); j++)
res[i][j] = a[i][j] - b[i][j];
return res;
}
// Matrix multiplication
inline const Matrix multiply(const Matrix &a, const Matrix &b)
{
if (a[0].size() != b.size())
throw "ERROR: Matrix multiply format wrong";
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(b[0].size(), 0);
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
for (int k = 0; k < a[0].size(); k++)
res[i][j] += a[i][k] * b[k][j];
return res;
}
// Element-wise (Hadamard) product; note: despite the name, matmul here is not matrix multiplication
inline const Matrix matmul(const Matrix &a, const Matrix &b)
{
if (a.size() != b.size() || a[0].size() != b[0].size())
throw "ERROR: Matrix matmul format wrong";
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(a[0].size());
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
res[i][j] = a[i][j] * b[i][j];
return res;
}
// Matrix-scalar multiplication
inline const Matrix multiply(const Matrix &a, double val)
{
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(a[0].size());
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
res[i][j] = a[i][j] * val;
return res;
}
// Matrix-scalar division
inline const Matrix division(const Matrix &a, double val)
{
Matrix res;
res.resize(a.size());
for (int i = 0; i < a.size(); i++)
res[i].resize(a[0].size());
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
res[i][j] = a[i][j] / val;
return res;
}
// Matrix transpose
inline const Matrix transpose(const Matrix &mat)
{
Matrix res;
res.resize(mat[0].size());
for (int i = 0; i < res.size(); i++)
res[i].resize(mat.size());
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
res[i][j] = mat[j][i];
return res;
}
// Print a matrix with its dimensions
void printMatrix(const Matrix &mat)
{
cout << mat.size() << " * " << mat[0].size() << endl;
for (int i = 0; i < mat.size(); i++)
{
for (int j = 0; j < mat[i].size(); j++)
cout << mat[i][j] << " ";
cout << endl;
}
}
// Print a 28x28 image stored as a 784x1 column vector
inline void printImage(const Matrix &data)
{
for (int i = 0; i < 28; i++)
{
for (int j = 0; j < 28; j++)
{
printf("%.2lf ", data[28 * i + j][0]);
}
cout << '\n';
}
}
// A data point: image as a 784x1 column vector, label one-hot encoded as 10x1
struct Point
{
//double image[IMAGE_SIZE];
//bool label[OUT_SIZE];
Matrix image;
Matrix label;
Point(char *image, uint8_t num)
{
this->image.resize(IMAGE_SIZE);
for (int i = 0; i < IMAGE_SIZE; i++)
{
this->image[i].resize(1);
this->image[i][0] = (uint8_t)image[i];
}
label.resize(OUT_SIZE);
for (int i = 0; i < OUT_SIZE; i++)
{
label[i].resize(1);
label[i][0] = 0;
}
label[num][0] = 1;
}
};
vector<Point> TrainData, TestData;
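// MNIST IDX file layout (see http://yann.lecun.com/exdb/mnist/):
//   image file header: magic, count, rows, cols -> 4 * 32-bit big-endian ints (16 bytes)
//   label file header: magic, count             -> 2 * 32-bit big-endian ints (8 bytes)
// The headers are skipped below; the sample counts come from NUM_TRAIN / NUM_TEST
// rather than being parsed from the files.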
// Read the training and test data
void readALLData(vector<Point> &train, vector<Point> &test)
{
char rubbish[16];
ifstream train_images("./train-images.idx3-ubyte", ios::binary | ios::in);
ifstream train_labels("./train-labels.idx1-ubyte", ios::binary | ios::in);
if (!train_images.is_open() || !train_labels.is_open())
{
cout << "ERROR: cannot open the MNIST training files" << endl;
exit(1);
}
train_images.read(rubbish, 16); // skip the header: 4 * 32-bit integers
train_labels.read(rubbish, 8); // skip the header: 2 * 32-bit integers
for (int i = 0; i < NUM_TRAIN; i++)
{
char image[IMAGE_SIZE];
uint8_t num;
train_images.read(image, IMAGE_SIZE);
train_labels.read((char *)(&num), LABEL_SIZE);
train.push_back({image, num});
// printImage(train[i].image);
}
ifstream test_images("./t10k-images.idx3-ubyte", ios::binary | ios::in);
ifstream test_labels("./t10k-labels.idx1-ubyte", ios::binary | ios::in);
if (!test_images.is_open() || !test_labels.is_open())
{
cout << "ERROR: cannot open the MNIST test files" << endl;
exit(1);
}
test_images.read(rubbish, 16); // skip the header: 4 * 32-bit integers
test_labels.read(rubbish, 8); // skip the header: 2 * 32-bit integers
for (int i = 0; i < NUM_TEST; i++)
{
char image[IMAGE_SIZE];
uint8_t num;
test_images.read(image, IMAGE_SIZE);
test_labels.read((char *)(&num), LABEL_SIZE);
test.push_back({image, num});
}
// printImage(test[0].image);
// for(int i=0; i<10; i++)
// if(test[0].label[i])
// cout<<i;
}
// Normalization
void Normalize(vector<Point> &set)
{
vector<Point>::iterator it;
it = set.begin();
while (it != set.end())
{
for (int i = 0; i < IMAGE_SIZE; i++)
{
it->image[i][0] /= 180.0; // map pixel values into roughly [0, 1.4] (255 / 180 ≈ 1.42)
}
it++;
}
}
// Neural network
// Architecture: three fully connected layers, input 28*28 -> hidden 20 -> output 10
const int LAYER_NUM = 3;
const int NEURE_NUM[LAYER_NUM] = {IMAGE_SIZE, 20, OUT_SIZE};
// Activation function (sigmoid)
inline double sigmod(double x)
{
return 1.0 / (1.0 + exp(-x)); // note: the denominator is 1 + e^(-x); the leading 1 is easy to forget
}
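// Note: the sigmoid derivative satisfies s'(x) = s(x) * (1 - s(x)), so
// backPropagation below can reuse the stored activations a * (1 - a)
// instead of re-evaluating the derivative.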
inline const Matrix sigmod(const Matrix &mat)
{
Matrix res;
construct(res, mat.size(), mat[0].size());
for (int i = 0; i < res.size(); i++)
for (int j = 0; j < res[0].size(); j++)
res[i][j] = sigmod(mat[i][j]);
return res;
}
// Network state
vector<Matrix> Weight; // weights
vector<Matrix> Bias; // biases
vector<Matrix> Error; // error terms (deltas)
vector<Matrix> der_Weight; // partial derivatives of the weights
vector<Matrix> der_Bias; // partial derivatives of the biases
vector<Matrix> receive; // pre-activation values (weighted inputs)
vector<Matrix> activation; // post-activation values
int iteration; // maximum number of iterations
double step; // learning rate
// Initialize the network parameters
void initialize(void)
{
// hyperparameters
iteration = 200;
step = 0.5;
// seed the RNG once; seeding per layer with time(0) would give every layer the same seed
srand(time(0));
// weight initialization
Weight.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape
construct(Weight[i], NEURE_NUM[i], NEURE_NUM[i - 1]);
// randomize, scaled by sqrt(1 / fan_in)
for (int j = 0; j < Weight[i].size(); j++)
for (int k = 0; k < Weight[i][0].size(); k++)
{
Weight[i][j][k] = ((double)(rand() % 1000) / 700 - 0.5) * sqrt(1.0 / NEURE_NUM[i - 1]);
//cout << Weight[i][j][k] << endl;
}
}
// bias initialization
Bias.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(Bias[i], NEURE_NUM[i], 1, 0);
}
// error-term initialization
Error.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(Error[i], NEURE_NUM[i], 1, 0);
}
// weight-derivative initialization
der_Weight.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(der_Weight[i], NEURE_NUM[i], NEURE_NUM[i - 1], 0);
}
// bias-derivative initialization
der_Bias.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(der_Bias[i], NEURE_NUM[i], 1, 0);
}
// pre-activation initialization
receive.resize(LAYER_NUM);
for (int i = 1; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(receive[i], NEURE_NUM[i], 1, 0);
}
// post-activation initialization
activation.resize(LAYER_NUM);
for (int i = 0; i < LAYER_NUM; i++)
{
// allocate the shape, zero-initialized
construct(activation[i], NEURE_NUM[i], 1, 0);
}
}
// Zero out every matrix in a list
inline void zeroClear(vector<Matrix> &mat)
{
for (int i = 0; i < mat.size(); i++)
for (int j = 0; j < mat[i].size(); j++)
for (int k = 0; k < mat[i][0].size(); k++)
mat[i][j][k] = 0;
}
// Forward propagation for a single sample
inline void forwardPropagation(const Point &point)
{
activation[0] = point.image;
for (int i = 1; i < LAYER_NUM; i++)
{
// weighted input plus bias, then the sigmoid activation
receive[i] = addition(multiply(Weight[i], activation[i - 1]), Bias[i]);
activation[i] = sigmod(receive[i]);
}
}
// Backpropagation for a single sample
inline void backPropagation(const Point &point)
{
// output layer: delta = a - y (sigmoid activation with cross-entropy loss)
Error[LAYER_NUM - 1] = subtract(activation[LAYER_NUM - 1], point.label);
Matrix ONE;
for (int i = LAYER_NUM - 2; i >= 1; i--) // walk the layers backwards
{
construct(ONE, activation[i].size(), activation[i][0].size(), 1);
// hidden layers: delta_i = (W_{i+1}^T * delta_{i+1}) .* a_i .* (1 - a_i)
Error[i] = matmul(multiply(transpose(Weight[i + 1]), Error[i + 1]), matmul(activation[i], subtract(ONE, activation[i])));
}
}
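// The per-sample gradients follow directly from the error terms:
//   dL/dW_i = Error_i * activation_{i-1}^T,  dL/db_i = Error_i
// They are summed over the whole training set and averaged before each
// update, i.e. full-batch gradient descent.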
// Accumulate the per-sample derivatives
inline void accumulateDerivate(void)
{
for (int i = 1; i < LAYER_NUM; i++)
{
der_Weight[i] = addition(der_Weight[i], multiply(Error[i], transpose(activation[i - 1])));
der_Bias[i] = addition(der_Bias[i], Error[i]);
}
}
// Average the accumulated derivatives over the training set
inline void calculateDerivate(void)
{
for (int i = 1; i < LAYER_NUM; i++)
{
der_Weight[i] = division(der_Weight[i], NUM_TRAIN);
der_Bias[i] = division(der_Bias[i], NUM_TRAIN);
}
}
// Gradient descent update
inline void gradientDescent(void)
{
for (int i = 1; i < LAYER_NUM; i++)
{
Weight[i] = subtract(Weight[i], multiply(der_Weight[i], step));
Bias[i] = subtract(Bias[i], multiply(der_Bias[i], step));
}
}
// Check whether the argmax of the output matches the label
inline bool match(const Matrix &res, const Matrix &label)
{
int max_pos = 0;
for (int i = 1; i < OUT_SIZE; i++)
if (res[i][0] > res[max_pos][0])
max_pos = i;
return label[max_pos][0] == 1;
}
// Check whether any training sample is still misclassified (stopping criterion)
inline bool mistake(void)
{
for (int i = 0; i < NUM_TRAIN; i++)
{
const Point &point = TrainData[i];
forwardPropagation(point);
if (!match(activation[LAYER_NUM - 1], point.label))
return true;
}
return false;
}
// Evaluate accuracy on the training set
inline double evaluateStudy(void)
{
int cnt = 0;
for (int i = 0; i < NUM_TRAIN; i++)
{
const Point &point = TrainData[i];
forwardPropagation(point);
if (match(activation[LAYER_NUM - 1], point.label))
cnt++;
}
return (double)cnt / NUM_TRAIN;
}
// Evaluate generalization: accuracy on the test set
inline double evaluateModel(void)
{
int cnt = 0;
for (int i = 0; i < NUM_TEST; i++)
{
const Point &point = TestData[i];
forwardPropagation(point);
if (match(activation[LAYER_NUM - 1], point.label))
cnt++;
}
return (double)cnt / NUM_TEST;
}
// Print the model parameters
inline void showParameter(void)
{
cout << "Weights: " << endl;
for (int i = 1; i < LAYER_NUM; i++)
printMatrix(Weight[i]);
cout << "Biases: " << endl;
for (int i = 1; i < LAYER_NUM; i++)
printMatrix(Bias[i]);
}
int main(void)
{
clock_t start_time = clock();
{
readALLData(TrainData, TestData);
Normalize(TrainData);
Normalize(TestData);
initialize();
cout << "-------- Initial parameters --------" << endl;
cout << "Learning rate: " << step << endl;
showParameter();
cout << "-------- Initial parameters --------" << endl
<< endl;
int i = 0;
while (i < iteration && mistake())
{
try
{
zeroClear(der_Weight);
zeroClear(der_Bias);
for (int j = 0; j < NUM_TRAIN; j++)
{
forwardPropagation(TrainData[j]);
backPropagation(TrainData[j]);
accumulateDerivate();
}
calculateDerivate();
gradientDescent();
}
catch (char const *message)
{
cout << message << endl;
}
i++;
}
// print the results
cout << "-------- Model evaluation --------" << endl;
cout << "Iterations: " << i << endl;
cout << "Training samples: " << NUM_TRAIN << endl;
cout << "Training accuracy: " << evaluateStudy() << endl;
cout << "Test samples: " << NUM_TEST << endl;
cout << "Test accuracy: " << evaluateModel() << endl;
cout << "Parameters:" << endl;
showParameter();
cout << "-------- Model evaluation --------" << endl
<< endl;
}
clock_t end_time = clock();
cout << "-------- Timing --------" << endl;
cout << "Elapsed: " << (double)(end_time - start_time) / CLOCKS_PER_SEC << 's' << endl;
cout << "-------- Timing --------" << endl
<< endl;
return 0;
}
```
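For reference, the code implements the standard backpropagation equations for a sigmoid network with a cross-entropy loss. With activations $a^{(l)} = \sigma(W^{(l)} a^{(l-1)} + b^{(l)})$, the error terms and the full-batch update over $N$ training samples with learning rate $\eta = 0.5$ are:

$$
\delta^{(L)} = a^{(L)} - y, \qquad
\delta^{(l)} = \left( W^{(l+1)\top} \delta^{(l+1)} \right) \odot a^{(l)} \odot \left( 1 - a^{(l)} \right)
$$

$$
W^{(l)} \leftarrow W^{(l)} - \frac{\eta}{N} \sum_{n=1}^{N} \delta^{(l)}_{n} \, a^{(l-1)\top}_{n}, \qquad
b^{(l)} \leftarrow b^{(l)} - \frac{\eta}{N} \sum_{n=1}^{N} \delta^{(l)}_{n}
$$

`forwardPropagation`, `backPropagation`, `accumulateDerivate`/`calculateDerivate`, and `gradientDescent` implement these pieces in that order. The program is a single translation unit, so something like `g++ -std=c++11 -O2 bpnn.cpp -o bpnn` (filename assumed) should build it, with the four MNIST files placed in the working directory.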