An example of a forward/backward-propagation neural network built with modern C++: a fully connected backpropagation neural network with an arbitrary number of layers and an arbitrary number of nodes per layer. The example also includes basic gradient-descent optimization. After every training run, the weight matrices must be saved to a text file so that, the next time additional training data is added, training can continue from the previously saved weights.
ChatGPT
To save and restore the weight matrices, we can simply write the weight matrices and bias vectors to a text file. Below is a complete example that includes saving and loading the weights.
// An example of a forward/backward-propagation neural network built with modern C++: a fully connected backpropagation network with an arbitrary number of layers and nodes per layer. It includes basic gradient-descent optimization; after every training run the weight matrices are saved to a text file so that later runs, with additional training data, can continue from the previously saved weights.
// modernc++ArbitraryLayerBackprogagationNN23saveWeightMatrix230801.cpp : This file contains the "main" function. Program execution begins and ends there.
#include <iostream>
#include <fstream>
#include <vector>
#include <cmath>      // std::exp, std::pow
#include <random>
#include <sstream>
#include <string>
#include <algorithm>  // std::generate
#include <ctime>      // time, localtime
#pragma warning(disable: 4996) // MSVC: allow localtime without localtime_s
using namespace std;
class Layer {
public:
    std::vector<std::vector<double>> weights;  // weights[i][j]: weight from input j to neuron i
    std::vector<double> biases;
    std::vector<double> inputs;                // last input seen by forward(), needed for the weight update
    std::vector<double> outputs;               // last activations produced by forward()
    std::vector<double> gradients;             // dL/d(input), handed back to the previous layer
    Layer(int inputSize, int outputSize) {
        std::random_device rd;
        std::mt19937 gen(rd());
        std::normal_distribution<> d(0.0, 1.0);
        weights.resize(outputSize, std::vector<double>(inputSize));
        biases.resize(outputSize);
        inputs.resize(inputSize);
        outputs.resize(outputSize);
        gradients.resize(inputSize);
        // Initialize weights and biases from a standard normal distribution.
        for (auto& w : weights) {
            std::generate(w.begin(), w.end(), [&]() { return d(gen); });
        }
        std::generate(biases.begin(), biases.end(), [&]() { return d(gen); });
    }
    // Forward pass: output_i = sigmoid(sum_j weights[i][j] * input_j + biases[i])
    std::vector<double> forward(const std::vector<double>& input) {
        inputs = input; // remember the input for the weight update in backward()
        for (size_t i = 0; i < weights.size(); ++i) {
            double sum = 0.0;
            for (size_t j = 0; j < input.size(); ++j) {
                sum += weights[i][j] * input[j];
            }
            sum += biases[i];
            outputs[i] = 1.0 / (1.0 + std::exp(-sum)); // Sigmoid activation
        }
        return outputs;
    }
    // Backward pass. `prev_gradients` holds dL/d(output) of this layer.
    // Applies the sigmoid derivative, updates weights and biases with
    // gradient descent (learning rate 0.1), and returns dL/d(input).
    std::vector<double> backward(const std::vector<double>& prev_gradients) {
        std::vector<double> deltas(outputs.size());
        for (size_t i = 0; i < outputs.size(); ++i) {
            deltas[i] = prev_gradients[i] * outputs[i] * (1.0 - outputs[i]); // sigmoid'(z) = out * (1 - out)
        }
        for (size_t i = 0; i < gradients.size(); ++i) {
            gradients[i] = 0.0;
            for (size_t j = 0; j < outputs.size(); ++j) {
                gradients[i] += weights[j][i] * deltas[j];
            }
        }
        for (size_t i = 0; i < weights.size(); ++i) {
            for (size_t j = 0; j < weights[i].size(); ++j) {
                weights[i][j] -= 0.1 * deltas[i] * inputs[j]; // learning rate = 0.1
            }
            biases[i] -= 0.1 * deltas[i];
        }
        return gradients;
    }
};
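// Notes on the gradient computation in backward(), for the sigmoid layers above:
//   out_i = sigmoid(z_i), and sigmoid'(z_i) = out_i * (1 - out_i), so
//   delta_i     = dL/d(out_i) * out_i * (1 - out_i)
//   dL/dw[i][j] = delta_i * input_j       ->  w[i][j] -= lr * delta_i * input_j
//   dL/db[i]    = delta_i                 ->  b[i]    -= lr * delta_i
//   dL/d(in_j)  = sum_i w[i][j] * delta_i    (returned to the previous layer)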
// Write all weight matrices and bias vectors to a plain text file.
void save_weights(const std::vector<Layer>& network, const std::string& filename) {
    std::ofstream file(filename);
    if (!file.is_open()) {
        std::cout << "Could not open file for writing: " << filename << std::endl;
        return;
    }
    for (const auto& layer : network) {
        for (const auto& w : layer.weights) {
            for (const auto& val : w) {
                file << val << " ";
            }
            file << std::endl;
        }
        file << "----" << std::endl;   // separator between weights and biases
        for (const auto& b : layer.biases) {
            file << b << " ";
        }
        file << std::endl << "====" << std::endl;   // separator between layers
    }
    file.close();
}
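// File layout produced by save_weights (and expected by load_weights), per layer:
//   one line of space-separated weights per neuron,
//   a line containing "----",
//   one line of space-separated biases,
//   a line containing "====".
// Note: load_weights assumes the network in memory was built with the same layer
// sizes as the network that wrote the file; the format carries no size information.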
// Read weights and biases back from the text format written by save_weights.
void load_weights(std::vector<Layer>& network, const std::string& filename) {
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cout << "Could not open file for reading: " << filename << std::endl;
        return;
    }
    std::string line;
    int layer_index = 0;
    while (layer_index < (int)network.size()) {
        Layer& layer = network[layer_index];
        // Read the weight rows until the "----" separator.
        int row_index = 0;
        while (std::getline(file, line) && line != "----") {
            std::istringstream iss(line);
            int col_index = 0;
            for (double val; iss >> val;) {
                layer.weights[row_index][col_index] = val;
                col_index++;
            }
            row_index++;
        }
        // Read the bias line.
        std::getline(file, line);
        std::istringstream iss(line);
        int col_index = 0;
        for (double val; iss >> val;) {
            layer.biases[col_index] = val;
            col_index++;
        }
        std::getline(file, line); // skip the "====" separator
        layer_index++;
    }
    file.close();
}
// Build a timestamp string like YYYYMDHMS (fields are not zero-padded).
string get_timestamp() {
    time_t now = time(0);
    tm* localtm = localtime(&now);
    stringstream ss;
    ss << 1900 + localtm->tm_year << (1 + localtm->tm_mon) << localtm->tm_mday << localtm->tm_hour << localtm->tm_min << localtm->tm_sec;
    return ss.str();
}
int main() {
    // Network architecture: 2 inputs, two hidden layers (4 and 3 neurons), 1 output.
    std::vector<int> layerSizes = { 2, 4, 3, 1 };
    std::vector<Layer> network;
    for (size_t i = 1; i < layerSizes.size(); ++i) {
        network.push_back(Layer(layerSizes[i - 1], layerSizes[i]));
    }
    // Uncomment to load weights from a file
    // load_weights(network, "weights.txt");
    std::vector<double> input = { 0.1, 0.2 };   // reused inside the training loop below
    std::vector<double> target = { 0.3 };
    // XOR training set.
    vector<double> input00[4];
    vector<double> target00[4];
    input00[0] = { 1.0, 0.0 };
    input00[1] = { 1.0, 1.0 };
    input00[2] = { 0.0, 0.0 };
    input00[3] = { 0.0, 1.0 };
    target00[0] = { 1.0 };
    target00[1] = { 0.0 };
    target00[2] = { 0.0 };
    target00[3] = { 1.0 };
    // Outer loop: show the menu, then train / save / load as requested.
    for (int ii = 0; true; ++ii) {//for110ii
        int choice1;
        cout << endl << "Menu:" << endl;
        cout << "1 Load the weight matrices, train, and save the new weights" << endl;
        cout << "2 Load the weight matrices, train, but do not save the new weights" << endl;
        cout << "3 Save the weight matrices currently in memory" << endl;
        cout << "4 Reinitialize the weight matrices, train, and do not save the new weights" << endl;
        cout << "5 Save the weights with a timestamp..." << endl;
        cin >> choice1;
        string filename01 = "weights220101.txt";
        if (1 == choice1 || 2 == choice1) {
            load_weights(network, filename01);
        }
        if (choice1 == 4) { // reinitialize the weight matrices
            for (Layer& layer : network) {
                Layer newLayer(layer.weights[0].size(), layer.weights.size());
                layer = newLayer;
            }
        }
        if (1 == choice1 || 2 == choice1 || choice1 == 4) {//if220choice1
            // Inner training loop over the four XOR patterns.
            for (int epoch = 0; epoch < 50000; ++epoch) { //for110epoch
                for (int jj = 0; jj < 4; ++jj) {//for330jj
                    input = input00[jj];
                    target = target00[jj];
                    std::vector<double> output = input;
                    // Forward Pass
                    for (auto& layer : network) {
                        output = layer.forward(output);
                    }
                    // Compute loss (Mean Square Error)
                    double loss = 0.0;
                    for (size_t i = 0; i < target.size(); ++i) {
                        loss += 0.5 * std::pow(target[i] - output[i], 2);
                    }
                    if (0 == epoch % 100) {
                        std::cout << "Epoch: " << epoch << ", Loss: " << loss << std::endl;
                    }
                    // Gradient of the loss w.r.t. the network output;
                    // backward() applies the sigmoid derivative itself.
                    std::vector<double> gradients(target.size());
                    for (size_t i = 0; i < target.size(); ++i) {
                        gradients[i] = output[i] - target[i];
                    }
                    // Backward Pass
                    for (auto it = network.rbegin(); it != network.rend(); ++it) {
                        gradients = it->backward(gradients);
                    }
                    // Uncomment to save weights after each epoch
                    // save_weights(network, "weights.txt");
                }//for330jj
            }//for110epoch
        }//if220choice1
        // Save the weights to a file only for the menu choices that request it.
        if (1 == choice1 || 3 == choice1) {
            save_weights(network, filename01); // "weights220101.txt"
        }
        if (5 == choice1 || choice1 == 4) {
            string new_filename = "weights" + get_timestamp() + ".txt";
            save_weights(network, new_filename);
        }
        // Quick check of the trained network on two XOR inputs
        // (use local vectors so the training data in input00 stays untouched).
        vector<double> test01 = { 1.0, 1.0 };
        vector<double> output = test01;
        // Forward Pass
        for (auto& layer : network) {
            output = layer.forward(output);
        }
        cout << "forward01:" << output[0];
        cout << endl;
        vector<double> test02 = { 0.0, 1.0 };
        output = test02;
        // Forward Pass
        for (auto& layer : network) {
            output = layer.forward(output);
        }
        cout << "forward02:" << output[0];
        cout << endl;
    }//for110ii  end of the outer loop
    return 0;
}
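To try it out, build with any C++17 compiler; something like the following should work (assuming g++ is available; with MSVC, simply add the file to a console project, and the output name xor_nn below is just an example):

g++ -std=c++17 -O2 modernc++ArbitraryLayerBackprogagationNN23saveWeightMatrix230801.cpp -o xor_nn
./xor_nn

The program then shows the menu, trains on the four XOR patterns for 50000 epochs when option 1, 2, or 4 is chosen, and, depending on the choice, writes the weights to weights220101.txt or to a timestamped weights<timestamp>.txt in the current directory, so a later run can resume from them via option 1 or 2.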