我的第三个cudnn程序(cifar10改cifar100)

cudnn还是不熟

改起来没那么顺手,不急,慢慢来,时间久了,就熟练了!

现在的cudnn版本,训练出来的得分总是比cpu版本低10分左右!这也说明,对自己写的cpu版本的程序还是可以很有信心的!

cifar100不好训练,听说很难,试了试,确实!我在我那个71分的vgg上训练cifar100,得分train/test=30/25,改成cudnn最好15分的样子!

得分不重要,这里相互验证,说明cifar100程序cudnn改成功了!所以记录一下,得分低,说明以后还有余地!

先看两次训练结果:

线性层多了一层,有效果:

最好15.86分!可以训练,说明程序ok!

cifar10改cifar100,改动如下:(这个cifar10程序,上了62分,我们前面也记录了!)

1,改动,标签和图像的加载

2,改10类为100类,这个也有难度,就像当初把单通道图像改成3通道rgb图像输入lenet网络,费了不少功夫和周折!

好,先看cifar100加载:还是整理了两个类,一个训练,一个测试:

// One labelled sample handed out by the dataset classes: a flattened float
// image together with its class id.
struct cifar10Samlpe
{
    std::vector<float> image;  // pixel values, copied from the constructor argument
    unsigned char label;       // class id, stored in a single byte

    // Builds a sample by copying both the pixel vector and the label.
    cifar10Samlpe(const std::vector<float>& image_, const unsigned char &label_)
        : image(image_), label(label_)
    {
    }
};

// In-memory copy of the CIFAR-100 training split (50000 images).
//
// Each image is stored as 3*32*32 floats in [0, 1], laid out plane by plane in
// the same order as the file, and each label is the "fine" (100-class) label.
class cifar100Dataset//checked 2.0
{
public:
    // Loads the whole training file and sets up a sequential sample order.
    //
    // NOTE(review): both arguments are currently ignored. The data is always
    // read from the hard-coded path used in load_images(), and labels come from
    // the same file. The parameters are kept so existing callers still compile.
    //
    // Throws std::runtime_error if the file cannot be opened, if it is shorter
    // than expected, or if the image and label counts disagree.
    cifar100Dataset(const std::string &image_file, const std::string &label_file) {
        (void)label_file;
        load_images(image_file);
        //load_labels(label_file);
        if (images.size() != labels.size()) {
            throw std::runtime_error("Number of images and labels mismatch");
        }
        indices.resize(images.size());
        for (size_t i = 0; i < indices.size(); i++)
            indices[i] = i;
    }

    // Returns the next batch_size samples (copies) in stored order. After the
    // last sample the cursor wraps around to the first one, so a batch may span
    // the end and the beginning of the dataset.
    std::vector<cifar10Samlpe> next_batch(size_t batch_size) {
        std::vector<cifar10Samlpe> batch;
        if (indices.empty()) return batch;  // nothing to hand out
        batch.reserve(batch_size);
        for (size_t i = 0; i < batch_size; i++) {
            if (current_idx >= indices.size()) current_idx = 0;
            const size_t idx = indices[current_idx++];
            batch.emplace_back(images[idx], labels[idx]);  // construct in place
        }
        return batch;
    }

    // Number of images held.
    size_t size() const { return images.size(); }

private:
    std::vector<std::vector<float>> images;  // one 3*32*32 vector per image
    std::vector<unsigned char> labels;       // fine label (second label byte) per image
    std::vector<size_t> indices;             // sample order used by next_batch
    size_t current_idx = 0;                  // cursor into indices

    // Reads every record of the training file into images/labels.
    //
    // Record layout as read here: 1 byte coarse label (20 superclasses, unused),
    // 1 byte fine label (100 classes, kept), then three consecutive 32*32 byte
    // planes. Per the CIFAR-100 binary format the planes are red, green, blue.
    // The planes are copied in file order and scaled to [0, 1].
    //
    // NOTE(review): `path` is ignored; the file name is hard-coded.
    void load_images(const std::string &path) {
        (void)path;
        const int num = 50000;                        // images in the training file
        const size_t kPixelBytes = 3 * 32 * 32;       // three 32x32 planes
        const size_t kRecordBytes = 2 + 3 * 32 * 32;  // two label bytes + pixels

        FILE* data_file = nullptr;
        const int err = fopen_s(&data_file, "c:\\traincifar100.bin", "rb");
        if (err != 0 || data_file == nullptr) {
            // Previously this returned silently and left a dataset of all-zero images.
            throw std::runtime_error("Cannot open c:\\traincifar100.bin");
        }

        images.resize(num, std::vector<float>(kPixelBytes));
        labels.resize(num);

        unsigned char record[kRecordBytes];
        for (int i = 0; i < num; i++)
        {
            if (fread(record, 1, kRecordBytes, data_file) != kRecordBytes) {
                fclose(data_file);
                throw std::runtime_error("Unexpected end of c:\\traincifar100.bin");
            }
            labels[i] = record[1];  // record[0] is the coarse label, not used
            for (size_t p = 0; p < kPixelBytes; p++) {
                images[i][p] = record[2 + p] / 255.0f;
            }
        }
        fclose(data_file);
    }
};

// In-memory copy of the CIFAR-100 test split (10000 images).
//
// Each image is stored as 3*32*32 floats in [0, 1], laid out plane by plane in
// the same order as the file, and each label is the "fine" (100-class) label.
class cifar100DatasetTEST{
public:
    // Loads the whole test file and sets up a sequential sample order.
    //
    // NOTE(review): both arguments are currently ignored. The data is always
    // read from the hard-coded path used in load_images(), and labels come from
    // the same file. The parameters are kept so existing callers still compile.
    //
    // Throws std::runtime_error if the file cannot be opened, if it is shorter
    // than expected, or if the image and label counts disagree.
    cifar100DatasetTEST(const std::string &image_file, const std::string &label_file) {
        (void)label_file;
        load_images(image_file);
        //load_labels(label_file);
        if (images.size() != labels.size()) {
            throw std::runtime_error("Number of images and labels mismatch");
        }
        indices.resize(images.size());
        for (size_t i = 0; i < indices.size(); i++)
            indices[i] = i;
    }

    // Returns the next batch_size samples (copies) in stored order. After the
    // last sample the cursor wraps around to the first one, so a batch may span
    // the end and the beginning of the dataset.
    std::vector<cifar10Samlpe> next_batch(size_t batch_size) {
        std::vector<cifar10Samlpe> batch;
        if (indices.empty()) return batch;  // nothing to hand out
        batch.reserve(batch_size);
        for (size_t i = 0; i < batch_size; i++) {
            if (current_idx >= indices.size()) current_idx = 0;
            const size_t idx = indices[current_idx++];
            batch.emplace_back(images[idx], labels[idx]);  // construct in place
        }
        return batch;
    }

    // Number of images held.
    size_t size() const { return images.size(); }

private:
    std::vector<std::vector<float>> images;  // one 3*32*32 vector per image
    std::vector<unsigned char> labels;       // fine label (second label byte) per image
    std::vector<size_t> indices;             // sample order used by next_batch
    size_t current_idx = 0;                  // cursor into indices

    // Reads every record of the test file into images/labels.
    //
    // Record layout as read here: 1 byte coarse label (unused), 1 byte fine
    // label (kept), then three consecutive 32*32 byte planes. Per the CIFAR-100
    // binary format the planes are red, green, blue. The planes are copied in
    // file order and scaled to [0, 1].
    //
    // NOTE(review): `path` is ignored; the file name is hard-coded.
    void load_images(const std::string &path) {
        (void)path;
        const int num = 10000;                        // images in the test file
        const size_t kPixelBytes = 3 * 32 * 32;       // three 32x32 planes
        const size_t kRecordBytes = 2 + 3 * 32 * 32;  // two label bytes + pixels

        FILE* data_file = nullptr;
        const int err = fopen_s(&data_file, "c:\\testcifar100.bin", "rb");
        if (err != 0 || data_file == nullptr) {
            // Previously this returned silently and left a dataset of all-zero images.
            throw std::runtime_error("Cannot open c:\\testcifar100.bin");
        }

        images.resize(num, std::vector<float>(kPixelBytes));
        labels.resize(num);

        unsigned char record[kRecordBytes];
        for (int i = 0; i < num; i++)
        {
            if (fread(record, 1, kRecordBytes, data_file) != kRecordBytes) {
                fclose(data_file);
                throw std::runtime_error("Unexpected end of c:\\testcifar100.bin");
            }
            labels[i] = record[1];  // record[0] is the coarse label, not used
            for (size_t p = 0; p < kPixelBytes; p++) {
                images[i][p] = record[2 + p] / 255.0f;
            }
        }
        fclose(data_file);
    }
};

要注意的地方(原文中标红):每条记录开头有两个标签字节,第一字节是20个超类(不用),第二字节是100个细类(使用);训练文件读5万张,测试文件读1万张。其他和cifar10加载一样!

下来就是要在cifar10程序上10类改100

先看架构中改100的地方:a:

// Assembles the layer stack for 3x32x32 inputs with a 100-way output and
// allocates the network's output and input-gradient buffers with cudaMalloc.
// The shape notes on each layer are inferred from the arguments passed to the
// layer that follows it; confirm against the layer classes.
LeNet(cublasHandle_t &cublas_, cudnnHandle_t &cudnn_, int batch_) :cublas(cublas_), cudnn(cudnn_), batch(batch_) {
    const int in_channels = 3;    // RGB input
    const int in_height = 32;
    const int in_width = 32;
    const int fc2_width = 100;    // width of the second fully connected layer
    const int num_classes = 100;  // CIFAR-100 fine labels

    // conv1 (5x5): 3x32x32 -> 12x28x28
    layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, in_channels, 12, in_height, in_width, 5));
    layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 12, 28, 28));

    // Disabled experiment: an extra 3x3 conv + ReLU pair.
    //layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 12, 12, 30, 30, 3));
    //layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 12, 28, 28));

    // pool1: 12x28x28 -> 12x14x14
    layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 12, 28, 28, 2, 2, 0, 2));

    // conv2 (5x5): 12x14x14 -> 16x10x10
    layers.emplace_back(std::make_shared<Conv2D>(cudnn, batch, 12, 16, 14, 14, 5));
    layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 16, 10, 10));

    // pool2: 16x10x10 -> 16x5x5
    layers.emplace_back(std::make_shared<MaxPool2D>(cudnn, batch, 16, 10, 10, 2, 2, 0, 2));

    // fc1: 16*5*5 = 400 -> 120
    layers.emplace_back(std::make_shared<Linear>(cublas, batch, 16 * 5 * 5, 120));
    layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, 120, 1, 1));

    // fc2: 120 -> 100
    layers.emplace_back(std::make_shared<Linear>(cublas, batch, 120, fc2_width));
    layers.emplace_back(std::make_shared<ReLU>(cudnn, batch, fc2_width, 1, 1));

    // fc3: 100 -> 100 class scores (no activation after this layer)
    layers.emplace_back(std::make_shared<Linear>(cublas, batch, fc2_width, num_classes));

    // One score per class per sample.
    cudaMalloc(&output, batch * num_classes * sizeof(float));

    // Same element count as one input batch.
    cudaMalloc(&grad_input, batch * in_channels * in_height * in_width * sizeof(float));
}

先看架构中改100的地方:b :

// Runs the network front to back: each layer consumes the previous layer's
// output, and the last layer's output (batch * 100 floats) is copied into
// `output` with a device-to-device cudaMemcpy.
void forward(float *input_)override {
    input = input_;

    for (auto it = layers.begin(); it != layers.end(); ++it) {
        const auto &layer = *it;
        layer->forward(input);
        input = layer->get_output();  // becomes the input of the next layer
    }

    // 100 scores per sample in the batch.
    const auto score_bytes = sizeof(float)*batch * 100;
    cudaMemcpy(output, input, score_bytes, cudaMemcpyDeviceToDevice);
}

再看训练函数以及测试函数中:

另外softmax计算中也要注意:

不少地方啊!很容易遗漏!我是对着cpu版本改动的,还好,通过编译,运行正常,虽然得分不高!但这已经是巨大进步了!

我们三个cudnn程序,循序渐进,只针对性的改动,很容易成功!

相关推荐
DX_水位流量监测1 分钟前
压力式水位计的技术特性与应用实践
大数据·网络·人工智能·安全·信息可视化
SCBAiotAigc12 分钟前
langchain1.2学习笔记(一):安装langchain
人工智能·python·langchain
中國龍在廣州15 分钟前
生成不遗忘,「超长时序」世界模型,北大EgoLCD长短时记忆加持
人工智能·深度学习·算法·自然语言处理·chatgpt
豌豆学姐31 分钟前
Sora2 能做什么?25 秒视频生成 API 的一次接入实践
大数据·人工智能·小程序·aigc·php·开源软件
Cherry的跨界思维35 分钟前
25、AI时代的数字生存战:爬虫与反爬虫的数据争夺全面解析
人工智能·爬虫·机器学习·python爬虫·python办公自动化·python反爬虫
测试人社区-千羽40 分钟前
智能化测试:AI驱动的测试覆盖率提升方法论与实战路径
运维·人工智能·测试工具·程序人生·自动化·测试覆盖率
龙腾AI白云1 小时前
图神经网络初探(1)
人工智能
躺柒1 小时前
读共生:4.0时代的人机关系02人机合作后
大数据·人工智能·机器人·人机交互·人机协作·人机对话
itwangyang5201 小时前
AIDD药物筛选与设计详细方法
人工智能·python
yiersansiwu123d1 小时前
生成式AI落地潮:从技术狂热到商业价值重构
人工智能·重构