现有一个CSV表格文件 A1.csv(可用Excel打开)。
其表格第一列为:生物样本的名称;其他列为:生物样本的含量。表格第一行第一列是空格,第一行其他列为:受试者名称。
需求如下:设计一个程序,可以指定受试者名称(某列),对该列所有生物样本含量进行降序排序,保留含量前X名的生物样本,其余生物样本的含量求和合并为一行,命名为Others。
设计思路:
1.如何根据受试者名称(第一行),读取csv中指定某列数据?
cpp
// Reads the first (header) line of "A1.csv" and returns its comma-separated
// cells in order, so the index of a cell equals its column index in the file.
// The cells are also echoed to standard output, separated by spaces.
// Returns an empty vector when the file cannot be read or the header is empty.
std::vector<std::string> getRownames() {
    std::vector<std::string> rownames;
    std::ifstream file("A1.csv");
    std::string line;
    if (!std::getline(file, line)) {
        return rownames;  // missing, unreadable or empty file
    }
    // Tolerate CRLF line endings: getline leaves the '\r' in the string.
    if (!line.empty() && line.back() == '\r') {
        line.pop_back();
    }
    if (!line.empty()) {
        std::string::size_type start = 0;
        for (;;) {
            const std::string::size_type comma = line.find(',', start);
            if (comma == std::string::npos) {
                // The last cell has no trailing comma; it must still be kept,
                // otherwise the last subject could never be selected.
                rownames.push_back(line.substr(start));
                break;
            }
            rownames.push_back(line.substr(start, comma - start));
            start = comma + 1;
        }
    }
    for (const auto& name : rownames) {
        std::cout << name << " ";
    }
    return rownames;
}
通过这段代码,就可以得到一个储存每列名称的vector了。
cpp
// Returns the index of the first element of `rownames` equal to `aim`.
// Returns 0 when nothing matches. In the table layout described above,
// index 0 is the blank top-left cell and never a subject, so callers can
// treat a result <= 0 as "not found".
int findRowname(const std::vector<std::string>& rownames, const std::string& aim) {
    for (std::size_t i = 0; i < rownames.size(); ++i) {
        if (rownames[i] == aim) {
            return static_cast<int>(i);
        }
    }
    // Without this, control would fall off the end of a non-void function.
    return 0;
}
通过这段代码,就可以得到目标受试者所在列了。
2.如何获得目标受试者所在列的数据及其对应样本名称?
cpp
// Reads every data line of "A1.csv" (everything after the header line) and
// returns one (sample name, value) pair per line, where the name is column 0
// and the value is column `row` parsed as a double. The pairs are also
// printed to standard output, one per line.
//
// Blank lines are skipped. Throws std::out_of_range when `row` is smaller
// than 1 (column 0 holds the sample names) or when a line has no such column.
// std::stod throws std::invalid_argument / std::out_of_range when the cell is
// not a parsable number.
std::vector<std::pair<std::string, double>> getNameAndData(int row) {
    if (row < 1) {
        throw std::out_of_range("getNameAndData: column index must be >= 1");
    }
    const std::size_t column = static_cast<std::size_t>(row);

    std::vector<std::pair<std::string, double>> NameAndData;
    std::ifstream file("A1.csv");
    std::string line;
    std::getline(file, line);  // skip the header line
    while (std::getline(file, line)) {
        // Tolerate CRLF line endings: getline leaves the '\r' in the string.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty()) {
            continue;
        }
        std::vector<std::string> rowdatas;
        std::string::size_type start = 0;
        for (;;) {
            const std::string::size_type comma = line.find(',', start);
            if (comma == std::string::npos) {
                // Keep the final cell, which has no trailing comma.
                rowdatas.push_back(line.substr(start));
                break;
            }
            rowdatas.push_back(line.substr(start, comma - start));
            start = comma + 1;
        }
        if (column >= rowdatas.size()) {
            throw std::out_of_range("getNameAndData: line has no column " + std::to_string(row));
        }
        NameAndData.emplace_back(rowdatas[0], std::stod(rowdatas[column]));
    }
    for (const auto& entry : NameAndData) {
        std::cout << entry.first << " " << entry.second << '\n';
    }
    return NameAndData;
}
循环读取文件的每一行,把每一行按逗号拆分后存进vector,取出其中第0列(样本名)与第row列(目标受试者所在列),将二者组成pair,存入新的vector。
这里有代码重复了,就是从line转换为vector的代码,可以将其封装。
cpp
// Splits `line` at every ',' and appends the resulting fields to `linedatas`
// (existing elements are kept). Empty fields are preserved, including the one
// after a trailing comma. A single trailing '\r' (CRLF line ending) is
// ignored, and an empty line appends nothing. Quoted CSV fields are not
// interpreted: a comma inside quotes still separates fields.
void split(const std::string& line, std::vector<std::string>& linedatas) {
    std::string::size_type end = line.size();
    if (end > 0 && line[end - 1] == '\r') {
        --end;
    }
    if (end == 0) {
        return;
    }
    std::string::size_type start = 0;
    for (;;) {
        const std::string::size_type comma = line.find(',', start);
        if (comma == std::string::npos || comma >= end) {
            // Last field: there is no comma after it, so emit it explicitly.
            linedatas.push_back(line.substr(start, end - start));
            return;
        }
        linedatas.push_back(line.substr(start, comma - start));
        start = comma + 1;
    }
}
需要时直接传入line和vector就可以啦。
3. 排序,并保留前X行,剩余行合并为others
这里复习了lambda表达式与sort函数结合的用法:用lambda作为比较函数,按含量从大到小排序。
cpp
// Sorts the (name, value) pairs by value in descending order, keeps the first
// `reserve` of them and appends one final pair {"Others", sum of the values
// that were not kept}. The result is also printed to standard output.
//
// `reserve` is clamped to [0, NameAndData.size()], so asking for more rows
// than exist keeps everything and yields an "Others" sum of 0. Pairs with
// equal values keep their input order (stable sort).
std::vector<std::pair<std::string, double>> getNewNameAndData(std::vector<std::pair<std::string, double>> NameAndData, int reserve) {
    std::stable_sort(NameAndData.begin(), NameAndData.end(),
                     [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });

    // Clamp before indexing: an unchecked count would read past the end of
    // the vector, and a negative one would wrap around when used as a size.
    std::size_t keep = 0;
    if (reserve > 0) {
        keep = std::min(static_cast<std::size_t>(reserve), NameAndData.size());
    }

    double reservesum = 0;
    for (std::size_t i = keep; i < NameAndData.size(); ++i) {
        reservesum += NameAndData[i].second;
    }

    // NameAndData is a by-value copy, so it can be trimmed and returned.
    NameAndData.resize(keep);
    NameAndData.emplace_back("Others", reservesum);

    for (const auto& entry : NameAndData) {
        std::cout << entry.first << " " << entry.second << '\n';
    }
    return NameAndData;
}
4.将new vector写入新文件
cpp
// Writes the summary table to "newA.csv": first a header line made of cell 0
// and cell `row` of the header line of "A1.csv", then one "name,value" line
// per element of `NewNameAndData`.
//
// Throws std::out_of_range when the header of "A1.csv" has no column `row`
// (this includes a missing or empty input file), and std::runtime_error when
// the output file cannot be opened. The input is validated before the output
// file is opened, so a failed call does not truncate an existing "newA.csv".
void getNewCsv(const std::vector<std::pair<std::string, double>>& NewNameAndData, int row) {
    std::ifstream file1("A1.csv");
    std::string line;
    std::getline(file1, line);
    std::vector<std::string> data0;
    split(line, data0);
    if (row < 0 || static_cast<std::size_t>(row) >= data0.size()) {
        throw std::out_of_range("getNewCsv: header of A1.csv has no column " + std::to_string(row));
    }

    std::ofstream file("newA.csv");
    if (!file) {
        throw std::runtime_error("getNewCsv: cannot open newA.csv for writing");
    }
    // The stream default of 6 significant digits would silently round the
    // data; 15 is the number of decimal digits a double always preserves.
    file.precision(15);
    file << data0[0] << ',' << data0[static_cast<std::size_t>(row)] << '\n';
    for (const auto& entry : NewNameAndData) {
        file << entry.first << ',' << entry.second << '\n';
    }
}
5.完整代码
cpp
#include <algorithm>
#include <cassert>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/// Helpers for the "top-X plus Others" task on the table stored in "A1.csv".
///
/// Layout of the input as used by the code: the first line is a header whose
/// cells are looked up by name; in every following line, cell 0 is a sample
/// name and the other cells are numbers. Fields are separated by plain
/// commas; quoted CSV fields are not interpreted.
class Solution {
public:
    /// Returns the cells of the header line of "A1.csv" in file order, so a
    /// cell's index equals its column index. Returns an empty vector when the
    /// file cannot be read or the header line is empty.
    std::vector<std::string> getRownames() {
        std::vector<std::string> rownames;
        std::ifstream file("A1.csv");
        std::string line;
        if (std::getline(file, line)) {
            split(line, rownames);
        }
        return rownames;
    }

    /// Returns the index of the first element of `rownames` equal to `aim`,
    /// or 0 when nothing matches. Column 0 holds the sample names and is
    /// never a selectable subject, so callers can treat a result <= 0 as
    /// "not found".
    int findRowname(const std::vector<std::string>& rownames, const std::string& aim) {
        for (std::size_t i = 0; i < rownames.size(); ++i) {
            if (rownames[i] == aim) {
                return static_cast<int>(i);
            }
        }
        return 0;
    }

    /// Reads every line after the header of "A1.csv" and returns one
    /// (sample name, value) pair per line: cell 0 and cell `row` parsed as a
    /// double. Blank lines are skipped.
    ///
    /// Throws std::out_of_range when `row` is smaller than 1 or a line has no
    /// such column. std::stod throws std::invalid_argument /
    /// std::out_of_range when the cell is not a parsable number.
    std::vector<std::pair<std::string, double>> getNameAndData(int row) {
        if (row < 1) {
            throw std::out_of_range("getNameAndData: column index must be >= 1");
        }
        const std::size_t column = static_cast<std::size_t>(row);

        std::vector<std::pair<std::string, double>> NameAndData;
        std::ifstream file("A1.csv");
        std::string line;
        std::getline(file, line);  // skip the header line
        std::vector<std::string> rowdatas;
        while (std::getline(file, line)) {
            rowdatas.clear();
            split(line, rowdatas);
            if (rowdatas.empty()) {
                continue;  // blank line
            }
            if (column >= rowdatas.size()) {
                throw std::out_of_range("getNameAndData: line has no column " + std::to_string(row));
            }
            NameAndData.emplace_back(rowdatas[0], std::stod(rowdatas[column]));
        }
        return NameAndData;
    }

    /// Sorts the pairs by value in descending order, keeps the first
    /// `reserve` of them and appends {"Others", sum of the remaining values}.
    /// `reserve` is clamped to [0, NameAndData.size()]; pairs with equal
    /// values keep their input order.
    std::vector<std::pair<std::string, double>> getNewNameAndData(std::vector<std::pair<std::string, double>> NameAndData, int reserve) {
        std::stable_sort(NameAndData.begin(), NameAndData.end(),
                         [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });

        // Clamp before indexing: an unchecked count would read past the end
        // of the vector, and a negative one would wrap when used as a size.
        std::size_t keep = 0;
        if (reserve > 0) {
            keep = std::min(static_cast<std::size_t>(reserve), NameAndData.size());
        }

        double reservesum = 0;
        for (std::size_t i = keep; i < NameAndData.size(); ++i) {
            reservesum += NameAndData[i].second;
        }

        // NameAndData is a by-value copy, so it can be trimmed and returned.
        NameAndData.resize(keep);
        NameAndData.emplace_back("Others", reservesum);
        return NameAndData;
    }

    /// Writes the summary to "<aim>_<reverse>_newA.csv": a header line made
    /// of cell 0 and cell `row` of the header of "A1.csv", then one
    /// "name,value" line per element of `NewNameAndData`, and reports the
    /// file name on standard output.
    ///
    /// Throws std::out_of_range when the header of "A1.csv" has no column
    /// `row`, and std::runtime_error when the output file cannot be opened.
    /// The input is validated before the output file is created.
    void getNewCsv(const std::vector<std::pair<std::string, double>>& NewNameAndData, int row, const std::string& aim, int reverse) {
        std::ifstream file1("A1.csv");
        std::string line;
        std::getline(file1, line);
        std::vector<std::string> data0;
        split(line, data0);
        if (row < 0 || static_cast<std::size_t>(row) >= data0.size()) {
            throw std::out_of_range("getNewCsv: header of A1.csv has no column " + std::to_string(row));
        }

        const std::string str = aim + "_" + std::to_string(reverse) + "_newA.csv";
        std::ofstream file(str);
        if (!file) {
            throw std::runtime_error("getNewCsv: cannot open " + str + " for writing");
        }
        // The stream default of 6 significant digits would silently round
        // the data; 15 is the number of decimal digits a double preserves.
        file.precision(15);
        file << data0[0] << ',' << data0[static_cast<std::size_t>(row)] << '\n';
        for (const auto& entry : NewNameAndData) {
            file << entry.first << ',' << entry.second << '\n';
        }
        std::cout << "文件已经保存在" << str << "中。";
    }

private:
    /// Splits `line` at every ',' and appends the fields to `linedatas`.
    /// Empty fields are preserved, a single trailing '\r' (CRLF line ending)
    /// is ignored, and an empty line appends nothing.
    void split(const std::string& line, std::vector<std::string>& linedatas) {
        std::string::size_type end = line.size();
        if (end > 0 && line[end - 1] == '\r') {
            --end;
        }
        if (end == 0) {
            return;
        }
        std::string::size_type start = 0;
        for (;;) {
            const std::string::size_type comma = line.find(',', start);
            if (comma == std::string::npos || comma >= end) {
                // Last field: no comma follows it, so emit it explicitly.
                // Dropping it would make the last column unreachable.
                linedatas.push_back(line.substr(start, end - start));
                return;
            }
            linedatas.push_back(line.substr(start, comma - start));
            start = comma + 1;
        }
    }
};
// Interactive driver: asks for a subject name and a row count, then writes
// the "top rows plus Others" summary for that subject's column.
// Returns 0 on success and 1 when the input file, the subject name or the
// row count is unusable, or when processing throws.
int main()
{
    Solution solution;
    const std::vector<std::string> rownames = solution.getRownames();
    if (rownames.empty()) {
        std::cerr << "无法读取 A1.csv 的表头。" << std::endl;
        return 1;
    }

    // Ask for the subject (column) name.
    std::string aim;
    std::cout << "请输入目标名: ";
    if (!(std::cin >> aim)) {
        std::cerr << "未读取到目标名。" << std::endl;
        return 1;
    }
    // Column 0 holds the sample names, so a result <= 0 can only mean that
    // the name was not found.
    const int row = solution.findRowname(rownames, aim);
    if (row <= 0) {
        std::cerr << "未找到目标名: " << aim << std::endl;
        return 1;
    }

    try {
        const std::vector<std::pair<std::string, double>> NameAndData = solution.getNameAndData(row);

        // Ask how many top rows to keep.
        int reserve = 0;
        std::cout << "请输入保留行数: ";
        if (!(std::cin >> reserve) || reserve < 0) {
            std::cerr << "保留行数必须是非负整数。" << std::endl;
            return 1;
        }
        // Never ask for more rows than there are samples.
        if (static_cast<std::size_t>(reserve) > NameAndData.size()) {
            reserve = static_cast<int>(NameAndData.size());
            std::cout << "保留行数超过样本总数,已调整为 " << reserve << std::endl;
        }

        const std::vector<std::pair<std::string, double>> NewNameAndData = solution.getNewNameAndData(NameAndData, reserve);
        solution.getNewCsv(NewNameAndData, row, aim, reserve);
    } catch (const std::exception& error) {
        // For example a non-numeric cell rejected by std::stod.
        std::cerr << "处理失败: " << error.what() << std::endl;
        return 1;
    }
    return 0;
}