1. 前置知识
可以阅读下阮一峰老师的es6入门
2. 从一个demo讲起
How to convert a Node.js stream of event callback functions into an Async Iterator
2.1 csv-parse
的使用
npm 上对 csv-parse 的介绍:The csv-parse package is a parser converting CSV text input into arrays or objects. It is part of the CSV project. It implements the Node.js stream.Transform API. It also provides a simple callback-based API for convenience.
简单讲就是csv-parse
这个库可以解析后缀名为csv的文件,它既实现了Nodejs中流式API读取,也提供了基于回调函数的形式。
其实csv-parse
这个库有Async iterator API,不过不是这里的重点,我们暂时忽略它。
csv-parse
的使用示例:
javascript
import { parse } from 'csv-parse';
// Parsed records accumulate here as the parser produces them.
const records = [];

// Streaming parser configured for colon-delimited input.
const parser = parse({ delimiter: ':' });

// Each time the parser signals that data is available, drain everything
// it currently has buffered; read() returns null once the buffer is empty.
parser.on('readable', () => {
  for (let row = parser.read(); row !== null; row = parser.read()) {
    records.push(row);
  }
});

// Report parsing failures by message only.
parser.on('error', (err) => {
  console.error(err.message);
});

// Feed raw text into the parser, one line per write.
parser.write("root:x:0:0:root:/root:/bin/bash\n");
parser.write("someone:x:1022:1022::/home/someone:/bin/bash\n");

// Signal that no more input will be written.
parser.end();
如何将上面的代码转为一个异步循环,使用for await of
进行遍历迭代,变成如下代码:
javascript
// Target shape of the consumer: records are pulled one per loop turn from an
// async iterable instead of being pushed through a 'readable' callback.
// NOTE: `asyncIterable` is a placeholder; it is not defined in this snippet.
const records = [];
for await (const record of asyncIterable) {
records.push(record);
}
2.2 这样做有什么好处?
为什么要将parser.on('readable', () => {})
变成for await (const record of asyncIterable) {}
?
-
可以同时遍历多个流,例如同时逐行的比较多个csv文件的差异;
-
如果要用回调的方式来同时逐行的比较多个csv文件差异,很难;
注意: 如果只是读取一个csv文件,无需这样。
3. 具体实现
这里说明下,
csv-parse
既可以在Nodejs中使用,也可以在浏览器环境下使用。这里以Nodejs环境下为示例
- 本地准备两个csv文件
- static/1.csv
某班级上学期学生成绩
姓名,期中成绩,期末成绩
张三,100,90
李四,99,99
王五,70,89
赵六,55,58
苏七,100,100
- static/2.csv
某班级下学期学生成绩
姓名,期中成绩,期末成绩
张三,99,90
李四,89,97
王五,90,93
赵六,65,73
苏七,100,100
- demo.mjs
javascript
import path from "node:path";
import fs from "node:fs";
import { fileURLToPath } from 'node:url';
import { parse } from 'csv-parse';
// ES modules have no built-in __filename/__dirname, so derive both from
// this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Opens a CSV file located relative to this module and pipes it through a
// comma-delimited csv-parse parser.
const openCsvParser = (relativePath) =>
  fs.createReadStream(path.resolve(__dirname, relativePath))
    .pipe(parse({ delimiter: ',' }));

const parser1 = openCsvParser("./static/1.csv");
const parser2 = openCsvParser("./static/2.csv");
/**
 * Adapts an event-based readable stream (here: a csv-parse parser) into an
 * async generator that yields one record per iteration.
 *
 * Records are pulled with `parser.read()` only when the consumer asks for the
 * next value, so nothing is buffered beyond what the stream itself holds.
 * While no record is available the generator sleeps until the stream emits
 * 'readable', 'end', 'close' or 'error' instead of polling.
 *
 * @param {import('node:stream').Readable} parser - stream exposing `read()`,
 *   `on()`/`off()` and `destroy()`; `read()` must return `null` when empty.
 * @yields {*} every non-null value returned by `parser.read()`, in order.
 * @throws {*} rethrows, inside the generator, whatever the stream passed to
 *   its 'error' event, so the consumer's try/catch (or `for await`) sees it.
 */
async function* createCsvParseStream(parser) {
  // A stream that already finished before iteration started will never emit
  // 'end' again, so seed the flag from its current state.
  let done = Boolean(parser.readableEnded || parser.destroyed);
  let failed = false;
  let failure;
  // Resolver of the promise the loop is currently sleeping on, if any.
  let wake = null;

  const notify = () => {
    if (wake !== null) {
      const resolve = wake;
      wake = null;
      resolve();
    }
  };
  const onError = (err) => {
    // Throwing here would escape from the emitter as an uncaught exception;
    // record the error and let the generator loop rethrow it instead.
    failed = true;
    failure = err;
    done = true;
    notify();
  };
  const onFinished = () => {
    done = true;
    notify();
  };

  parser.on('readable', notify);
  parser.on('error', onError);
  parser.on('end', onFinished);
  // 'close' without 'end' happens when the stream is destroyed; treat it as
  // completion so the consumer is never left waiting forever.
  parser.on('close', onFinished);

  try {
    while (true) {
      if (failed) {
        throw failure;
      }
      const record = parser.read();
      if (record !== null) {
        yield record;
        continue;
      }
      if (done) {
        return;
      }
      // Nothing buffered yet: sleep until the stream reports a state change.
      await new Promise((resolve) => {
        wake = resolve;
      });
    }
  } finally {
    parser.off('readable', notify);
    parser.off('error', onError);
    parser.off('end', onFinished);
    parser.off('close', onFinished);
    // The consumer stopped early (break / return / throw). Destroy the stream
    // rather than end() it: an upstream pipe may still be writing, and a
    // write after end() would raise an error on the parser.
    if (!done) {
      parser.destroy();
    }
  }
}
- 筛选出下学期相比上学期,期中成绩和期末成绩都没有退步(进步或持平)的同学
javascript
try {
  // One async iterator per CSV parser, in semester order.
  const iterators = [parser1, parser2].map((source) => createCsvParseStream(source));
  // Advances both iterators together and resolves with the pair of results.
  const step = () => Promise.all(iterators.map((iterator) => iterator.next()));

  const diffs = [];
  let [res1, res2] = await step();
  // Walk both files row by row until either one runs out.
  while (!(res1.done || res2.done)) {
    console.log("do stuff:", res1, res2);
    const { value: arr1 } = res1;
    const { value: arr2 } = res2;

    // Keep rows for the same student whose second-semester midterm and final
    // scores are both greater than or equal to the first-semester ones.
    const sameStudent = arr1[0] === arr2[0];
    const midtermHeld = Number(arr2[1]) >= Number(arr1[1]);
    const finalHeld = Number(arr2[2]) >= Number(arr1[2]);
    if (sameStudent && midtermHeld && finalHeld) {
      diffs.push([arr1, arr2]);
    }

    [res1, res2] = await step();
  }
  console.log(diffs);
} catch (error) {
  console.error(error);
}
- 执行打印日志
执行node demo.mjs
后输出,可以看到已经筛选出3位符合条件的同学
log
do stuff: { value: [ '姓名', '期中成绩', '期末成绩' ], done: false } { value: [ '姓名', '期中成绩', '期末成绩' ], done: false }
do stuff: { value: [ '张三', '100', '90' ], done: false } { value: [ '张三', '99', '90' ], done: false }
do stuff: { value: [ '李四', '99', '99' ], done: false } { value: [ '李四', '89', '97' ], done: false }
do stuff: { value: [ '王五', '70', '89' ], done: false } { value: [ '王五', '90', '93' ], done: false }
do stuff: { value: [ '赵六', '55', '58' ], done: false } { value: [ '赵六', '65', '73' ], done: false }
do stuff: { value: [ '苏七', '100', '100' ], done: false } { value: [ '苏七', '100', '100' ], done: false }
[
[ [ '王五', '70', '89' ], [ '王五', '90', '93' ] ],
[ [ '赵六', '55', '58' ], [ '赵六', '65', '73' ] ],
[ [ '苏七', '100', '100' ], [ '苏七', '100', '100' ] ]
]
4. 运用到浏览器常用的FileReader
API中来
- index.html
html
<input type="file" id="fileInput" multiple />
<script type="module">
import {parse} from "./node_modules/csv-parse/dist/esm/index.js";
// 照葫芦画瓢,再来一遍
/**
 * Reads a CSV file with FileReader, parses it with csv-parse's callback API
 * and yields the parsed records one at a time.
 *
 * Both callback-based steps are wrapped in promises so that a read failure or
 * a parse failure rejects inside the generator and reaches the consumer's
 * try/catch. Throwing from inside the callbacks themselves would only raise
 * an uncaught error in the event loop and leave the consumer unaware.
 *
 * @param {Blob} input - the File/Blob handed to `FileReader.readAsText`.
 * @yields {*} each element of the record array produced by `parse`.
 * @throws {*} the FileReader error, or the error reported by `parse`.
 */
async function* createCsvParseStream(input) {
  // Step 1: read the whole file as text.
  const text = await new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      console.log("this.result:", fileReader.result);
      resolve(fileReader.result);
    };
    fileReader.onerror = () => {
      reject(fileReader.error ?? new Error("FileReader failed to read the input"));
    };
    fileReader.onloadend = () => {
      console.log("onloadend");
    };
    fileReader.readAsText(input);
  });

  // Step 2: parse the text; the callback receives every record at once.
  const records = await new Promise((resolve, reject) => {
    parse(text, { delimiter: "," }, (err, data) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(data);
    });
  });

  // Step 3: hand the records to the consumer one by one.
  yield* records;
}
// Once the DOM is ready, compare the two CSV files chosen in the file input.
window.addEventListener("DOMContentLoaded", () => {
  const fileInput = document.querySelector("#fileInput");

  fileInput.addEventListener("change", async (e) => {
    const files = e?.target?.files;
    console.log("files:", files);

    // Only the two-file case is handled for now.
    if (files.length !== 2) {
      return;
    }

    try {
      const generators = Array.from(files, (file) => createCsvParseStream(file));
      // Pulls the next record from every generator at the same time.
      const pullAll = () => Promise.all(generators.map((gen) => gen.next()));

      const diffs = [];
      for (let r = await pullAll(); !r.some((i) => i.done); r = await pullAll()) {
        console.log("do stuff:", r);
        const arr1 = r[0].value;
        const arr2 = r[1].value;

        // Keep rows for the same student whose second-file midterm and final
        // scores are both greater than or equal to the first-file ones.
        const sameStudent = arr1[0] === arr2[0];
        const midtermHeld = Number(arr2[1]) >= Number(arr1[1]);
        const finalHeld = Number(arr2[2]) >= Number(arr1[2]);
        if (sameStudent && midtermHeld && finalHeld) {
          diffs.push([arr1, arr2]);
        }
      }
      console.log("diffs:", diffs);
    } catch (error) {
      console.error(error);
    }
  });
});
</script>
或者使用for await of
,这样写:
javascript
// Alternative change handler that consumes the generators with `for await`.
// Relies on `fileInput` and `createCsvParseStream` being defined elsewhere.
fileInput.addEventListener("change", async function(e) {
const files = e?.target?.files;
console.log("files:", files);
// Only the two-file case is handled.
if(files.length === 2) {
try {
// One async generator per selected file.
const gens = [...files].map(file => createCsvParseStream(file));
const diffs = [];
// Processes one row pair, then calls itself to process the next pair.
const run = async function () {
const results = [];
// gens.map(...) requests the next record from every generator up front and
// produces an array of promises; `for await` awaits them in array order.
for await (const r of gens.map(gen => gen.next())) {
results.push(r);
// Act only once both results are in and neither generator is finished.
if(results.length === 2 && results.every(i => !i.done)) {
console.log("do stuff:", results);
const [arr1, arr2] = [results[0].value, results[1].value];
// Same first column, and columns 1 and 2 of the second file are both
// greater than or equal to those of the first file.
if(arr1[0] === arr2[0] && Number(arr2[1]) >= Number(arr1[1]) && Number(arr2[2]) >= Number(arr1[2])) {
diffs.push([arr1, arr2]);
}
// Recurse for the next pair; the recursion stops as soon as any
// generator reports done, because the condition above then fails.
await run();
}
}
}
await run();
console.log("diffs:", diffs);
} catch (error) {
console.error(error);
}
}
});
- 选择两个csv文件后,筛选出下学期比上学期成绩进步的同学
log
do stuff: [{"value":["姓名","期中成绩","期末成绩"],"done":false},{"value":["姓名","期中成绩","期末成绩"],"done":false}]
index.html:75 do stuff: [{"value":["张三","100","90"],"done":false},{"value":["张三","99","90"],"done":false}]
index.html:75 do stuff: [{"value":["李四","99","99"],"done":false},{"value":["李四","89","97"],"done":false}]
index.html:75 do stuff: [{"value":["王五","70","89"],"done":false},{"value":["王五","90","93"],"done":false}]
index.html:75 do stuff: [{"value":["赵六","55","58"],"done":false},{"value":["赵六","65","73"],"done":false}]
index.html:75 do stuff: [{"value":["苏七","100","100"],"done":false},{"value":["苏七","100","100"],"done":false}]
index.html:85 diffs: [[["王五","70","89"],["王五","90","93"]],[["赵六","55","58"],["赵六","65","73"]],[["苏七","100","100"],["苏七","100","100"]]]
5. 参考资料
6. 最后
如果文章对您有帮助,可以关注我的个人公众号半个柠檬2020
,偶尔也会在公众号上面更新一些自己的学习笔记。