使用nodejs stream实现文件和文件夹的上传部署

在开发过程中，经常需要把构建出来的静态文件推送到测试服务器进行部署。为了能和命令行搭配使用，并且让部署更加便捷、通用，我开发了这样一个小工具：它可以快速读取并上传整个文件夹到服务器，还可以和构建命令集成。以下是实现过程：

实现stream.Readable读取多个文件

实现文件上传第一步是使用stream.Readable来创建一个可以读取多个文件的数据可读流。

这里创建一个类继承stream.Readable

javascript 复制代码

import { Readable } from 'node:stream'

/**
 * Skeleton of a Readable subclass that will later read several files.
 *
 * The constructor accepts `basePath`, `path` and `isolationPath` but does not
 * use them yet; it only initialises the underlying Readable.
 */
export class ReadFilesStream extends Readable {
  constructor(basePath, path, isolationPath) {
    super()
  }
  // Hook that subclasses of Readable must provide; intentionally left empty
  // in this skeleton, so no data is ever produced.
  _read(size) {
  }
}

需要子类实现的方法是_read，这个方法仅供内部实现使用，外部不应该直接调用。参数size是建议读取的字节数，对应stream.Readable options中的highWaterMark，单位是字节（byte），而不是KB。

这个方法的调用有两种方式触发

arduino 复制代码

1、调用this.push(chunk)推入数据。

2、外部调用read或者pipe管道下读取数据时触发。

多文件读取实现

基于以上两个方法push和_read实现一套多个文件的读取写入到可读流。

首先收集多个文件：

javascript 复制代码

import fs from 'node:fs'
import path from 'node:path'
/**
 * Lists every regular file reachable from `path`, resolved against `basePath`.
 *
 * @param {string} basePath - Directory that `path` is resolved against.
 * @param {string} path - File or directory (relative to `basePath`) to scan.
 * @returns {Array<{path: string, fullPath: string, size: number}>} One entry
 *   per file, in the order the traversal discovered them.
 */
export function findFiles(basePath, path) {
  // collectFiles appends into the array it is given, so hand it a fresh one.
  const collected = []
  collectFiles(basePath, path, collected)
  return collected
}

/**
 * Walks `p` (resolved against `base`) depth-first and appends one descriptor
 * per regular file to `files`.
 *
 * @param {string} base - Directory that relative paths are resolved against.
 * @param {string} p - Relative path of the file or directory to start from.
 * @param {Array<{path: string, fullPath: string, size: number}>} files -
 *   Output array, mutated in place.
 */
function collectFiles(base, p, files) {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in the order readdirSync returned them.
  const pending = [p]
  while (pending.length > 0) {
    const relative = pending.pop()
    const absolute = path.resolve(base, relative)
    const info = fs.statSync(absolute)
    if (info.isFile()) {
      files.push({
        // Normalise the platform separator so the path always uses '/'.
        path: relative.split(path.sep).join('/'),
        fullPath: absolute,
        size: info.size
      })
    } else if (info.isDirectory()) {
      const names = fs.readdirSync(absolute)
      for (let i = names.length - 1; i >= 0; i--) {
        pending.push(path.join(relative, names[i]))
      }
    }
    // Anything that is neither a file nor a directory is skipped.
  }
}

这样可以收集到某个文件夹下的所有文件，以及每个文件的绝对路径、相对路径和文件大小（以字节为单位）。需要注意的是，这里使用path.sep作为分隔符拆分路径：因为Windows和Linux上的文件路径分隔符不一致，所以统一转换成"/"。

接着开始实现一个读取多文件的可读流。

javascript 复制代码

import { Readable } from 'node:stream'
import { findFiles } from '../utils/findFiles'
import { createReadStream } from 'node:fs'

/**
 * Readable stream that emits the contents of every file found under `path`
 * (resolved against `basePath`), one file after another.
 *
 * `isolationPath` is accepted but not used by this version of the class.
 */
export class ReadFilesStream extends Readable {
  /**
   * @param {string} basePath - Directory that `path` is resolved against.
   * @param {string} path - File or directory to read.
   * @param {string} isolationPath - Unused here.
   */
  constructor(basePath, path, isolationPath) {
    super()
    const files = findFiles(basePath, path)
    if (files.length === 0) {
      // Nothing to read: end right away so consumers receive 'end' instead of
      // waiting forever on a stream that will never produce data.
      this.push(null)
      return
    }
    this._files = files
    this.getCurrentStream()
  }
  // File stream currently being forwarded, or null when there is none.
  _currentStream = null
  // Index into _files of the file currently being read.
  _currntIndex = 0
  _files = []
  get maxFilesLength() {
    return this._files.length
  }
  /**
   * Called after a file has been fully read: ends this stream when it was the
   * last file, otherwise starts reading the next one.
   */
  onEnd() {
    if (this._currntIndex === this.maxFilesLength) {
      this.push(null)
      return
    }
    this.getCurrentStream()
  }
  /** Opens the file at the current index and forwards its data to this stream. */
  getCurrentStream() {
    const file = this._files[this._currntIndex]
    // highWaterMark is expressed in bytes, so 16 KiB must be written as 16 * 1024.
    const source = createReadStream(file.fullPath, {
      highWaterMark: 16 * 1024
    })
    this._currentStream = source
    source.on('data', (chunk) => {
      // push() returning false means our buffer is full: stop reading the
      // file until _read asks for more.
      if (!this.push(chunk)) {
        source.pause()
      }
    })
    source.on('end', () => {
      this._currntIndex++
      this.onEnd()
    })
    // Without a listener a read failure would surface as an uncaught exception;
    // forward it so consumers (e.g. pipeline) can handle it.
    source.on('error', (err) => {
      this.destroy(err)
    })
  }
  // Triggered when the consumer wants more data: resume the paused file stream.
  _read(size) {
    this._currentStream?.resume()
  }
  // Release the open file when this stream is destroyed early.
  _destroy(err, callback) {
    this._currentStream?.destroy()
    callback(err)
  }
}

需要注意的点：

1、this.push返回false之后代表缓存区已经满了，不可以再推入数据，但是已经push的还存在。

2、每次this.push之后都会调用_read，外部管道或者调用read读取的时候也会触发_read,所以可以在外部读取数据之后触发的_read中恢复文件流的继续读取。

以上一个读取多个文件的可读流已经实现。接下来需要实现的功能是接入net.Socket实现数据上传。

一个套接字是由host+port组成，这里继承net.Socket实现一个发送字节流的套接字。

javascript 复制代码

import net from 'node:net'
import chalk from 'chalk'
/**
 * TCP client socket with a promise-based connect helper.
 */
export class Cilent extends net.Socket {
  /**
   * Connects to `host:port` with keep-alive enabled.
   *
   * Registers error, data, close and ready listeners on this socket: incoming
   * data is printed, and closing prints whether the transfer succeeded.
   *
   * @param {string} host - Server host name or address.
   * @param {number} port - Server port.
   * @returns {Promise<null>} Resolves with null on 'ready'; rejects with the
   *   socket error if one is emitted first.
   */
  connectServer(host, port) {
    return new Promise((resolve, reject) => {
      const handleError = (err) => {
        if (!err) {
          return
        }
        reject(err)
        console.warn('链接失败')
      }
      const handleData = (chunk) => {
        console.log(chunk.toString())
      }
      const handleClose = (hadError) => {
        const outcome = hadError ? '失败' : '成功'
        console.log(chalk.bgYellow(`数据写入${outcome}链接关闭`))
      }
      const handleReady = () => {
        resolve(null)
      }

      this.on('error', handleError)
        .on('data', handleData)
        .on('close', handleClose)
        .on('ready', handleReady)

      this.connect({ port, host, keepAlive: true })
    })
  }
}

数据流编码压缩

套接字实现完成之后需要分割文件编码压缩等功能。

这个文件上传是基于tcp协议上实现的，tcp协议本身是保证流传输的可靠性，发送的是一个个的字节流，所以我们需要自己封装一个简单的协议来区分各个文件流和基本的操作。

以下是协议内容：

p 复制代码

`\r\n[action]\n[path]\n[size]\r\n`

action：操作动作，目前有 create、delete、chunk（写入文件内容）三种，后期可扩展

path：操作路径

size: 文件大小

以上通过\r\n分割字节流获取每一段报文的操作，size标记读取长度，可以保证有效分割每个文件。代码实现如下细节看注释：

javascript 复制代码

import { Readable } from 'node:stream'
import { findFiles } from '../utils/findFiles'
import { createReadStream } from 'node:fs'
import chalk from 'chalk'

/**
 * Readable stream that serialises a folder upload using the text protocol
 * `\r\n[action]\n[path]\n[size]\r\n`: a `delete` and a `create` header for
 * `isolationPath`, then one `chunk` header followed by the raw bytes of
 * each file.
 */
export class ReadFilesStream extends Readable {
  /**
   * @param {string} basePath - Root directory that `path` is resolved against.
   * @param {string} path - Folder (or file) to upload.
   * @param {string} isolationPath - Name the server should delete and recreate.
   */
  constructor(basePath, path, isolationPath) {
    super()
    const files = findFiles(basePath, path)
    if (files.length === 0) {
      // Nothing to upload: end right away so the pipeline finishes instead of
      // hanging. No delete/create header is sent in this case.
      this.push(null)
      return
    }
    this._files = files
    // Ask the server to delete the old folder and create a fresh one; both
    // actions carry a size of 0.
    const operaBuffer = Buffer.from(`\r\ndelete\n${isolationPath}\n0\r\n\r\ncreate\n${isolationPath}\n0\r\n`)
    this.push(operaBuffer)
    this.getCurrentStream()
  }
  // File stream currently being forwarded, or null when there is none.
  _currentStream = null
  // Index into _files of the file currently being read.
  _currntIndex = 0
  _files = []
  get maxFilesLength() {
    return this._files.length
  }
  /**
   * Called after a file has been fully read: ends this stream (and prints the
   * total size) when it was the last file, otherwise starts the next one.
   */
  onEnd() {
    if (this._currntIndex === this.maxFilesLength) {
      const totalSize = this._files.reduce((prev, cur) => {
        return prev + cur.size
      }, 0)
      console.log(chalk.bgYellow(`数据读取结束总发送${totalSize / 1024}kb`))
      // Pushing null marks the end of the stream.
      this.push(null)
      return
    }
    this.getCurrentStream()
  }
  /** Emits the header of the current file and starts forwarding its content. */
  getCurrentStream() {
    const file = this._files[this._currntIndex]
    console.log(chalk.yellow('文件读取中:', file.fullPath))
    // highWaterMark is expressed in bytes, so 16 KiB must be written as 16 * 1024.
    const source = createReadStream(file.fullPath, {
      highWaterMark: 16 * 1024
    })
    this._currentStream = source
    // Every file is preceded by a header carrying its path and size.
    const headerBuffer = Buffer.from(`\r\nchunk\n${file.path}\n${file.size}\r\n`)
    // The header is buffered even when push() returns false, but nothing more
    // may be pushed until the consumer catches up. Pausing before the 'data'
    // listener is attached keeps the file stream from starting to flow.
    if (!this.push(headerBuffer)) {
      source.pause()
    }
    source.on('data', (chunk) => {
      if (!this.push(chunk)) {
        source.pause()
      }
    })
    source.on('end', () => {
      this._currntIndex++
      this.onEnd()
    })
    // Without a listener a read failure would surface as an uncaught exception;
    // forward it so consumers (e.g. pipeline) can handle it.
    source.on('error', (err) => {
      this.destroy(err)
    })
  }
  // Triggered when the consumer wants more data: resume the paused file stream.
  _read(size) {
    this._currentStream?.resume()
  }
  // Release the open file when this stream is destroyed early.
  _destroy(err, callback) {
    this._currentStream?.destroy()
    callback(err)
  }
}

在这其中为了减少传输的字节数，使用gzip进行压缩，nodejs提供了pipeline函数，可以进行管道拼接如下：

javascript 复制代码

import { createGzip, createUnzip } from 'node:zlib'
import { pipeline } from 'node:stream'

/**
 * Pipes `source` through a gzip compressor into `destination`.
 *
 * A pipeline failure is reported by printing the error message; nothing is
 * returned or thrown.
 *
 * @param {import('node:stream').Readable} source - Stream to compress.
 * @param {import('node:stream').Writable} destination - Receives the gzip output.
 */
export function gzip(source, destination) {
  const reportFailure = (error) => {
    if (!error) {
      return
    }
    console.log(error.message)
  }
  pipeline(source, createGzip(), destination, reportFailure)
}

/**
 * Pipes `source` through a decompressor into `destination`.
 *
 * A pipeline failure is reported by printing the error message; nothing is
 * returned or thrown.
 *
 * @param {import('node:stream').Readable} source - Compressed input stream.
 * @param {import('node:stream').Writable} destination - Receives the decompressed data.
 */
export function ungzip(source, destination) {
  const reportFailure = (error) => {
    if (!error) {
      return
    }
    console.log(error.message)
  }
  pipeline(source, createUnzip(), destination, reportFailure)
}

// Usage: gzip-compress whatever readFilesStream produces and write it to
// client (both are created elsewhere).
gzip(readFilesStream, client)

这样便对流实现了gzip压缩；减少了数据发送。

服务端实现

接下来便是服务端的实现：

服务端的实现主要是创建服务接收数据，处理动作，分割文件流，写入文件这几步。

创建服务功能代码如下：

javascript 复制代码

import net from 'node:net'
import { EventEmitter } from 'node:events'
import { ungzip } from '../zip/index.js'
import { WriteZipFilesStream } from '../stream/WriteZipFilesStream.js'
import chalk from 'chalk'
/**
 * TCP server that receives gzip-compressed upload streams and hands them to a
 * WriteZipFilesStream rooted at `basePath`.
 *
 * Emits 'sucess' once listening, 'error' when the underlying server errors,
 * and 'close' when it closes.
 */
export class Server extends EventEmitter {
  /**
   * Creates the server and starts listening immediately.
   *
   * @param {string} basePath - Directory that received files are written under.
   * @param {number} [port=9872] - TCP port to listen on.
   */
  constructor(basePath, port = 9872) {
    super()
    this._basePath = basePath
    this._server = net.createServer((socket) => {
      this.onConnectListen(socket)
    })

    this._server.on('listening', () => {
      this.emit('sucess')
    })
    this._server.on('error', (e) => {
      this.emit('error', e)
    })

    this._server.on('close', () => {
      this.emit('close')
    })
    this._server.listen(port)
  }
  _basePath
  /**
   * Handles a newly accepted connection: decompresses everything the socket
   * sends and writes it through a fresh WriteZipFilesStream.
   * @param {net.Socket} socket
   */
  onConnectListen(socket) {
    // highWaterMark is expressed in bytes, so 16 KiB must be written as 16 * 1024.
    const writeFilesStream = new WriteZipFilesStream(this._basePath, {
      highWaterMark: 16 * 1024
    })
    // When the socket errors, finish the write stream explicitly.
    socket.on('error', () => {
      writeFilesStream.end()
    })
    // pipeline (inside ungzip) ends the destination once the socket has been
    // fully read, and logs any pipeline error.
    ungzip(socket, writeFilesStream)
  }
  /** Stops accepting connections and logs whether closing succeeded. */
  destory() {
    this._server.close((err) => {
      if (err) {
        console.warn(`关闭失败${err.message}`)
      } else {
        console.log('关闭成功')
      }
    })
  }
}

接下来实现最核心的writeFilesStream，直接上代码：

javascript 复制代码

import { Writable } from 'node:stream'
import fs from 'node:fs'
import path from 'node:path'
// const fullPath = path.resolve(process.cwd(), './source')
/**
 * Writable stream that decodes the upload protocol and applies it to disk.
 *
 * The incoming byte stream is a sequence of records of the form
 * `\r\n[action]\n[path]\n[size]\r\n` followed by `size` raw body bytes:
 *  - `delete`: recursively removes `path` under the base path;
 *  - `create`: selects `path` as the folder that later files are written into;
 *  - `chunk`:  writes the body to the file `path` inside that folder.
 *
 * Body bytes are forwarded to the file stream slice by slice (never buffered
 * as a whole), every file stream is ended before the next record is parsed,
 * and every received path must stay inside the base path.
 */
export class WriteZipFilesStream extends Writable {
  /**
   * @param {string} basePath - Directory that every received path is resolved against.
   */
  constructor(basePath) {
    super()
    this._basePath = basePath
  }
  _basePath
  // Folder (relative to the base path) announced by the latest `create` action.
  _isolationPath = ''
  // True while the bytes between a header's opening and closing \r\n are collected.
  _isParseHeader = false
  // Number of body bytes still expected for the current record.
  _size = 0
  // File stream receiving the current `chunk` body, or null between files.
  _currentWriteStram = null
  // Transition table of the \r\n matcher; reaching state 2 means "\r\n seen".
  _state = {
    // Initial state
    currentIndex: 0,
    0: {
      character: 13,
      matchNext: 1,
      noMatchNext: 0
    },
    1: {
      character: 10,
      matchNext: 2,
      noMatchNext: 0
    },
    2: {
      character: 13,
      matchNext: 1,
      noMatchNext: 0
    }
  }
  // Header bytes collected so far.
  _stash = []
  get currentIndex() {
    return this._state.currentIndex
  }
  set currentIndex(val) {
    this._state.currentIndex = val
  }
  get currentState() {
    return this._state[this.currentIndex]
  }
  /**
   * Feeds one byte to the \r\n matcher.
   * @param {number} character - Byte value.
   * @returns {boolean} True when this byte completes a \r\n pair.
   */
  _matchState(character) {
    if (this.currentState.character === character) {
      this.currentIndex = this.currentState.matchNext
    } else if (character === 13) {
      // A \r that breaks a partial match may itself start the next \r\n.
      this.currentIndex = 1
    } else {
      this.currentIndex = this.currentState.noMatchNext
    }
    return this.currentIndex === 2
  }
  /**
   * Consumes one header byte. Body bytes never pass through here; `_write`
   * forwards them directly to the file stream.
   * @param {number} byte - Byte value.
   */
  _parse(byte) {
    const isMatch = this._matchState(byte)
    if (this._isParseHeader) {
      this._stash.push(byte)
    }
    if (!isMatch) {
      return
    }
    if (!this._isParseHeader) {
      // Opening \r\n: start collecting the header.
      this._isParseHeader = true
      return
    }
    // Closing \r\n: the header is complete.
    const header = Buffer.from(this._stash).toString()
    this._isParseHeader = false
    this._stash = []
    this._handleHeader(header)
  }
  /**
   * Executes the action described by a complete header and records how many
   * body bytes follow it.
   * @param {string} header - Header text, still including the closing \r\n.
   * @throws {Error} When the size field is not a non-negative integer or the
   *   path leaves the base path.
   */
  _handleHeader(header) {
    const [opera, target, size] = header.replace(/\r\n/g, '').split('\n')
    const bodySize = Number.parseInt(size, 10)
    if (!Number.isSafeInteger(bodySize) || bodySize < 0) {
      // Without a valid size the record boundaries are unknown.
      throw new Error(`Invalid header: ${JSON.stringify(header)}`)
    }
    if (opera === 'chunk') {
      const fileStream = this.createFileStream(target)
      // A failing file write must fail the whole upload instead of crashing
      // the process with an unhandled 'error' event.
      fileStream.on('error', (err) => {
        this.destroy(err)
      })
      this._currentWriteStram = fileStream
    } else if (opera === 'delete') {
      console.log('删除文件', target)
      this.deleteFile(target)
    } else if (opera === 'create') {
      console.log('创建文件', target)
      this.createFile(target)
    }
    // For actions other than `chunk` there is no file stream, so any body
    // bytes they announce are skipped.
    this._size = bodySize
  }
  /**
   * Ends the current file stream and calls `done` once its data is flushed.
   * @param {() => void} done
   */
  _closeCurrentFile(done) {
    const fileStream = this._currentWriteStram
    this._currentWriteStram = null
    fileStream.once('finish', done)
    fileStream.end()
  }
  _write(chunk, encoding, cb) {
    let offset = 0
    // Processes the chunk from `offset`; re-entered after waiting for a file
    // stream to drain or finish.
    const pump = () => {
      if (this.destroyed) {
        return
      }
      try {
        while (offset < chunk.length) {
          if (this._size > 0) {
            // Forward as many body bytes as this chunk holds.
            const take = Math.min(this._size, chunk.length - offset)
            const body = chunk.subarray(offset, offset + take)
            // Advance before a possible wait so no byte is handled twice.
            offset += take
            this._size -= take
            const fileStream = this._currentWriteStram
            if (fileStream && !fileStream.write(body) && this._size > 0) {
              // Buffer full and more body to come: wait for 'drain'.
              fileStream.once('drain', pump)
              return
            }
          } else {
            this._parse(chunk[offset])
            offset += 1
          }
          if (this._size === 0 && this._currentWriteStram) {
            // File complete (or empty): end it and wait for 'finish'. 'drain'
            // is not emitted once a stream is ending, so it cannot be used here.
            this._closeCurrentFile(pump)
            return
          }
        }
      } catch (err) {
        cb(err)
        return
      }
      cb()
    }
    pump()
  }
  // Flush a file that was still open when the input ended (truncated upload).
  _final(cb) {
    if (!this._currentWriteStram) {
      cb()
      return
    }
    this._closeCurrentFile(() => cb())
  }
  // Release the open file when this stream is destroyed.
  _destroy(err, cb) {
    if (this._currentWriteStram) {
      this._currentWriteStram.destroy()
      this._currentWriteStram = null
    }
    cb(err)
  }
  /**
   * Joins `segments` onto the base path and verifies the result stays inside it.
   * @param {...string} segments - Path segments received from the client.
   * @returns {string} Absolute path inside the base path.
   * @throws {Error} When the resulting path escapes the base path.
   */
  _resolveInsideBase(...segments) {
    const root = path.resolve(this._basePath)
    const target = path.join(root, ...segments)
    const relative = path.relative(root, target)
    const escapes = relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative)
    if (escapes) {
      throw new Error(`Path escapes the base directory: ${segments.join('/')}`)
    }
    return target
  }
  /**
   * Recursively removes `p` under the base path; a missing path is not an error.
   * @param {string} p - Path relative to the base path.
   */
  deleteFile(p) {
    const absPath = this._resolveInsideBase(p)
    fs.rmSync(absPath, {
      force: true,
      recursive: true
    })
  }
  /**
   * Selects the folder that subsequent files are written into. The folder
   * itself is created lazily by createFileStream.
   * @param {string} p - Folder relative to the base path.
   */
  createFile(p) {
    // Validate now so a bad folder is rejected before any file is written.
    this._resolveInsideBase(p)
    this._isolationPath = p
  }
  /**
   * Opens a write stream for `p` inside the current folder, creating parent
   * directories as needed.
   * @param {string} p - '/'-separated file path sent by the client.
   * @returns {fs.WriteStream}
   */
  createFileStream(p) {
    const filePath = this._resolveInsideBase(this._isolationPath, ...p.split('/'))
    const absPath = path.dirname(filePath)
    console.log('写入文件绝对路径', absPath)
    fs.mkdirSync(absPath, {
      recursive: true
    })
    return fs.createWriteStream(filePath, {
      flags: 'w+'
    })
  }
}

这里需要注意的点：

1、_write是子类需要实现Writable的方法，cb回调函数的作用是通知写入完成，适合于同步异步的情况，只有在调用cb之后才会写入下一段chunk。

2、传入_write的chunk大小由上游写入的数据决定，并不受highWaterMark限制；highWaterMark只决定内部缓冲达到多少字节后write()开始返回false。

至此再加上启动代码，一个简单的文件推送完成了。

javascript 复制代码

import fs from 'node:fs'
import { createRequire } from 'node:module'
import path from 'node:path'
import chalk from 'chalk'
import minimist from 'minimist'
import { Cilent } from './net/Client.js'
import { Server } from './net/Server.js'
import { ReadFilesStream } from './stream/ReadFilesStream.js'
import { gzip } from './zip/index.js'

/**
 * Command-line entry point.
 *
 * `push`  uploads a folder; defaults may be overridden by a `pushc.js` file
 *         in the current working directory.
 * `serve` starts the receiving server and requires `--basepath`.
 */
function start() {
  const argv = minimist(process.argv.slice(2))
  const commands = argv._
  if (commands.includes('push')) {
    const configFile = path.join(process.cwd(), 'pushc.js')
    let config = {
      basePath: process.cwd(),
      path: 'dist',
      isolationPath: 'public',
      serveHost: '127.0.0.1'
    }
    if (fs.existsSync(configFile)) {
      try {
        // `require` is not defined inside an ES module, so build one
        // explicitly; otherwise every config file fails with a ReferenceError
        // that is then misreported as a configuration error.
        const requireConfig = createRequire(import.meta.url)
        const userConfig = requireConfig(configFile)
        config = { ...config, ...userConfig }
      } catch (error) {
        // Include the cause so the user can tell what is wrong with the file.
        console.log(chalk.red('配置文件错误'), error.message)
        return
      }
    }
    startClient(config.basePath, config.path, config.isolationPath, config.serveHost)
  }
  if (commands.includes('serve')) {
    const basepath = argv.basepath
    if (!basepath) {
      console.log(chalk.red('启动服务需要指定--basepath'))
      return
    }
    engineServe(basepath)
  }
}

/**
 * Connects to the server and uploads the folder as a gzip-compressed stream.
 *
 * @param {string} basePath - Root directory that `path` is resolved against.
 * @param {string} path - Folder to upload.
 * @param {string} isolationPath - Folder name to (re)create on the server.
 * @param {string} serveHost - Server host name or address.
 * @param {number} [port=9872] - Server port.
 */
function startClient(basePath, path, isolationPath, serveHost, port = 9872) {
  const client = new Cilent()
  client.connectServer(serveHost, port).then(
    () => {
      console.log(chalk.yellow('链接服务器成功...'))
      try {
        const readFilesStream = new ReadFilesStream(basePath, path, isolationPath)
        gzip(readFilesStream, client)
      } catch (e) {
        // Failing to set up the upload (for example a missing folder) is not
        // a connection failure: report it as such and release the socket so
        // the process can exit.
        console.log(chalk.red(`读取文件失败: ${e.message}`))
        client.destroy()
      }
    },
    (e) => {
      console.log(chalk.red('链接服务器失败...'), e.message)
    }
  )
}
/**
 * Starts the upload server and reports whether it came up.
 *
 * @param {string} basepath - Directory that received files are written under.
 */
function engineServe(basepath) {
  const server = new Server(basepath)
  server.on('error', (error) => {
    // Print the cause as well, so failures can be told apart.
    console.log(chalk.red('服务器启动失败'), error?.message ?? error)
  })
  server.on('sucess', () => {
    console.log(chalk.yellow('服务器启动成功'))
  })
}
// Run the command-line entry point as soon as this module is loaded.
start()

包地址：www.npmjs.com/package/spu...