C# 流式处理

什么是流式处理

流式处理(Streaming) 是一种边读边处理的方式,而不是一次性把所有数据加载到内存中。它的核心优势是:

内存占用低 —— 不需要一次性加载全部数据

响应速度快 —— 可以在数据还没读完时就开始处理

适合大数据量 —— 尤其适合 TB 级文件、网络流、数据库大表查询等

C# 中如何实现流式处理

自 .NET Core 3.0(C# 8.0)起,.NET 引入了 IAsyncEnumerable<T>(异步流,即可异步枚举的序列),配合 await foreach 就可以很方便地实现异步流式处理。

示例一:按批次切分异步流的扩展方法(C#):
/// <summary>
/// Groups the items of an asynchronous sequence into lists of at most
/// <paramref name="batchSize"/> elements, preserving source order.
/// </summary>
/// <typeparam name="T">Element type of the source sequence.</typeparam>
/// <param name="source">The asynchronous sequence to split into batches.</param>
/// <param name="batchSize">Maximum number of items per batch; must be greater than zero.</param>
/// <returns>
/// An asynchronous sequence of batches. Every batch except possibly the last one contains
/// exactly <paramref name="batchSize"/> items; the last one contains the remainder.
/// Each yielded list is a fresh instance that is not reused by this method.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception>
public static IAsyncEnumerable<List<T>> BatchAsync<T>(
    this IAsyncEnumerable<T> source, int batchSize)
{
    // Validate eagerly: an async iterator would defer these checks until the first
    // MoveNextAsync, and a batch size of 0 would silently collect the whole stream
    // into a single list, defeating the purpose of streaming.
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (batchSize <= 0)
    {
        throw new ArgumentOutOfRangeException(
            nameof(batchSize), batchSize, "Batch size must be greater than zero.");
    }

    return Iterate();

    async IAsyncEnumerable<List<T>> Iterate()
    {
        var batch = new List<T>(batchSize);
        await foreach (var item in source)
        {
            batch.Add(item);
            if (batch.Count == batchSize)
            {
                yield return batch;
                // Hand out a new list so the consumer can keep the previous batch safely.
                batch = new List<T>(batchSize);
            }
        }

        // Flush the trailing, partially filled batch.
        if (batch.Count > 0)
        {
            yield return batch;
        }
    }
}
示例二:配合 EF Core 流式读取并分批更新(C#):
// Build the filtered query first; nothing is executed until the stream is enumerated.
var pendingQuery = dbContext.Set<MyEntity>().Where(x => x.Status == "Pending");

// Expose the query as an async stream instead of materializing every row at once.
var entities = pendingQuery.AsAsyncEnumerable();

// Walk the stream in chunks of 1000 rows and update each chunk in bulk.
var batches = entities.BatchAsync(1000);
await foreach (var batch in batches)
{
    await UpdateBatchAsync(batch);
}

通用框架设计思路

抽象成 3 层结构

  1. 数据源(Source)

    • 负责提供数据(文件、TCP、数据库、传感器等)
    • 统一返回 IAsyncEnumerable<T>
  2. 处理器(Processor)

    • 负责处理单个数据项
    • 可替换不同的业务逻辑
  3. 流式处理引擎(StreamingEngine)

    • 把 Source 和 Processor 组合起来
    • 提供统一的执行、取消、错误处理能力

封装与用例:

完整示例代码(C#):
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Text;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;

// ================================
// 1. 接口定义
// ================================

/// <summary>
/// A data source that exposes its items as an asynchronous sequence.
/// </summary>
/// <typeparam name="T">Type of the items produced by the source.</typeparam>
public interface ISource<T>
{
    /// <summary>
    /// Returns the items of this source as an asynchronous sequence.
    /// </summary>
    /// <param name="token">Token passed to the source so reading can be cancelled.</param>
    /// <returns>An asynchronous sequence of items of type <typeparamref name="T"/>.</returns>
    IAsyncEnumerable<T> ReadAsync(CancellationToken token = default);
}

/// <summary>
/// A consumer of items that can handle them individually or in batches.
/// </summary>
/// <typeparam name="T">Type of the items to process.</typeparam>
public interface IProcessor<T>
{
    /// <summary>
    /// Processes a single item.
    /// </summary>
    /// <param name="item">The item to process.</param>
    /// <param name="token">Token passed to the processor so work can be cancelled.</param>
    /// <returns>A <see cref="ValueTask"/> that completes when the item has been processed.</returns>
    ValueTask ProcessAsync(T item, CancellationToken token = default);

    /// <summary>
    /// Processes a group of items in one call.
    /// </summary>
    /// <param name="batch">The items to process together.</param>
    /// <param name="token">Token passed to the processor so work can be cancelled.</param>
    /// <returns>A <see cref="ValueTask"/> that completes when the batch has been processed.</returns>
    ValueTask ProcessBatchAsync(List<T> batch, CancellationToken token = default);
}

// ================================
// 2. 流式处理引擎(批处理 + 重试 + 背压)
// ================================

/// <summary>
/// Pumps items from an <see cref="ISource{T}"/> through a bounded channel into an
/// <see cref="IProcessor{T}"/>, either one item at a time or in fixed-size batches,
/// retrying failed processor calls.
/// </summary>
/// <typeparam name="T">Type of the items flowing through the engine.</typeparam>
public class StreamingEngine<T>
{
    private readonly ISource<T> _source;
    private readonly IProcessor<T> _processor;
    private readonly int _batchSize;
    private readonly int _maxRetries;
    private readonly TimeSpan _retryDelay;
    private readonly int _channelCapacity;

    /// <summary>
    /// Creates an engine that connects <paramref name="source"/> to <paramref name="processor"/>.
    /// </summary>
    /// <param name="source">Where items are read from.</param>
    /// <param name="processor">Where items are delivered.</param>
    /// <param name="batchSize">
    /// Number of items per <see cref="IProcessor{T}.ProcessBatchAsync"/> call. Zero or a negative
    /// value selects item-by-item processing through <see cref="IProcessor{T}.ProcessAsync"/>.
    /// </param>
    /// <param name="maxRetries">
    /// Total number of attempts made for each processor call. Values below 1 are treated as 1
    /// so that every item is attempted at least once.
    /// </param>
    /// <param name="retryDelay">Pause between attempts; defaults to one second.</param>
    /// <param name="channelCapacity">
    /// Capacity of the bounded channel between reader and processor; must be at least 1.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="processor"/> is null.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="channelCapacity"/> is less than 1.
    /// </exception>
    public StreamingEngine(
        ISource<T> source,
        IProcessor<T> processor,
        int batchSize = 0,
        int maxRetries = 3,
        TimeSpan? retryDelay = null,
        int channelCapacity = 1000)
    {
        _source = source ?? throw new ArgumentNullException(nameof(source));
        _processor = processor ?? throw new ArgumentNullException(nameof(processor));

        // Channel.CreateBounded rejects capacities below 1; fail at construction time
        // instead of on the first RunAsync call.
        if (channelCapacity < 1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(channelCapacity), channelCapacity, "Channel capacity must be at least 1.");
        }

        _batchSize = batchSize;
        _maxRetries = maxRetries;
        _retryDelay = retryDelay ?? TimeSpan.FromSeconds(1);
        _channelCapacity = channelCapacity;
    }

    /// <summary>
    /// Reads the whole source and feeds it to the processor, returning when both sides are done.
    /// </summary>
    /// <param name="token">Cancels both reading and processing.</param>
    /// <returns>A task that completes when the source is exhausted and all items are processed.</returns>
    public async Task RunAsync(CancellationToken token = default)
    {
        // The producer gets a linked token so it can be stopped when the consumer dies.
        using var producerCts = CancellationTokenSource.CreateLinkedTokenSource(token);
        var producerToken = producerCts.Token;

        var channel = Channel.CreateBounded<T>(_channelCapacity);

        var producer = Task.Run(() => ProduceAsync(channel.Writer, producerToken), producerToken);
        var consumer = ConsumeAsync(channel.Reader, token);

        // If the consumer stops abnormally nobody drains the channel any more; without this
        // the producer would block forever on a full channel and WhenAll would never complete.
        var firstFinished = await Task.WhenAny(producer, consumer);
        if (firstFinished == consumer && !consumer.IsCompletedSuccessfully)
        {
            producerCts.Cancel();
        }

        await Task.WhenAll(producer, consumer);
    }

    // Copies every item of the source into the channel and always marks the channel complete.
    private async Task ProduceAsync(ChannelWriter<T> writer, CancellationToken token)
    {
        try
        {
            await foreach (var item in _source.ReadAsync(token))
            {
                // WriteAsync waits while the channel is full, which throttles the source.
                await writer.WriteAsync(item, token);
            }
        }
        finally
        {
            // Completing lets the consumer finish draining and exit its read loop.
            writer.Complete();
        }
    }

    // Dispatches to batch or single-item processing depending on the configured batch size.
    private async Task ConsumeAsync(ChannelReader<T> reader, CancellationToken token)
    {
        if (_batchSize > 0)
        {
            await ProcessInBatchesAsync(reader, token);
        }
        else
        {
            await ProcessSingleItemsAsync(reader, token);
        }
    }

    // Drains the channel and hands each item to the processor individually.
    private async Task ProcessSingleItemsAsync(ChannelReader<T> reader, CancellationToken token)
    {
        while (await reader.WaitToReadAsync(token))
        {
            while (reader.TryRead(out var item))
            {
                await RetryAsync(() => _processor.ProcessAsync(item, token), token);
            }
        }
    }

    // Drains the channel, grouping items into lists of _batchSize before handing them over.
    private async Task ProcessInBatchesAsync(ChannelReader<T> reader, CancellationToken token)
    {
        var batch = new List<T>(_batchSize);

        while (await reader.WaitToReadAsync(token))
        {
            while (reader.TryRead(out var item))
            {
                batch.Add(item);

                if (batch.Count >= _batchSize)
                {
                    // Give the processor its own list: clearing and reusing the same instance
                    // would wipe a batch the processor may still hold a reference to.
                    var fullBatch = batch;
                    batch = new List<T>(_batchSize);
                    await RetryAsync(() => _processor.ProcessBatchAsync(fullBatch, token), token);
                }
            }
        }

        // Flush whatever is left once the channel has been completed.
        if (batch.Count > 0)
        {
            await RetryAsync(() => _processor.ProcessBatchAsync(batch, token), token);
        }
    }

    // Runs the operation up to max(1, _maxRetries) times, waiting _retryDelay between attempts.
    // The last failure, and any failure observed after cancellation was requested, propagates.
    private async Task RetryAsync(Func<ValueTask> operation, CancellationToken token)
    {
        // Guarantee at least one attempt so a non-positive setting cannot drop items silently.
        var maxAttempts = Math.Max(1, _maxRetries);

        for (var attempt = 1; ; attempt++)
        {
            try
            {
                await operation();
                return;
            }
            catch (Exception) when (attempt < maxAttempts && !token.IsCancellationRequested)
            {
                await Task.Delay(_retryDelay, token);
            }
        }
    }
}

// ================================
// 3. 数据源实现
// ================================

/// <summary>
/// Reads a text file line by line and converts every line into an item.
/// </summary>
/// <typeparam name="T">Type produced from each line.</typeparam>
public class FileSource<T> : ISource<T>
{
    // Buffer size handed to the FileStream constructor.
    private const int FileBufferSize = 4096;

    private readonly string _filePath;
    private readonly Func<string, T> _converter;

    /// <summary>
    /// Creates a source over the file at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="converter">Maps one line of text to an item.</param>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="converter"/> is null.</exception>
    public FileSource(string filePath, Func<string, T> converter)
    {
        // Reject bad arguments here rather than failing later in the middle of enumeration.
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentException("File path must not be null or empty.", nameof(filePath));
        }

        _filePath = filePath;
        _converter = converter ?? throw new ArgumentNullException(nameof(converter));
    }

    /// <summary>
    /// Opens the file and yields one converted item per line until the end of the file.
    /// </summary>
    /// <param name="token">Checked after every line is read; cancels the enumeration.</param>
    /// <returns>The converted lines, in file order.</returns>
    public async IAsyncEnumerable<T> ReadAsync(CancellationToken token = default)
    {
        // useAsync: true requests asynchronous file I/O; the reader disposes the stream with it.
        var stream = new FileStream(
            _filePath, FileMode.Open, FileAccess.Read, FileShare.Read, FileBufferSize, true);
        using var reader = new StreamReader(stream);

        string line;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            token.ThrowIfCancellationRequested();
            yield return _converter(line);
        }
    }
}

/// <summary>
/// Reads all entities of type <typeparamref name="T"/> from a <see cref="DbContext"/> page by
/// page (Skip/Take) and yields them one at a time without change tracking.
/// </summary>
/// <typeparam name="T">Entity type to read.</typeparam>
public class DatabaseSource<T> : ISource<T> where T : class
{
    private readonly DbContext _context;
    private readonly int _batchSize;
    private readonly Func<IQueryable<T>, IQueryable<T>> _queryShaper;

    /// <summary>
    /// Creates a paged source over <paramref name="context"/>.
    /// </summary>
    /// <param name="context">Context the entities are queried from.</param>
    /// <param name="batchSize">Number of rows fetched per query; must be greater than zero.</param>
    /// <param name="queryShaper">
    /// Optional hook applied to the base query before paging, e.g. <c>q => q.OrderBy(e => e.Id)</c>.
    /// Skip/Take paging is only deterministic over an ordered query, so supplying a stable
    /// ordering here is strongly recommended. When null the query is paged as-is.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception>
    public DatabaseSource(
        DbContext context,
        int batchSize = 1000,
        Func<IQueryable<T>, IQueryable<T>> queryShaper = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));

        // Take(0) would return an empty page and end the stream without reading anything.
        if (batchSize <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(batchSize), batchSize, "Batch size must be greater than zero.");
        }

        _batchSize = batchSize;
        _queryShaper = queryShaper;
    }

    /// <summary>
    /// Yields every entity returned by the (optionally shaped) query, one page at a time.
    /// </summary>
    /// <param name="token">Passed to each page query.</param>
    /// <returns>The entities in the order the pages return them.</returns>
    public async IAsyncEnumerable<T> ReadAsync(CancellationToken token = default)
    {
        IQueryable<T> query = _context.Set<T>().AsNoTracking();
        if (_queryShaper != null)
        {
            query = _queryShaper(query);
        }

        var offset = 0;
        while (true)
        {
            var page = await query
                .Skip(offset)
                .Take(_batchSize)
                .ToListAsync(token);

            foreach (var item in page)
            {
                yield return item;
            }

            // A short (or empty) page means the end was reached; stopping here avoids one
            // extra round trip that would only return zero rows.
            if (page.Count < _batchSize)
            {
                yield break;
            }

            offset += page.Count;
        }
    }
}

/// <summary>
/// Listens on a TCP port, accepts a single client and yields one converted item for every
/// chunk of UTF-8 text received from it.
/// </summary>
/// <typeparam name="T">Type produced from each received chunk of text.</typeparam>
public class TcpSource<T> : ISource<T>
{
    // Size of the receive buffer used for each read from the network stream.
    private const int ReceiveBufferSize = 4096;

    private readonly int _port;
    private readonly Func<string, T> _converter;

    /// <summary>
    /// Creates a source that will listen on <paramref name="port"/> on all network interfaces.
    /// </summary>
    /// <param name="port">TCP port to listen on.</param>
    /// <param name="converter">Maps a chunk of received text to an item.</param>
    /// <exception cref="ArgumentNullException"><paramref name="converter"/> is null.</exception>
    public TcpSource(int port, Func<string, T> converter)
    {
        _port = port;
        _converter = converter ?? throw new ArgumentNullException(nameof(converter));
    }

    /// <summary>
    /// Starts listening, waits for one client and streams its data until the client closes
    /// the connection. The listener is stopped when the enumeration ends for any reason.
    /// </summary>
    /// <param name="token">Cancels both the wait for a client and the reads from it.</param>
    /// <returns>One item per non-empty decoded chunk; chunk boundaries follow the network reads.</returns>
    public async IAsyncEnumerable<T> ReadAsync(CancellationToken token = default)
    {
        var listener = new TcpListener(IPAddress.Any, _port);
        listener.Start();
        try
        {
            using var client = await AcceptAsync(listener, token);
            using var stream = client.GetStream();

            var buffer = new byte[ReceiveBufferSize];

            // A stateful decoder keeps the trailing bytes of a multi-byte UTF-8 character that
            // was split across two reads; decoding each chunk independently would corrupt it.
            var decoder = Encoding.UTF8.GetDecoder();
            var chars = new char[Encoding.UTF8.GetMaxCharCount(buffer.Length)];

            int bytesRead;
            while ((bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length, token)) > 0)
            {
                var charCount = decoder.GetChars(buffer, 0, bytesRead, chars, 0);
                if (charCount == 0)
                {
                    // Only part of a character arrived so far; wait for the rest.
                    continue;
                }

                yield return _converter(new string(chars, 0, charCount));
            }
        }
        finally
        {
            // Runs on normal completion, on errors and when the consumer stops early,
            // so the port is always released.
            listener.Stop();
        }
    }

    // Waits for a client; stopping the listener on cancellation is what aborts the pending accept.
    private static async Task<TcpClient> AcceptAsync(TcpListener listener, CancellationToken token)
    {
        token.ThrowIfCancellationRequested();

        using (token.Register(listener.Stop))
        {
            try
            {
                return await listener.AcceptTcpClientAsync();
            }
            catch (Exception) when (token.IsCancellationRequested)
            {
                // Report the aborted accept as a cancellation rather than a socket error.
                throw new OperationCanceledException(token);
            }
        }
    }
}

/// <summary>
/// Simulated sensor: periodically produces a random reading in the range [0, 100) and
/// converts it into an item.
/// </summary>
/// <typeparam name="T">Type produced from each reading.</typeparam>
public class SensorSource<T> : ISource<T>
{
    private readonly Func<double, T> _converter;
    private readonly int _intervalMs;

    /// <summary>
    /// Creates a simulated sensor.
    /// </summary>
    /// <param name="converter">Maps a raw reading to an item.</param>
    /// <param name="intervalMs">Delay in milliseconds before each reading.</param>
    public SensorSource(Func<double, T> converter, int intervalMs = 100)
        => (_converter, _intervalMs) = (converter, intervalMs);

    /// <summary>
    /// Yields readings until cancellation is requested.
    /// </summary>
    /// <param name="token">Ends the stream when checked between readings and cancels the delay.</param>
    /// <returns>An unbounded sequence of converted readings.</returns>
    public async IAsyncEnumerable<T> ReadAsync(CancellationToken token = default)
    {
        var generator = new Random();

        while (true)
        {
            if (token.IsCancellationRequested)
            {
                yield break;
            }

            // Wait first, then sample, so readings are spaced by the configured interval.
            await Task.Delay(_intervalMs, token);

            var reading = generator.NextDouble() * 100;
            yield return _converter(reading);
        }
    }
}

// ================================
// 4. 处理器实现
// ================================

/// <summary>
/// Processor that reports what it receives on the console.
/// </summary>
/// <typeparam name="T">Type of the items to print.</typeparam>
public class ConsoleProcessor<T> : IProcessor<T>
{
    /// <summary>
    /// Writes the item to the console.
    /// </summary>
    /// <param name="item">Item to print.</param>
    /// <param name="token">Not used by this implementation.</param>
    /// <returns>An already completed <see cref="ValueTask"/>.</returns>
    public ValueTask ProcessAsync(T item, CancellationToken token = default)
    {
        var message = $"Processed: {item}";
        Console.WriteLine(message);

        // A default ValueTask is a successfully completed one.
        return default;
    }

    /// <summary>
    /// Writes the size of the batch to the console.
    /// </summary>
    /// <param name="batch">Batch whose item count is printed.</param>
    /// <param name="token">Not used by this implementation.</param>
    /// <returns>An already completed <see cref="ValueTask"/>.</returns>
    public ValueTask ProcessBatchAsync(List<T> batch, CancellationToken token = default)
    {
        var message = $"Processed batch of {batch.Count} items";
        Console.WriteLine(message);

        return default;
    }
}

// ================================
// 5. 使用示例
// ================================

/// <summary>
/// Demo entry point showing how sources, processors and the engine are wired together.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the file example; the other examples are left commented out for reference.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        // Example 1: stream a text file, 100 lines per batch.
        var fileEngine = new StreamingEngine<string>(
            source: new FileSource<string>("data.txt", text => text),
            processor: new ConsoleProcessor<string>(),
            batchSize: 100,
            maxRetries: 3,
            channelCapacity: 5000);

        await fileEngine.RunAsync();

        // Example 2: stream from a database (requires your own DbContext).
        // var dbContext = new MyDbContext();
        // var dbSource = new DatabaseSource<MyEntity>(dbContext);
        // var dbProcessor = new ConsoleProcessor<MyEntity>();
        // var dbEngine = new StreamingEngine<MyEntity>(
        //     dbSource,
        //     dbProcessor,
        //     batchSize: 1000);
        // await dbEngine.RunAsync();

        // Example 3: stream from a TCP connection.
        // var tcpSource = new TcpSource<string>(8888, data => data);
        // var tcpProcessor = new ConsoleProcessor<string>();
        // var tcpEngine = new StreamingEngine<string>(
        //     tcpSource,
        //     tcpProcessor);
        // await tcpEngine.RunAsync();

        // Example 4: stream from the simulated sensor.
        // var sensorSource = new SensorSource<double>(value => value);
        // var sensorProcessor = new ConsoleProcessor<double>();
        // var sensorEngine = new StreamingEngine<double>(
        //     sensorSource,
        //     sensorProcessor,
        //     batchSize: 5);
        // await sensorEngine.RunAsync();
    }
}

// Minimal sample DbContext so the examples above have something to compile against.
/// <summary>
/// Sample context backed by the EF Core in-memory provider.
/// </summary>
public class MyDbContext : DbContext
{
    /// <summary>Set of sample entities.</summary>
    public DbSet<MyEntity> MyEntities { get; set; }

    /// <summary>
    /// Points the context at the in-memory database named "TestDb".
    /// </summary>
    /// <param name="optionsBuilder">Builder used to configure the context.</param>
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
        => optionsBuilder.UseInMemoryDatabase("TestDb");
}

/// <summary>
/// Sample entity exposed through <c>MyDbContext.MyEntities</c>.
/// </summary>
public class MyEntity
{
    /// <summary>Integer identifier of the entity.</summary>
    public int Id { get; set; }

    /// <summary>Name of the entity.</summary>
    public string Name { get; set; }
}

代码说明

  • ISource<T>:数据源接口,你可以轻松扩展 WebSocket、Kafka 等新源
  • IProcessor<T>:数据处理器接口,可实现写入数据库、调用 API 等业务逻辑
  • StreamingEngine<T>
    • 支持单条处理和批处理(batchSize > 0 时启用批处理)
    • 内置失败重试机制
    • 使用 Channel 实现背压,防止生产速度过快
  • 四种数据源
    • FileSource:大文件流式读取
    • DatabaseSource:数据库分页流式读取
    • TcpSource:TCP 网络流
    • SensorSource:实时传感器数据模拟
相关推荐
疯狂的Alex6 小时前
【C#避坑实战系列文章16】性能优化(CPU / 内存占用过高问题解决)
开发语言·性能优化·c#
我就是我--不一样的烟火6 小时前
Log4net库的使用
c#·log4net·简化配置
周杰伦fans9 小时前
C# 中的 简单工厂模式 (Simple Factory)
c#·简单工厂模式
ajassi200010 小时前
开源 C# 快速开发(十一)线程
http·开源·c#
周杰伦fans18 小时前
C# 23种设计模式详解与示例
开发语言·设计模式·c#
xb113220 小时前
C#——方法的定义、调用与调试
开发语言·c#
code bean20 小时前
【C#】以 BlockingCollection 为核心的多相机 YOLO 检测任务处理框架
c#
时光追逐者1 天前
一款专门为 WPF 打造的开源 Office 风格用户界面控件库
ui·开源·c#·.net·wpf
-可乐加冰吗1 天前
SuperMap iObjects .NET 11i 二次开发(十五)—— 类型转换之面转点
visualstudio·c#·.net