文件小于 1GB 时:使用 StreamReader.ReadLine() 逐行读取
cs
using System.Text;
namespace DotnetReadTxt;
/// <summary>
/// Console sample: prints the first lines of a GB2312-encoded text file and
/// provides helpers for guessing the encoding of a text file.
/// </summary>
class Program
{
    /// <summary>
    /// Reads "ip20240529.txt" line by line as GB2312, echoes the first nine
    /// lines to the console and reports when the whole file has been read.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        try
        {
            // Register the code-pages provider so GetEncoding can resolve "GB2312".
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
            var gb2312 = Encoding.GetEncoding("GB2312");
            int index = 0;
            using (var sr = new StreamReader("ip20240529.txt", gb2312))
            {
                // ReadLine returns null once the end of the file is reached.
                while (sr.ReadLine() is string line)
                {
                    // Every line is counted, but only the first nine are printed.
                    index++;
                    if (index < 10)
                    {
                        Console.WriteLine(line);
                    }
                }
            }
            Console.WriteLine("读取完毕~~");
        }
        catch (Exception e)
        {
            // Covers failures such as a missing file or insufficient permissions.
            Console.WriteLine("The file could not be read:");
            Console.WriteLine(e.Message);
        }
    }

    /// <summary>
    /// Guesses the character encoding of a text file.
    /// A byte-order mark wins if present; otherwise the content is checked for
    /// structurally valid UTF-8; otherwise <paramref name="fallback"/> is returned.
    /// </summary>
    /// <remarks>
    /// The whole file is loaded into memory, so this is only suitable for files
    /// that comfortably fit in RAM.
    /// </remarks>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <param name="fallback">
    /// Encoding returned when the file has no BOM and is not valid UTF-8.
    /// When omitted, <see cref="Encoding.Default"/> is used (the original behaviour).
    /// </param>
    /// <returns>The detected encoding, or the fallback encoding.</returns>
    static Encoding GetTextFileEncodingType(string fileName, Encoding? fallback = null)
    {
        // File.ReadAllBytes opens, reads and closes the file even when an
        // exception is thrown, so no handle can leak.
        byte[] buffer = File.ReadAllBytes(fileName);

        Encoding? fromBom = DetectBom(buffer);
        if (fromBom != null)
        {
            return fromBom;
        }

        if (IsUTF8Bytes(buffer))
        {
            return Encoding.UTF8;
        }

        return fallback ?? Encoding.Default;
    }

    /// <summary>
    /// Maps a leading byte-order mark to its encoding.
    /// </summary>
    /// <param name="buffer">The first bytes (or all bytes) of the file.</param>
    /// <returns>The encoding announced by the BOM, or null when there is none.</returns>
    private static Encoding? DetectBom(byte[] buffer)
    {
        if (buffer.Length >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        // The UTF-32 marks must be tested before UTF-16 because the UTF-32 LE
        // mark (FF FE 00 00) starts with the UTF-16 LE mark (FF FE).
        if (buffer.Length >= 4 && buffer[0] == 0xFF && buffer[1] == 0xFE && buffer[2] == 0x00 && buffer[3] == 0x00)
        {
            return Encoding.UTF32;
        }

        if (buffer.Length >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF)
        {
            return new UTF32Encoding(true, true);
        }

        // The UTF-16 marks are exactly two bytes; the byte that follows belongs
        // to the text and must not take part in the comparison.
        if (buffer.Length >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        if (buffer.Length >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
        {
            return Encoding.Unicode;
        }

        return null;
    }

    /// <summary>
    /// Checks whether the bytes form structurally valid UTF-8 (used for files
    /// that carry no byte-order mark).
    /// </summary>
    /// <remarks>
    /// Lead bytes are limited to the ranges that can start a 2-, 3- or 4-byte
    /// sequence and every following byte must be a continuation byte (10xxxxxx).
    /// Overlong forms and surrogate code points are not rejected; this is a
    /// detection heuristic, not a full validator.
    /// </remarks>
    /// <param name="data">Raw file content.</param>
    /// <returns>
    /// True when every byte fits the UTF-8 structure (including empty or pure
    /// ASCII input); false otherwise, including when the data ends in the
    /// middle of a multi-byte sequence.
    /// </returns>
    private static bool IsUTF8Bytes(byte[] data)
    {
        int pendingContinuationBytes = 0;

        foreach (byte current in data)
        {
            if (pendingContinuationBytes > 0)
            {
                // Inside a multi-byte sequence: only 10xxxxxx is allowed.
                if ((current & 0xC0) != 0x80)
                {
                    return false;
                }

                pendingContinuationBytes--;
                continue;
            }

            if (current < 0x80)
            {
                // Plain ASCII byte.
                continue;
            }

            if (current >= 0xC2 && current <= 0xDF)
            {
                pendingContinuationBytes = 1;
            }
            else if (current >= 0xE0 && current <= 0xEF)
            {
                pendingContinuationBytes = 2;
            }
            else if (current >= 0xF0 && current <= 0xF4)
            {
                pendingContinuationBytes = 3;
            }
            else
            {
                // Stray continuation byte or a byte that can never start a sequence.
                return false;
            }
        }

        // A sequence cut off at the end of the data means "not UTF-8"; it is
        // reported as a result rather than thrown, because legacy double-byte
        // text can legitimately end this way.
        return pendingContinuationBytes == 0;
    }
}
文件大于 1GB 时:使用 StreamReader.Read() 逐字符读取并拼接成行
ReadTxtFileLine.cs
cs
using System.Text;
namespace DotnetReadTxt;
/// <summary>
/// Reads a text file one line at a time by pulling single characters from a
/// <see cref="System.IO.StreamReader"/> and splitting on line feeds.
/// </summary>
/// <remarks>
/// Typical use: <c>while (reader.IsReadEnd &gt; 0) { var line = reader.GetLineStr(); }</c>.
/// The underlying reader is released automatically when the end of the file
/// is reached; call <see cref="Dispose"/> when stopping early.
/// </remarks>
public class ReadTxtFileLine : System.IDisposable
{
    // Values exposed through IsReadEnd: positive while lines remain, negative afterwards.
    private const int StillReading = 1;
    private const int Finished = -1;

    private readonly System.IO.StreamReader _reader;

    // Reused for every line so that building a line is linear in its length.
    private readonly StringBuilder _lineBuffer = new StringBuilder();

    private int _isReadEnd;

    /// <summary>
    /// Read state: greater than 0 while more lines can be read, -1 once the
    /// end of the file has been reached or the reader has been disposed.
    /// </summary>
    public int IsReadEnd
    {
        get => _isReadEnd;
    }

    /// <summary>
    /// Opens the file for reading.
    /// </summary>
    /// <param name="TxtFilePath">Path of the text file.</param>
    /// <param name="FileEncoding">Encoding used to decode the file.</param>
    public ReadTxtFileLine(string TxtFilePath, Encoding FileEncoding)
    {
        _reader = new System.IO.StreamReader(TxtFilePath, FileEncoding);
        _isReadEnd = StillReading;

        // An empty file contains no lines, so report the end immediately
        // instead of handing the caller one phantom empty line.
        if (_reader.EndOfStream)
        {
            Dispose();
        }
    }

    /// <summary>
    /// Returns the next line without its line terminator. Carriage returns
    /// are dropped wherever they occur.
    /// </summary>
    /// <returns>
    /// The next line, or an empty string when everything has already been read.
    /// </returns>
    public string GetLineStr()
    {
        // After the end the reader is disposed and must not be touched again.
        if (_isReadEnd <= 0)
        {
            return "";
        }

        _lineBuffer.Clear();

        // Read() returns -1 only at the end of the stream, so a NUL character
        // (code 0) inside the file is kept as ordinary content.
        int charCode;
        while ((charCode = _reader.Read()) >= 0)
        {
            if (charCode == '\n')
            {
                // A line feed that is the very last character terminates the
                // final line; it does not start another (empty) one.
                if (_reader.EndOfStream)
                {
                    Dispose();
                }

                return _lineBuffer.ToString();
            }

            if (charCode != '\r')
            {
                _lineBuffer.Append((char)charCode);
            }
        }

        // End of file reached without a trailing line feed.
        Dispose();
        return _lineBuffer.ToString();
    }

    /// <summary>
    /// Releases the underlying reader and marks the instance as finished.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        _isReadEnd = Finished;
        _reader.Dispose();
    }
}
使用
cs
/// <summary>
/// Usage sample for ReadTxtFileLine: reads "ip20240529.txt" as GB2312,
/// prints the first nine lines returned and then a completion message.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    try
    {
        // Register the code-pages provider so GetEncoding can resolve "GB2312".
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        Encoding fileEncoding = Encoding.GetEncoding("GB2312");
        var lineReader = new ReadTxtFileLine("ip20240529.txt", fileEncoding);

        // lineNumber is the 1-based position of the line being fetched.
        for (int lineNumber = 1; lineReader.IsReadEnd > 0; lineNumber++)
        {
            // Each call hands back exactly one line of the file.
            string text = lineReader.GetLineStr();
            if (lineNumber < 10)
            {
                Console.WriteLine(text);
            }
        }

        Console.WriteLine("读取完毕~~");
    }
    catch (Exception e)
    {
        // Covers failures such as a missing file or insufficient permissions.
        Console.WriteLine("The file could not be read:");
        Console.WriteLine(e.Message);
    }
}
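读取 GB2312 编码需要引用 NuGet 包 System.Text.Encoding.CodePages,
并在调用 Encoding.GetEncoding("GB2312") 之前先执行 Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)。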
感谢浪人甲虫大佬文章