Flash 块设备驱动开发

一文读懂 Flash 底层原理

1、概述

在嵌入式 Linux 中,Flash 的管理依赖一套清晰的分层设计。我们可以把整个系统拆成两部分:

  • 硬件层:我们已经学习了 NOR 和 NAND 的物理结构差异(浮栅晶体管、并联 vs 串联)、读写擦除的电气特性(XIP、页/块操作、先擦后写)。
  • 软件层:我们将看到 Linux 内核如何通过 MTD(Memory Technology Device) 子系统,将这些硬件差异抽象成统一的接口。

🔹 硬件回顾

Flash 分为两大类:

✔ NOR Flash

  • 支持随机读取(XIP)
  • 按块 block 擦除
  • 类似"只读内存"

✔ NAND Flash

  • 按 page 读写、block 擦除
  • 存在坏块
  • 需要 ECC

🔹 软件层目标

Linux 通过 MTD(Memory Technology Device) 子系统:

👉 把不同 Flash 的硬件差异统一抽象成接口

2、NOR Flash

2.1 代码详解

我们以 spi-nor 驱动为例。

kernel/drivers/mtd/spi-nor/core.c

c 复制代码
/*
 * Probe callback for a SPI NOR flash reached through spi-mem (abridged
 * excerpt: the "......" lines mark code omitted from the original, which is
 * also where ret, flash_name and hwcaps are set up).
 *
 * Allocates the spi_nor state, identifies and configures the chip with
 * spi_nor_scan(), then registers the resulting MTD device.
 *
 * Returns -ENOMEM if the allocation fails, the spi_nor_scan() error code if
 * scanning fails, otherwise the result of mtd_device_register().
 */
static int spi_nor_probe(struct spi_mem *spimem)
{
	struct spi_device *spi = spimem->spi;
	struct flash_platform_data *data = dev_get_platdata(&spi->dev);
	struct spi_nor *nor;
	......
	/* Allocate the spi_nor state. */
	nor = devm_kzalloc(&spi->dev, sizeof(*nor), GFP_KERNEL);
	if (!nor)
		return -ENOMEM;
	......
	ret = spi_nor_scan(nor, flash_name, &hwcaps);
	if (ret)
		return ret;
	...
	/*
	 * Register the MTD device (this ends up calling add_mtd_device() to
	 * create it). The partition table is taken from the platform data
	 * when there is one; otherwise no partitions are passed.
	 */
	return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
			   data ? data->nr_parts : 0);
}

注册 MTD 设备时,最重要的就是传入的 mtd_info 结构体:上层统一通过这个结构体来访问底层 Flash。

c 复制代码
/*
 * struct mtd_info - description of one MTD device as seen by upper layers.
 *
 * Holds the device geometry (size, erasesize, writesize, OOB sizes), the ECC
 * parameters, the driver-supplied operation callbacks (_read, _write,
 * _erase, ...) and bookkeeping such as the parent/partition links.
 */
struct mtd_info {
	u_char type;
	uint32_t flags;
	uint64_t size;	 // Total size of the MTD

	/* "Major" erase size for the device. Naïve users may take this
	 * to be the only erase size available, or may use the more detailed
	 * information below if they desire
	 */
	uint32_t erasesize;
	/* Minimal writable flash unit size. In case of NOR flash it is 1 (even
	 * though individual bits can be cleared), in case of NAND flash it is
	 * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
	 * it is of ECC block size, etc. It is illegal to have writesize = 0.
	 * Any driver registering a struct mtd_info must ensure a writesize of
	 * 1 or larger.
	 */
	uint32_t writesize;

	/*
	 * Size of the write buffer used by the MTD. MTD devices having a write
	 * buffer can write multiple writesize chunks at a time. E.g. while
	 * writing 4 * writesize bytes to a device with 2 * writesize bytes
	 * buffer the MTD driver can (but doesn't have to) do 2 writesize
	 * operations, but not 4. Currently, all NANDs have writebufsize
	 * equivalent to writesize (NAND page size). Some NOR flashes do have
	 * writebufsize greater than writesize.
	 */
	uint32_t writebufsize;

	uint32_t oobsize;   // Amount of OOB data per block (e.g. 16)
	uint32_t oobavail;  // Available OOB bytes per block

	/*
	 * If erasesize is a power of 2 then the shift is stored in
	 * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize.
	 */
	unsigned int erasesize_shift;
	unsigned int writesize_shift;
	/* Masks based on erasesize_shift and writesize_shift */
	unsigned int erasesize_mask;
	unsigned int writesize_mask;

	/*
	 * read ops return -EUCLEAN if max number of bitflips corrected on any
	 * one region comprising an ecc step equals or exceeds this value.
	 * Settable by driver, else defaults to ecc_strength.  User can override
	 * in sysfs.  N.B. The meaning of the -EUCLEAN return code has changed;
	 * see Documentation/ABI/testing/sysfs-class-mtd for more detail.
	 */
	unsigned int bitflip_threshold;

	/* Kernel-only stuff starts here. */
	const char *name;
	int index;

	/* OOB layout description */
	const struct mtd_ooblayout_ops *ooblayout;

	/* NAND pairing scheme, only provided for MLC/TLC NANDs */
	const struct mtd_pairing_scheme *pairing;

	/* the ecc step size. */
	unsigned int ecc_step_size;

	/* max number of correctable bit errors per ecc step */
	unsigned int ecc_strength;

	/* Data for variable erase regions. If numeraseregions is zero,
	 * it means that the whole device has erasesize as given above.
	 */
	int numeraseregions;
	struct mtd_erase_region_info *eraseregions;

	/*
	 * Do not call via these pointers, use corresponding mtd_*()
	 * wrappers instead.
	 */
	int (*_erase) (struct mtd_info *mtd, struct erase_info *instr);
	int (*_point) (struct mtd_info *mtd, loff_t from, size_t len,
		       size_t *retlen, void **virt, resource_size_t *phys);
	int (*_unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
	int (*_read) (struct mtd_info *mtd, loff_t from, size_t len,
		      size_t *retlen, u_char *buf);
	int (*_write) (struct mtd_info *mtd, loff_t to, size_t len,
		       size_t *retlen, const u_char *buf);
	int (*_panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
			     size_t *retlen, const u_char *buf);
	int (*_read_oob) (struct mtd_info *mtd, loff_t from,
			  struct mtd_oob_ops *ops);
	int (*_write_oob) (struct mtd_info *mtd, loff_t to,
			   struct mtd_oob_ops *ops);
	int (*_get_fact_prot_info) (struct mtd_info *mtd, size_t len,
				    size_t *retlen, struct otp_info *buf);
	int (*_read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
				    size_t len, size_t *retlen, u_char *buf);
	int (*_get_user_prot_info) (struct mtd_info *mtd, size_t len,
				    size_t *retlen, struct otp_info *buf);
	int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
				    size_t len, size_t *retlen, u_char *buf);
	int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to,
				     size_t len, size_t *retlen, u_char *buf);
	int (*_lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
				    size_t len);
	int (*_writev) (struct mtd_info *mtd, const struct kvec *vecs,
			unsigned long count, loff_t to, size_t *retlen);
	void (*_sync) (struct mtd_info *mtd);
	int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
	int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
	int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
	int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs);
	int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs);
	int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs);
	int (*_max_bad_blocks) (struct mtd_info *mtd, loff_t ofs, size_t len);
	int (*_suspend) (struct mtd_info *mtd);
	void (*_resume) (struct mtd_info *mtd);
	void (*_reboot) (struct mtd_info *mtd);
	/*
	 * If the driver is something smart, like UBI, it may need to maintain
	 * its own reference counting. The below functions are only for driver.
	 */
	int (*_get_device) (struct mtd_info *mtd);
	void (*_put_device) (struct mtd_info *mtd);

	/*
	 * flag indicates a panic write, low level drivers can take appropriate
	 * action if required to ensure writes go through
	 */
	bool oops_panic_write;

	struct notifier_block reboot_notifier;  /* default mode before reboot */

	/* ECC status information */
	struct mtd_ecc_stats ecc_stats;
	/* Subpage shift (NAND) */
	int subpage_sft;

	void *priv;

	struct module *owner;
	struct device dev;
	int usecount;
	struct mtd_debug_info dbg;
	struct nvmem_device *nvmem;

	/*
	 * Parent device from the MTD partition point of view.
	 *
	 * MTD masters do not have any parent, MTD partitions do. The parent
	 * MTD device can itself be a partition.
	 */
	struct mtd_info *parent;

	/* List of partitions attached to this MTD device */
	struct list_head partitions;

	struct mtd_part part;
	struct mtd_master master;
};

实际上,Flash 驱动主要就是初始化 mtd_info 结构体,填充读、写、擦除等函数。下面就是 mtd_info 初始化过程。

c 复制代码
/*
 * Identify a SPI NOR flash and fill in its mtd_info (abridged excerpt: the
 * "......" lines mark code omitted from the original, including the
 * declaration of ret and the final return).
 *
 * Looks up the flash_info entry for the chip, installs the _write, _erase
 * and _read callbacks on nor->mtd, initialises the flash parameters and
 * finally configures the SPI memory through spi_nor_setup().
 *
 * Returns the error from spi_nor_get_flash_info(), spi_nor_init_params() or
 * spi_nor_setup() as soon as one of them fails.
 */
int spi_nor_scan(struct spi_nor *nor, const char *name,
		 const struct spi_nor_hwcaps *hwcaps)
{
	const struct flash_info *info;
	struct device *dev = nor->dev;
	struct mtd_info *mtd = &nor->mtd;
	......
	/*
	 * Use the flash ID to find the matching static table entry that
	 * describes the basic properties of the flash.
	 */
	info = spi_nor_get_flash_info(nor, name);
	if (IS_ERR(info))
		return PTR_ERR(info);
	......
	/* Fill in mtd_info: write callback. */
	mtd->_write = spi_nor_write;

	/*
	 * Besides the static table, a flash that supports SFDP can have its
	 * basic properties discovered dynamically.
	 */
	ret = spi_nor_init_params(nor);
	if (ret)
		return ret;
	......
	/* Fill in mtd_info: erase and read callbacks. */
	mtd->_erase = spi_nor_erase;
	mtd->_read = spi_nor_read;
	......
	/*
	 * Configure the SPI memory:
	 * - select op codes for (Fast) Read, Page Program and Sector Erase.
	 * - set the number of dummy cycles (mode cycles + wait states).
	 * - set the SPI protocols for register and memory accesses.
	 */
	ret = spi_nor_setup(nor, hwcaps);
	if (ret)
		return ret;
	......
}

read/write 函数的入参(from/to)是一个字节偏移地址,也就是需要读取/写入的位置。访问方式:

命令 + 地址 → Flash

c 复制代码
/*
 * Prototype of the handler installed as mtd->_read: "from" is the byte
 * offset to read from, "len" the number of bytes requested and "buf" the
 * destination. NOTE(review): retlen presumably receives the number of bytes
 * actually read - confirm against the definition.
 */
static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
			size_t *retlen, u_char *buf);
			
/*
 * Prototype of the handler installed as mtd->_write: "to" is the byte offset
 * to write at, "len" the number of bytes and "buf" the source data.
 * NOTE(review): retlen presumably receives the number of bytes actually
 * written - confirm against the definition.
 */
static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
	size_t *retlen, const u_char *buf);
c 复制代码
/* spi_nor_spimem_write_data() - write data to flash memory via spi-mem */
static ssize_t spi_nor_spimem_write_data(struct spi_nor *nor, loff_t to,
					 size_t len, const u8 *buf)
{
	/*
	 * Describe the write as a spi-mem operation (abridged excerpt: the
	 * "......" line marks code omitted from the original):
	 *   command - nor->program_opcode
	 *   address - to, sent with nor->addr_width
	 *   dummy   - none
	 *   data    - len bytes going out from buf
	 * NOTE(review): the trailing 1 in each macro is presumably the bus
	 * width (single line) - confirm against linux/spi/spi-mem.h.
	 */
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
			   SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
			   SPI_MEM_OP_NO_DUMMY,
			   SPI_MEM_OP_DATA_OUT(len, buf, 1));
	......
}

/*
 * Read counterpart of spi_nor_spimem_write_data(): builds the spi-mem
 * operation that reads len bytes at offset "from" into buf (abridged
 * excerpt: the "......" line marks code omitted from the original).
 */
static ssize_t spi_nor_spimem_read_data(struct spi_nor *nor, loff_t from,
					size_t len, u8 *buf)
{
	/*
	 * Fields of the operation:
	 *   command - nor->read_opcode
	 *   address - from, sent with nor->addr_width
	 *   dummy   - nor->read_dummy
	 *   data    - len bytes coming in to buf
	 */
	struct spi_mem_op op =
		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
			   SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
			   SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
			   SPI_MEM_OP_DATA_IN(len, buf, 1));
	......
}

不同的 Flash 芯片支持的命令不尽相同,内核中预先定义了常用的 SPI NOR 操作码(opcode):

c 复制代码
/*
 * Flash opcodes.
 *
 * One-byte command codes sent to the SPI NOR chip, grouped below as
 * status/config register access, read, page program, erase and ID/SFDP.
 * NOTE(review): the _x_y_z suffix appears to encode the number of I/O lines
 * used for the opcode, address and data phases (compare the "Dual Output" /
 * "Dual I/O" comments) - confirm against include/linux/mtd/spi-nor.h.
 */
#define SPINOR_OP_WRDI		0x04	/* Write disable */
#define SPINOR_OP_WREN		0x06	/* Write enable */
#define SPINOR_OP_RDSR		0x05	/* Read status register */
#define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
#define SPINOR_OP_RDSR2		0x3f	/* Read status register 2 */
#define SPINOR_OP_WRSR2		0x3e	/* Write status register 2 */
#define SPINOR_OP_READ		0x03	/* Read data bytes (low frequency) */
#define SPINOR_OP_READ_FAST	0x0b	/* Read data bytes (high frequency) */
#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual Output SPI) */
#define SPINOR_OP_READ_1_2_2	0xbb	/* Read data bytes (Dual I/O SPI) */
#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad Output SPI) */
#define SPINOR_OP_READ_1_4_4	0xeb	/* Read data bytes (Quad I/O SPI) */
#define SPINOR_OP_READ_1_1_8	0x8b	/* Read data bytes (Octal Output SPI) */
#define SPINOR_OP_READ_1_8_8	0xcb	/* Read data bytes (Octal I/O SPI) */
#define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
#define SPINOR_OP_PP_1_1_4	0x32	/* Quad page program */
#define SPINOR_OP_PP_1_4_4	0x38	/* Quad page program */
#define SPINOR_OP_PP_1_1_8	0x82	/* Octal page program */
#define SPINOR_OP_PP_1_8_8	0xc2	/* Octal page program */
#define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
#define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
#define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
#define SPINOR_OP_CHIP_ERASE	0xc7	/* Erase whole flash chip */
#define SPINOR_OP_SE		0xd8	/* Sector erase (usually 64KiB) */
#define SPINOR_OP_RDID		0x9f	/* Read JEDEC ID */
#define SPINOR_OP_RDSFDP	0x5a	/* Read SFDP */
#define SPINOR_OP_RDCR		0x35	/* Read configuration register */
#define SPINOR_OP_WRCR		0x31	/* Write configuration register */
#define SPINOR_OP_RDFSR		0x70	/* Read flag status register */
#define SPINOR_OP_CLFSR		0x50	/* Clear flag status register */
#define SPINOR_OP_RDEAR		0xc8	/* Read Extended Address Register */
#define SPINOR_OP_WREAR		0xc5	/* Write Extended Address Register */

2.2 总结

FLASH 驱动中的关键内容如下:

  • 实现 mtd_info 中的 _read、_write、_erase 函数
  • 初始化 mtd_info 中的 erasesize。这个 erasesize 很重要,会影响上层文件系统的垃圾回收、文件系统管理
    • 默认选择 Flash 支持的最大的 erasesize
  • 根据 Flash 和控制器实际支持的能力,选择最优的工作模式。例如:
    • 设置读取数据模式,标准/快速、双线、四线
    • 设置写入数据模式,标准,四线

3、NAND Flash

3.1 代码详解

我们以 rk 系列的 NAND Flash 控制器驱动为例进行讲解。

kernel/drivers/mtd/rknand/rknand_base_ko.c

c 复制代码
/*
 * Platform-driver probe for the Rockchip NAND driver (abridged excerpt: the
 * "......" lines mark code omitted from the original).
 *
 * Allocates the global gpNandInfo, fills in the MTD description through
 * rknand_info_init() and installs the add/get device hooks.
 *
 * Returns -ENOMEM when the allocation fails. When rknand_info_init() fails,
 * err is set to -ENXIO and control jumps to the exit_free label, which is
 * not part of this excerpt.
 */
static int rknand_probe(struct platform_device *pdev)
{
	struct rknand_info *nand_info;
	int err = 0;
	NAND_DEBUG(NAND_DEBUG_LEVEL0,"rk_nand_probe: \n");
	gpNandInfo = kzalloc(sizeof(struct rknand_info), GFP_KERNEL);
	if (!gpNandInfo)
		return -ENOMEM;
		
	nand_info = gpNandInfo;
	......
	/* Fill in the mtd_info fields. */
	if(rknand_info_init(nand_info))
	{
		err = -ENXIO;
		goto  exit_free;
	}
	/*
	 * add_rknand_device() is the hook that calls add_mtd_device() to
	 * create the MTD device.
	 */
	nand_info->add_rknand_device = add_rknand_device;
	nand_info->get_rknand_device = get_rknand_device;
	......
}

初始化 mtd_info 信息:

c 复制代码
/*
 * Prepare the driver state kept in nand_info->rknand and fill in the global
 * rknand_mtd descriptor (geometry, flags and operation callbacks).
 *
 * @nand_info: driver instance whose embedded rknand chip state is reset
 *
 * Always returns 0.
 */
static int rknand_info_init(struct rknand_info *nand_info)
{
	struct rknand_chip *chip = &nand_info->rknand;
	struct mtd_info *info = &rknand_mtd;

	/* Chip bookkeeping: ready state, scheduling on, no callback yet. */
	chip->state = FL_READY;
	chip->rknand_schedule_enable = 1;
	chip->pFlashCallBack = NULL;
	init_waitqueue_head(&chip->wq);

	/* Identity and capability flags. */
	info->owner = THIS_MODULE;
	info->type = MTD_NANDFLASH;
	info->flags = MTD_WRITEABLE | MTD_NO_ERASE;

	/*
	 * Geometry, written as multiples of 0x200 (512) bytes: 16 KiB erase
	 * size and 4 KiB write size. No OOB area is exposed.
	 */
	info->erasesize = 32 * 0x200;
	info->writesize = 8 * 0x200;
	info->oobsize = 0;
	info->oobavail = 0;
	info->ecclayout = NULL;

	/* Operations this driver provides. */
	info->_read = rknand_read;
	info->_write = rknand_write;
	info->_panic_write = rknand_panic_write;
	info->_erase = rknand_erase;
	info->_sync = rknand_sync;
	info->_block_isbad = rknand_block_isbad;
	info->_block_markbad = rknand_block_markbad;

	/* Operations explicitly left unimplemented. */
	info->_point = NULL;
	info->_unpoint = NULL;
	info->_read_oob = NULL;
	info->_write_oob = NULL;
	info->_lock = NULL;
	info->_unlock = NULL;
	info->_suspend = NULL;
	info->_resume = NULL;

	return 0;
}

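一般来说,由于硬件差异,NAND Flash 驱动需要实现的函数会比 NOR Flash 多几个,例如 _read_oob、_write_oob、_block_isbad、_block_markbad 等(NAND Flash 比 NOR Flash 多了 OOB 区域和坏块需要管理)。不过在上面的 rknand 驱动中,_read_oob、_write_oob 被置为 NULL(可以理解为 OOB 的管理被封装在了 FTL 内部),实际实现的是 _sync、_block_isbad、_block_markbad 等函数。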

c 复制代码
/*
 * Handler installed as mtd->_read: "from" is a byte offset and "len" a byte
 * count, exactly as for the NOR driver.
 */
static int rknand_read(struct mtd_info *mtd, loff_t from, size_t len,
	size_t *retlen, u_char *buf);
	
/*
 * Handler installed as mtd->_write. Note that the byte offset to write at is
 * named "from" here, although it is the destination of the write.
 */
static int rknand_write(struct mtd_info *mtd, loff_t from, size_t len,
	size_t *retlen, const u_char *buf);

可以看到,mtd_info 实现的接口是固定的,即 MTD 层向 FLASH 驱动传递的都是字节地址。这和之前说的 NAND Flash 不能随机读写是不是有冲突啊?别着急,接着往下看。

c 复制代码
/*
 * MTD _read handler: read whole 512-byte sectors through the FTL.
 *
 * The MTD layer passes down a byte offset and a byte count, but the FTL is
 * addressed by LBA (Logical Block Address), so both values are converted to
 * 512-byte sector units (>> 9) here. According to the original author's
 * note, on the RK3568 the NAND controller has a hardware FTL module and the
 * software layer wrapped around it is closed source; the FTL maps each LBA
 * to a physical block and takes care of GC and wear levelling.
 *
 * @mtd:    MTD device (unused)
 * @from:   byte offset to read from
 * @len:    number of bytes requested; a trailing partial sector is not read
 * @retlen: receives the number of bytes actually placed in buf
 * @buf:    destination buffer
 *
 * Returns 0 on success (including the case where there is nothing to do
 * because len is shorter than one sector or no ftl_read hook is installed),
 * otherwise the non-zero status returned by ftl_read().
 */
static int rknand_read(struct mtd_info *mtd, loff_t from, size_t len,
	size_t *retlen, u_char *buf)
{
	int sector = len >> 9;
	int LBA = (int)(from >> 9);
	int ret;

	/* Nothing has been transferred yet. */
	*retlen = 0;

	if (!sector || !gpNandInfo->ftl_read)
		return 0;

	ret = gpNandInfo->ftl_read(LBA, sector, buf);
	if (ret)
		return ret;

	/*
	 * Report only the whole sectors that were read, so the caller never
	 * treats the unread tail of buf as valid data.
	 */
	*retlen = (size_t)sector << 9;
	return 0;
}

/*
 * MTD _write handler: write whole 512-byte sectors through the FTL.
 *
 * The byte offset and byte count from the MTD layer are converted to
 * 512-byte sector units (>> 9) before being handed to ftl_write().
 *
 * @mtd:    MTD device (unused)
 * @from:   byte offset to write at (the destination, despite the name)
 * @len:    number of bytes to write; a trailing partial sector is not written
 * @retlen: receives len on success and 0 when ftl_write() fails
 * @buf:    source data
 *
 * Returns 0 on success (including the case where nothing is written because
 * len is shorter than one sector or no ftl_write hook is installed),
 * otherwise the non-zero status returned by ftl_write().
 */
static int rknand_write(struct mtd_info *mtd, loff_t from, size_t len,
	size_t *retlen, const u_char *buf)
{
	int sector = len >> 9;
	int LBA = (int)(from >> 9);
	int ret = 0;

	if (sector && gpNandInfo->ftl_write) {
		/*
		 * Sectors below SysImageWriteEndAdd are written with the last
		 * argument set to 1, all others with 0.
		 * NOTE(review): presumably this selects the FTL's
		 * system-image write path - confirm against the FTL API.
		 */
		int image_write = (LBA < SysImageWriteEndAdd) ? 1 : 0;

		ret = gpNandInfo->ftl_write(LBA, sector, (void *)buf,
					    image_write);
	}

	/*
	 * Propagate the FTL status instead of always claiming success. The
	 * success path still reports len, as before, so callers that loop
	 * until everything is written keep making progress.
	 */
	*retlen = ret ? 0 : len;
	return ret;
}

和 NOR flash 一样,NAND Flash 本质上也是通过 命令 + 地址(单位为 page,不再是随机字节地址) 的方式来读写数据。
kernel/include/linux/mtd/rawnand.h

c 复制代码
/*
 * Standard NAND flash commands
 */
#define NAND_CMD_READ0		0
#define NAND_CMD_READ1		1
#define NAND_CMD_RNDOUT		5
#define NAND_CMD_PAGEPROG	0x10
#define NAND_CMD_READOOB	0x50
#define NAND_CMD_ERASE1		0x60
#define NAND_CMD_STATUS		0x70
#define NAND_CMD_SEQIN		0x80
#define NAND_CMD_RNDIN		0x85
#define NAND_CMD_READID		0x90
#define NAND_CMD_ERASE2		0xd0
#define NAND_CMD_PARAM		0xec
#define NAND_CMD_GET_FEATURES	0xee
#define NAND_CMD_SET_FEATURES	0xef
#define NAND_CMD_RESET		0xff

/* Extended commands for large page devices */
#define NAND_CMD_READSTART	0x30
#define NAND_CMD_RNDOUTSTART	0xE0
#define NAND_CMD_CACHEDPROG	0x15

#define NAND_CMD_NONE		-1

3.2 拓展

很多新人在接触 Flash 存储时,会自然而然地把它和硬盘联系起来,认为 Flash 也会像硬盘那样有 MBR 或 GPT 分区表。实际上,Flash 的分区概念完全不同。

在嵌入式系统中,Flash 是通过 MTD(Memory Technology Device)子系统来管理的,而分区(Partition)只是对 Flash 芯片上的存储区域进行逻辑划分。每个分区可以独立擦写、读取和挂载,但它们没有标准的分区表结构,也不依赖于文件系统。换句话说,Flash 分区更多是为了软件管理和功能隔离,而不是像硬盘那样为了兼容操作系统的通用分区格式。

通常,一个 Flash 芯片会被划分为几个用途明确的分区,例如:

  • Bootloader 分区:存放启动程序
  • Kernel 分区:存放内核镜像
  • Rootfs 分区:存放根文件系统
  • Data 分区:存放用户数据

MTD 设备在系统中就是按照这些分区来创建的,这意味着当你访问一个 MTD 设备时,其实是在操作某个具体分区,而不是整个 Flash 芯片。理解这一点对于嵌入式开发、固件升级以及数据管理都非常重要。

假设你有一个 Flash 芯片,并在设备树中为它定义了 3 个分区,比如 bootloader、kernel 和 rootfs。系统启动后,你会在 /dev/ 目录下看到类似下面这样的设备节点:

  • /dev/mtd0 和 /dev/mtdblock0 对应第一个分区(例如 bootloader)
  • /dev/mtd1 和 /dev/mtdblock1 对应第二个分区(例如 kernel)
  • /dev/mtd2 和 /dev/mtdblock2 对应第三个分区(例如 rootfs)

4、总结

相关推荐
泰白聊AI2 小时前
AI 编程时代的规范驱动开发:OpenSpec 实践指南
服务器·人工智能·驱动开发·ai·aigc·ai编程
itman3012 小时前
C语言字符串必知:末尾有个隐藏的\0,新手易踩坑
c语言·字符串·内存管理·库函数·指针操作
-Springer-2 小时前
STM32 学习 —— 个人学习笔记10-2(I2C 通信外设 & 硬件 I2C 读写 MPU6050)
笔记·stm32·学习
chushiyunen2 小时前
ai人工智能笔记(二)
笔记
爱吃生蚝的于勒2 小时前
【Linux】重中之重!TCP协议
linux·运维·服务器·网络·学习·tcp/ip
zhensherlock3 小时前
Protocol Launcher 系列:1Writer iOS 上的 Markdown 文档管理
javascript·笔记·ios·typescript·node.js·iphone·ipad
旖-旎3 小时前
分治(计算右侧小于当前元素的个数)(7)
c++·学习·算法·leetcode·排序算法·归并排序
benpaodeDD3 小时前
JDBC内容学习
学习
EmmaXLZHONG3 小时前
Django By Example - 学习笔记
笔记·python·学习·django