1、概述
在嵌入式 Linux 中,Flash 的管理依赖一套清晰的分层设计。我们可以把整个系统拆成两部分:
- 硬件层:我们已经学习了 NOR 和 NAND 的物理结构差异(浮栅晶体管、并联 vs 串联)、读写擦除的电气特性(XIP、页/块操作、先擦后写)。
- 软件层:我们将看到 Linux 内核如何通过 MTD(Memory Technology Device) 子系统,将这些硬件差异抽象成统一的接口。
🔹 硬件回顾
Flash 分为两大类:
✔ NOR Flash
- 支持随机读取(XIP)
- 按块 block 擦除
- 类似"只读内存"
✔ NAND Flash
- 按 page 读写、block 擦除
- 存在坏块
- 需要 ECC
🔹 软件层目标
Linux 通过 MTD(Memory Technology Device) 子系统:
👉 把不同 Flash 的硬件差异统一抽象成接口
2、NOR Flash
2.1 代码详解
我们以 spi-nor 驱动为例。
kernel/drivers/mtd/spi-nor/core.c
c
/*
 * spi_nor_probe() - probe entry of the SPI NOR driver (excerpt; elided parts
 * are marked with dots).
 *
 * Visible flow: allocate a struct spi_nor, identify/configure the flash via
 * spi_nor_scan(), then register nor->mtd as an MTD device, passing the
 * partitions from the platform data when platform data is present.
 *
 * Return: -ENOMEM if the allocation fails, the error from spi_nor_scan() if
 * scanning fails, otherwise the result of mtd_device_register().
 */
static int spi_nor_probe(struct spi_mem *spimem)
{
struct spi_device *spi = spimem->spi;
struct flash_platform_data *data = dev_get_platdata(&spi->dev);
struct spi_nor *nor;
......
/* Allocate zeroed memory for the spi_nor structure */
nor = devm_kzalloc(&spi->dev, sizeof(*nor), GFP_KERNEL);
if (!nor)
return -ENOMEM;
......
/* Identify the flash and fill in nor->mtd */
ret = spi_nor_scan(nor, flash_name, &hwcaps);
if (ret)
return ret;
...
/* Register the MTD device (this call eventually reaches add_mtd_device() to create the MTD device) */
return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
data ? data->nr_parts : 0);
}
注册 MTD 设备时,最重要的就是传入的 mtd_info 结构,上层统一通过这个结构访问 Flash。
c
/*
 * struct mtd_info - per-device descriptor registered with the MTD core.
 *
 * It groups the device geometry (size, erase size, write size, OOB sizes),
 * ECC parameters, the driver-provided operation callbacks (_erase, _read,
 * _write, ...) and the partition bookkeeping (parent, partitions, part,
 * master).
 */
struct mtd_info {
u_char type;
uint32_t flags;
uint64_t size; // Total size of the MTD
/* "Major" erase size for the device. Naïve users may take this
* to be the only erase size available, or may use the more detailed
* information below if they desire
*/
uint32_t erasesize;
/* Minimal writable flash unit size. In case of NOR flash it is 1 (even
* though individual bits can be cleared), in case of NAND flash it is
* one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
* it is of ECC block size, etc. It is illegal to have writesize = 0.
* Any driver registering a struct mtd_info must ensure a writesize of
* 1 or larger.
*/
uint32_t writesize;
/*
* Size of the write buffer used by the MTD. MTD devices having a write
* buffer can write multiple writesize chunks at a time. E.g. while
* writing 4 * writesize bytes to a device with 2 * writesize bytes
* buffer the MTD driver can (but doesn't have to) do 2 writesize
* operations, but not 4. Currently, all NANDs have writebufsize
* equivalent to writesize (NAND page size). Some NOR flashes do have
* writebufsize greater than writesize.
*/
uint32_t writebufsize;
uint32_t oobsize; // Amount of OOB data per block (e.g. 16)
uint32_t oobavail; // Available OOB bytes per block
/*
* If erasesize is a power of 2 then the shift is stored in
* erasesize_shift otherwise erasesize_shift is zero. Ditto writesize.
*/
unsigned int erasesize_shift;
unsigned int writesize_shift;
/* Masks based on erasesize_shift and writesize_shift */
unsigned int erasesize_mask;
unsigned int writesize_mask;
/*
* read ops return -EUCLEAN if max number of bitflips corrected on any
* one region comprising an ecc step equals or exceeds this value.
* Settable by driver, else defaults to ecc_strength. User can override
* in sysfs. N.B. The meaning of the -EUCLEAN return code has changed;
* see Documentation/ABI/testing/sysfs-class-mtd for more detail.
*/
unsigned int bitflip_threshold;
/* Kernel-only stuff starts here. */
const char *name;
int index;
/* OOB layout description */
const struct mtd_ooblayout_ops *ooblayout;
/* NAND pairing scheme, only provided for MLC/TLC NANDs */
const struct mtd_pairing_scheme *pairing;
/* the ecc step size. */
unsigned int ecc_step_size;
/* max number of correctible bit errors per ecc step */
unsigned int ecc_strength;
/* Data for variable erase regions. If numeraseregions is zero,
* it means that the whole device has erasesize as given above.
*/
int numeraseregions;
struct mtd_erase_region_info *eraseregions;
/*
* Do not call via these pointers, use corresponding mtd_*()
* wrappers instead.
*/
int (*_erase) (struct mtd_info *mtd, struct erase_info *instr);
int (*_point) (struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, void **virt, resource_size_t *phys);
int (*_unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
int (*_read) (struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf);
int (*_write) (struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf);
int (*_panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf);
int (*_read_oob) (struct mtd_info *mtd, loff_t from,
struct mtd_oob_ops *ops);
int (*_write_oob) (struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops);
int (*_get_fact_prot_info) (struct mtd_info *mtd, size_t len,
size_t *retlen, struct otp_info *buf);
int (*_read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
size_t len, size_t *retlen, u_char *buf);
int (*_get_user_prot_info) (struct mtd_info *mtd, size_t len,
size_t *retlen, struct otp_info *buf);
int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
size_t len, size_t *retlen, u_char *buf);
int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to,
size_t len, size_t *retlen, u_char *buf);
int (*_lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
size_t len);
int (*_writev) (struct mtd_info *mtd, const struct kvec *vecs,
unsigned long count, loff_t to, size_t *retlen);
void (*_sync) (struct mtd_info *mtd);
int (*_lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
int (*_unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
int (*_is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs);
int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs);
int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs);
int (*_max_bad_blocks) (struct mtd_info *mtd, loff_t ofs, size_t len);
int (*_suspend) (struct mtd_info *mtd);
void (*_resume) (struct mtd_info *mtd);
void (*_reboot) (struct mtd_info *mtd);
/*
* If the driver is something smart, like UBI, it may need to maintain
* its own reference counting. The below functions are only for driver.
*/
int (*_get_device) (struct mtd_info *mtd);
void (*_put_device) (struct mtd_info *mtd);
/*
* flag indicates a panic write, low level drivers can take appropriate
* action if required to ensure writes go through
*/
bool oops_panic_write;
struct notifier_block reboot_notifier; /* default mode before reboot */
/* ECC status information */
struct mtd_ecc_stats ecc_stats;
/* Subpage shift (NAND) */
int subpage_sft;
void *priv;
struct module *owner;
struct device dev;
int usecount;
struct mtd_debug_info dbg;
struct nvmem_device *nvmem;
/*
* Parent device from the MTD partition point of view.
*
* MTD masters do not have any parent, MTD partitions do. The parent
* MTD device can itself be a partition.
*/
struct mtd_info *parent;
/* List of partitions attached to this MTD device */
struct list_head partitions;
struct mtd_part part;
struct mtd_master master;
};
实际上,Flash 驱动主要就是初始化 mtd_info 结构体,填充读、写、擦除等函数。下面就是 mtd_info 初始化过程。
c
/*
 * spi_nor_scan() - identify the flash and initialize nor->mtd (excerpt;
 * elided parts are marked with dots).
 *
 * Visible steps: look up the flash_info entry, install the _write callback,
 * initialize the flash parameters, install the _erase/_read callbacks and
 * finally configure the SPI memory via spi_nor_setup(). Each failing step
 * returns its error code to the caller.
 */
int spi_nor_scan(struct spi_nor *nor, const char *name,
const struct spi_nor_hwcaps *hwcaps)
{
const struct flash_info *info;
struct device *dev = nor->dev;
struct mtd_info *mtd = &nor->mtd;
......
/* Use the flash ID to find the matching static flash_info entry that describes the flash's basic parameters */
info = spi_nor_get_flash_info(nor, name);
if (IS_ERR(info))
return PTR_ERR(info);
......
/* Fill in mtd_info: write callback */
mtd->_write = spi_nor_write;
/* Besides the static table, a flash that supports SFDP can have its basic parameters parsed dynamically */
ret = spi_nor_init_params(nor);
if (ret)
return ret;
......
/* Fill in mtd_info: erase and read callbacks */
mtd->_erase = spi_nor_erase;
mtd->_read = spi_nor_read;
......
/*
* Configure the SPI memory:
* - select op codes for (Fast) Read, Page Program and Sector Erase.
* - set the number of dummy cycles (mode cycles + wait states).
* - set the SPI protocols for register and memory accesses.
*/
ret = spi_nor_setup(nor, hwcaps);
if (ret)
return ret;
......
}
read/write 函数。函数入参是一个字节地址,就是你需要读取/写入的地址。访问方式:
命令 + 地址 → Flash
c
/*
 * Prototypes of the SPI NOR read/write callbacks installed into
 * mtd_info->_read / mtd_info->_write by spi_nor_scan(). Their parameter
 * lists match the _read/_write members of struct mtd_info: an offset
 * (from/to), a length, an output count (retlen) and a data buffer.
 */
static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf);
static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf);
c
/* spi-mem based data write/read helpers (excerpt) */
/*
 * spi_nor_spimem_write_data() - describe a write of @len bytes from @buf to
 * flash offset @to as a spi-mem operation (excerpt; the remainder of the
 * function is elided).
 */
static ssize_t spi_nor_spimem_write_data(struct spi_nor *nor, loff_t to,
size_t len, const u8 *buf)
{
/*
* Write command ------ nor->program_opcode
* Write address ------ to
*
* The operation has a command phase, an address phase sized by
* nor->addr_width, no dummy phase and a data-out phase.
*/
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_DATA_OUT(len, buf, 1));
......
}
/*
 * spi_nor_spimem_read_data() - describe a read of @len bytes from flash
 * offset @from into @buf as a spi-mem operation (excerpt; the remainder of
 * the function is elided).
 */
static ssize_t spi_nor_spimem_read_data(struct spi_nor *nor, loff_t from,
size_t len, u8 *buf)
{
/*
* Read command ------ nor->read_opcode
* Read address ------ from
*
* Unlike the write operation, a dummy phase taken from nor->read_dummy
* sits between the address phase and the data-in phase.
*/
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
SPI_MEM_OP_DATA_IN(len, buf, 1));
......
}
不同的 Flash 芯片/控制器支持的命令不尽相同,内核中预定义了常用的命令码(opcode):
c
/*
 * Flash opcodes: one-byte command codes for SPI NOR flashes.
 * Besides register access, erase and identification commands, the read and
 * page-program commands come in single, dual, quad and octal variants.
 */
#define SPINOR_OP_WRDI 0x04 /* Write disable */
#define SPINOR_OP_WREN 0x06 /* Write enable */
#define SPINOR_OP_RDSR 0x05 /* Read status register */
#define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */
#define SPINOR_OP_RDSR2 0x3f /* Read status register 2 */
#define SPINOR_OP_WRSR2 0x3e /* Write status register 2 */
#define SPINOR_OP_READ 0x03 /* Read data bytes (low frequency) */
#define SPINOR_OP_READ_FAST 0x0b /* Read data bytes (high frequency) */
#define SPINOR_OP_READ_1_1_2 0x3b /* Read data bytes (Dual Output SPI) */
#define SPINOR_OP_READ_1_2_2 0xbb /* Read data bytes (Dual I/O SPI) */
#define SPINOR_OP_READ_1_1_4 0x6b /* Read data bytes (Quad Output SPI) */
#define SPINOR_OP_READ_1_4_4 0xeb /* Read data bytes (Quad I/O SPI) */
#define SPINOR_OP_READ_1_1_8 0x8b /* Read data bytes (Octal Output SPI) */
#define SPINOR_OP_READ_1_8_8 0xcb /* Read data bytes (Octal I/O SPI) */
#define SPINOR_OP_PP 0x02 /* Page program (up to 256 bytes) */
#define SPINOR_OP_PP_1_1_4 0x32 /* Quad page program */
#define SPINOR_OP_PP_1_4_4 0x38 /* Quad page program */
#define SPINOR_OP_PP_1_1_8 0x82 /* Octal page program */
#define SPINOR_OP_PP_1_8_8 0xc2 /* Octal page program */
#define SPINOR_OP_BE_4K 0x20 /* Erase 4KiB block */
#define SPINOR_OP_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */
#define SPINOR_OP_BE_32K 0x52 /* Erase 32KiB block */
#define SPINOR_OP_CHIP_ERASE 0xc7 /* Erase whole flash chip */
#define SPINOR_OP_SE 0xd8 /* Sector erase (usually 64KiB) */
#define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */
#define SPINOR_OP_RDSFDP 0x5a /* Read SFDP */
#define SPINOR_OP_RDCR 0x35 /* Read configuration register */
#define SPINOR_OP_WRCR 0x31 /* Write configure register */
#define SPINOR_OP_RDFSR 0x70 /* Read flag status register */
#define SPINOR_OP_CLFSR 0x50 /* Clear flag status register */
#define SPINOR_OP_RDEAR 0xc8 /* Read Extended Address Register */
#define SPINOR_OP_WREAR 0xc5 /* Write Extended Address Register */
2.2 总结
FLASH 驱动中的关键内容如下:
- 实现 mtd_info 中的 _read、_write、_erase 函数
- 初始化 mtd_info 中的 erasesize。这个 erasesize 很重要,会影响上层文件系统的垃圾回收、文件系统管理
  - 默认选择 Flash 支持的最大的 erasesize 值
- 根据设备执行情况,设置最优的 Flash 的能力。例如
  - 设置读取数据模式,标准/快速、双线、四线
  - 设置写入数据模式,标准,四线
3、NAND Flash
3.1 代码详解
我们以 rk 系列的 NAND Flash 控制器驱动为例进行讲解。
kernel/drivers/mtd/rknand/rknand_base_ko.c
c
/*
 * rknand_probe() - platform-driver probe of the rknand driver (excerpt;
 * elided parts are marked with dots).
 *
 * Visible flow: allocate the global rknand_info (gpNandInfo), initialize the
 * mtd_info description through rknand_info_init(), then publish the
 * add/get device hooks in the rknand_info structure.
 *
 * Return: -ENOMEM if the allocation fails; on rknand_info_init() failure
 * err is set to -ENXIO and control jumps to the exit_free label, which lies
 * in the elided part.
 */
static int rknand_probe(struct platform_device *pdev)
{
struct rknand_info *nand_info;
int err = 0;
NAND_DEBUG(NAND_DEBUG_LEVEL0,"rk_nand_probe: \n");
gpNandInfo = kzalloc(sizeof(struct rknand_info), GFP_KERNEL);
if (!gpNandInfo)
return -ENOMEM;
nand_info = gpNandInfo;
......
/* Initialize the mtd_info description */
if(rknand_info_init(nand_info))
{
err = -ENXIO;
goto exit_free;
}
/* add_rknand_device() is where add_mtd_device() gets called to create the MTD device */
nand_info->add_rknand_device = add_rknand_device;
nand_info->get_rknand_device = get_rknand_device;
......
}
初始化 mtd_info 信息:
c
/*
 * rknand_info_init() - initialize the rknand chip state and fill in the
 * file-level rknand_mtd descriptor (geometry, flags and callbacks).
 *
 * @nand_info: driver context; only its embedded rknand chip state is
 *             touched here.
 *
 * Return: always 0.
 *
 * NOTE(review): `ecclayout` is not a member of struct mtd_info as quoted
 * earlier in this document (that version has `ooblayout`) - presumably this
 * driver targets a different kernel version; confirm before reusing.
 */
static int rknand_info_init(struct rknand_info *nand_info)
{
struct mtd_info *mtd = &rknand_mtd;
struct rknand_chip *rknand = &nand_info->rknand;
/* Chip/driver state */
rknand->state = FL_READY;
rknand->rknand_schedule_enable = 1;
rknand->pFlashCallBack = NULL;
init_waitqueue_head(&rknand->wq);
/* No OOB area is described to MTD: both sizes are zero */
mtd->oobsize = 0;
mtd->oobavail = 0;
mtd->ecclayout = 0;
/* 0x200 is 512, so erasesize is 16 KiB and writesize is 4 KiB */
mtd->erasesize = 32*0x200;
mtd->writesize = 8*0x200;
// Fill in remaining MTD driver data
mtd->type = MTD_NANDFLASH;
mtd->flags = (MTD_WRITEABLE|MTD_NO_ERASE);// NOTE(review): MTD_NO_ERASE is set although _erase is installed below
mtd->_erase = rknand_erase;
mtd->_point = NULL;
mtd->_unpoint = NULL;
mtd->_read = rknand_read;
mtd->_write = rknand_write;
//mtd->discard = rknand_diacard;
/* OOB access callbacks are not provided by this driver */
mtd->_read_oob = NULL;
mtd->_write_oob = NULL;
mtd->_panic_write = rknand_panic_write;
mtd->_sync = rknand_sync;
/* Locking and power-management callbacks are not provided either */
mtd->_lock = NULL;
mtd->_unlock = NULL;
mtd->_suspend = NULL;
mtd->_resume = NULL;
/* Bad-block query and marking */
mtd->_block_isbad = rknand_block_isbad;
mtd->_block_markbad = rknand_block_markbad;
mtd->owner = THIS_MODULE;
return 0;
}
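由于硬件差异,NAND Flash 驱动通常需要实现的函数比 NOR Flash 多几个,例如 _read_oob、_write_oob、_block_isbad、_block_markbad、_sync 等(NAND Flash 比 NOR Flash 多了一个 OOB 区域,并且存在坏块,都需要去管理)。注意:上面的 rknand 示例中 _read_oob、_write_oob 被置为 NULL,只实现了 _block_isbad、_block_markbad、_sync 等接口。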
c
/*
 * Prototypes of the rknand read/write callbacks installed into
 * mtd_info->_read / mtd_info->_write by rknand_info_init(). Their parameter
 * types match the _read/_write members of struct mtd_info; note that the
 * write offset is also named `from` in this driver.
 */
static int rknand_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf);
static int rknand_write(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, const u_char *buf);
可以看到,mtd_info 实现的接口是固定的,即 MTD 层向 FLASH 驱动传递的都是字节地址。这和之前说的 NAND Flash 不能随机读写是不是有冲突啊?别着急,接着往下看。
c
/*
 * rknand_read() - MTD _read callback: translate the byte offset/length into
 * 512-byte sectors and forward the request to the FTL read hook.
 *
 * @from:   byte offset; shifted right by 9 to obtain the starting LBA.
 * @len:    byte count; shifted right by 9 to obtain the sector count, so
 *          any remainder below 512 bytes is dropped.
 * @retlen: written (set to 0) only when the FTL read reports an error.
 * @buf:    destination buffer handed to the FTL.
 *
 * Return: the value returned by gpNandInfo->ftl_read(), or 0 when the sector
 * count is zero or no ftl_read hook is installed.
 *
 * NOTE(review): *retlen is not written on the success path in this excerpt.
 */
static int rknand_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
int ret = 0;
/*
* Although the MTD layer passes the byte address `from`, the driver has to
* convert it to an LBA (Logical Block Address) itself.
* On rk3568 the NAND controller hardware includes an FTL module, so the
* software wraps the FTL handling in a layer of functions. That code is
* currently closed source.
* The FTL maps the LBA to a physical block and performs GC and wear leveling.
*/
int sector = len>>9;
int LBA = (int)(from>>9);
if(sector && gpNandInfo->ftl_read)
{
ret = gpNandInfo->ftl_read(LBA, sector, buf);
if(ret)
*retlen = 0;
}
return ret;
}
/*
 * rknand_write() - MTD _write callback: translate the byte offset/length
 * into 512-byte sectors and forward the request to the FTL write hook.
 *
 * @from:   byte offset of the write (despite the name); shifted right by 9
 *          to obtain the starting LBA.
 * @len:    byte count; shifted right by 9 to obtain the sector count.
 * @retlen: always set to @len.
 * @buf:    source data; the const qualifier is cast away for the FTL hook.
 *
 * Return: always 0.
 *
 * NOTE(review): the value returned by ftl_write() is stored in `ret` but
 * never returned, and *retlen is set to len even when nothing was written
 * (zero sectors or no ftl_write hook) - confirm whether this is intended.
 */
static int rknand_write(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, const u_char *buf)
{
int ret = 0;
int sector = len>>9;
int LBA = (int)(from>>9);
if(sector && gpNandInfo->ftl_write)// cmy
{
/* The last ftl_write() argument is 1 for LBAs below SysImageWriteEndAdd and 0 otherwise */
if(LBA < SysImageWriteEndAdd)//0x4E000)
{
//NAND_DEBUG(NAND_DEBUG_LEVEL0,">>> FtlWriteImage: LBA=0x%08X sector=%d\n",LBA, sector);
ret = gpNandInfo->ftl_write(LBA, sector, (void *)buf,1);
}
else
{
ret = gpNandInfo->ftl_write(LBA, sector, (void *)buf,0);
}
}
*retlen = len;
return 0;
}
和 NOR flash 一样,NAND Flash 本质上也是通过 命令 + 地址(单位为 page,不再是随机字节地址) 的方式来读写数据。
kernel/include/linux/mtd/rawnand.h
c
/*
* Standard NAND flash commands
*/
#define NAND_CMD_READ0 0
#define NAND_CMD_READ1 1
#define NAND_CMD_RNDOUT 5
#define NAND_CMD_PAGEPROG 0x10
#define NAND_CMD_READOOB 0x50
#define NAND_CMD_ERASE1 0x60
#define NAND_CMD_STATUS 0x70
#define NAND_CMD_SEQIN 0x80
#define NAND_CMD_RNDIN 0x85
#define NAND_CMD_READID 0x90
#define NAND_CMD_ERASE2 0xd0
#define NAND_CMD_PARAM 0xec
#define NAND_CMD_GET_FEATURES 0xee
#define NAND_CMD_SET_FEATURES 0xef
#define NAND_CMD_RESET 0xff
/* Extended commands for large page devices */
#define NAND_CMD_READSTART 0x30
#define NAND_CMD_RNDOUTSTART 0xE0
#define NAND_CMD_CACHEDPROG 0x15
#define NAND_CMD_NONE -1
3.2 拓展
很多新人在接触 Flash 存储时,会自然而然地把它和硬盘联系起来,认为 Flash 也会像硬盘那样有 MBR 或 GPT 分区表。实际上,Flash 的分区概念完全不同。
在嵌入式系统中,Flash 是通过 MTD(Memory Technology Device)子系统来管理的,而分区(Partition)只是对 Flash 芯片上的存储区域进行逻辑划分。每个分区可以独立擦写、读取和挂载,但它们没有标准的分区表结构,也不依赖于文件系统。换句话说,Flash 分区更多是为了软件管理和功能隔离,而不是像硬盘那样为了兼容操作系统的通用分区格式。
通常,一个 Flash 芯片会被划分为几个用途明确的分区,例如:
- Bootloader 分区:存放启动程序
- Kernel 分区:存放内核镜像
- Rootfs 分区:存放根文件系统
- Data 分区:存放用户数据
MTD 设备在系统中就是按照这些分区来创建的,这意味着当你访问一个 MTD 设备时,其实是在操作某个具体分区,而不是整个 Flash 芯片。理解这一点对于嵌入式开发、固件升级以及数据管理都非常重要。
假设你有一个 Flash 芯片,并在设备树中为它定义了 3 个分区,比如 bootloader、kernel 和 rootfs。系统启动后,你会在 /dev/ 目录下看到类似下面这样的设备节点:
- /dev/mtd0 和 /dev/mtdblock0 对应第一个分区(例如 bootloader)
- /dev/mtd1 和 /dev/mtdblock1 对应第二个分区(例如 kernel)
- /dev/mtd2 和 /dev/mtdblock2 对应第三个分区(例如 rootfs)
4、总结
