scatterlist的相关概念与实例分析

概念

scatterlist

一个scatterlist用来描述一块内存;sg_table一般用于将物理上不连续、大小不同的多块内存链接起来,一次性送给DMA控制器搬运

复制代码
/*
 * One scatter-gather entry: describes a single chunk of memory by the page
 * it lives in, the offset inside that page and its length, together with
 * the address used for DMA.
 */
struct scatterlist {
	unsigned long	page_link; // page that holds this chunk; the low bits are also used as chain/end flags
	unsigned int	offset; // offset of the chunk inside its page (start position)
	unsigned int	length; // length of the chunk
	dma_addr_t	dma_address; // start address of the chunk as used for DMA
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	unsigned int	dma_length; // length that goes with dma_address
#endif
};

page_link:

(1).对于chain sg 来说,page_link记录下一个 SG 数组的首地址,并且用低两位做标志:bit[0]置1表示chain sg,bit[1]置1表示end sg;

(2).对于 end sg 来说,只有bit[1] 为1,其他无意义;

(3).对于普通 sg 来说,记录的是关联的内存页块的地址;

sg_table

既然链接起物理内存,那么就需要多个sg;内核给了个sg_table和一系列api便于操作sg;

复制代码
/* Bookkeeping for a list of scatterlist entries. */
struct sg_table {
	struct scatterlist *sgl;	/* the list */
	unsigned int nents;		// number of entries actually mapped
	unsigned int orig_nents;	// original number of entries
};

sg_alloc_table内部单次最多分配 PAGE_SIZE / sizeof(struct scatterlist) 个scatterlist结构体(一个sg数组);如果需要的数量超过这个数,__sg_alloc_table会在循环中再次分配新的sg数组,并且通过sg_chain()把上一个sg数组和新的sg数组链接起来,它们都属于同一个sg_table

sg_alloc_table

sg_kmalloc用以批量分配 sg 的内存;SG_MAX_SINGLE_ALLOC:系统规定了每次sg_kmalloc的最大个数为 PAGE_SIZE / sizeof(struct scatterlist),例如 4096/32 = 128个

复制代码
/*
 * sg_alloc_table - allocate the entries of an sg table
 * @table:    table to set up
 * @nents:    number of entries requested
 * @gfp_mask: allocation flags handed to the allocator
 *
 * Delegates to __sg_alloc_table() with SG_MAX_SINGLE_ALLOC as the per-chunk
 * limit, no preallocated first chunk, and sg_kmalloc() as the allocator.
 * When that fails, whatever was set up so far is released through
 * __sg_free_table() before the error code is handed back.
 *
 * Returns the result of __sg_alloc_table().
 */
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
	int err = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
				   NULL, 0, gfp_mask, sg_kmalloc);

	if (unlikely(err)) {
		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(sg_alloc_table);

当申请的个数等于 SG_MAX_SINGLE_ALLOC 时,相当于一次性申请一个页面(4K)的内存,直接调用 __get_free_page() 从 buddy 系统中分配;当个数不等于 SG_MAX_SINGLE_ALLOC(不足 4K)时,则通过 kmalloc_array() 申请。

复制代码
/*
 * Allocate storage for @nents scatterlist entries.
 *
 * A request for exactly SG_MAX_SINGLE_ALLOC entries is served with one page
 * from __get_free_page(); every other count goes through kmalloc_array().
 */
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
	void *page;

	if (nents != SG_MAX_SINGLE_ALLOC)
		return kmalloc_array(nents, sizeof(struct scatterlist),
				     gfp_mask);

	/*
	 * Kmemleak doesn't track page allocations as they are not
	 * commonly used (in a raw form) for kernel data structures.
	 * As we chain together a list of pages and then a normal
	 * kmalloc (tracked by kmemleak), in order to for that last
	 * allocation not to become decoupled (and thus a
	 * false-positive) we need to inform kmemleak of all the
	 * intermediate allocations.
	 */
	page = (void *) __get_free_page(gfp_mask);
	kmemleak_alloc(page, PAGE_SIZE, 1, gfp_mask);
	return page;
}

根据nents决定需不需要再次调用sg_kmalloc分配struct scatterlist数组,并返回首个scatterlist的地址,为什么叫数组,因为是在一个页面里面分配的,是连续的

复制代码
/*
 * __sg_alloc_table - allocate the scatterlist entries of a table, chunk by chunk
 * @table:             table to fill in; it is zeroed first
 * @nents:             total number of usable entries requested
 * @max_ents:          maximum number of entries per allocated chunk
 * @first_chunk:       optional preallocated chunk used for the first round
 * @nents_first_chunk: capacity of the first chunk; when 0, @max_ents is used
 * @gfp_mask:          flags passed through to @alloc_fn
 * @alloc_fn:          callback that allocates one chunk
 *
 * Returns 0 on success, -EINVAL when @nents is 0 (or, with
 * CONFIG_ARCH_NO_SG_CHAIN, when @nents exceeds @max_ents), and -ENOMEM when
 * a chunk cannot be allocated. Nothing is freed here on failure.
 */
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	unsigned prv_max_ents;
 
    // start from a clean sg_table
	memset(table, 0, sizeof(*table));
 
    // the number of sg entries must not be 0
	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif
 
    // number of sg entries that still have to be allocated
	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;
 
        // decide how many entries to allocate in this round;
        // a request above the per-chunk limit is split over several rounds
		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;     // the last slot of this chunk is reserved for the chain link, not for data
		} else
			sg_size = alloc_size;
 
        // entries still missing after this round
		left -= sg_size;
 
		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask); // allocate the chunk through the caller's callback
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;
 
			return -ENOMEM;
		}
 
        /*
         * Initialise the chunk just obtained: sg_init_table() zeroes the
         * whole array with one memset and then calls sg_init_marker() on it.
         */
		sg_init_table(sg, alloc_size);
 
        // account for the new entries; nents and orig_nents are kept equal here
		table->nents = table->orig_nents += sg_size;
 
		/*
         * Coming round the loop again means more entries were requested
         * than one chunk can hold, so the previous chunk was allocated at
         * its maximum size.
         * The first chunk becomes table->sgl; every later chunk is hooked
         * up by turning the last slot of the previous chunk into a chain
         * entry.
         */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;
 
		// nothing left to allocate: the loop is about to exit, so mark the last used entry of the newest chunk as the end
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);
 
		prv = sg;
		prv_max_ents = curr_max_ents; // if there is another round, this chunk was allocated at its maximum size
		curr_max_ents = max_ents;
	} while (left);
 
	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);

sg_chain用以配置链接(chain)sg:把上一个sg数组的最后一项的 offset 和 length 清零,并在它的 page_link 中记录下一个sg数组的首地址,从而将当前的sg数组与下一个sg数组通过chain sg链接在一起。

复制代码
/*
 * sg_chain - link one scatterlist array to the next one
 * @prv:       array whose last slot becomes the chain entry
 * @prv_nents: number of slots in @prv
 * @sgl:       array the chain entry should point to
 */
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
			    struct scatterlist *sgl)
{
	struct scatterlist *link = &prv[prv_nents - 1];
	unsigned long target = (unsigned long) sgl;

	/*
	 * Record the link pointer with the lowest bit set, making sure the
	 * termination bit is cleared if it happens to be set.
	 */
	link->page_link = (target | SG_CHAIN) & ~SG_END;

	/* offset and length are unused for a chain entry, so clear them. */
	link->length = 0;
	link->offset = 0;
}

sg跟buffer

常用api

sg_set_page函数用sg_assign_page以将当前sg与某个内存页进行关联;并设置大小和偏移

复制代码
/*
 * sg_set_page - point an sg entry at a page
 * @sg:     entry to update
 * @page:   page to associate with the entry (via sg_assign_page())
 * @len:    stored in sg->length
 * @offset: stored in sg->offset
 */
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
			       unsigned int len, unsigned int offset)
{
	sg_assign_page(sg, page);

	/* Describe the region inside the page. */
	sg->length = len;
	sg->offset = offset;
}

sg_set_buf传入buf,然后用sg_set_page将sg与这个buf的page关联

复制代码
/*
 * sg_set_buf - point an sg entry at a buffer given by virtual address
 * @sg:     entry to update
 * @buf:    buffer start; translated with virt_to_page()/offset_in_page()
 * @buflen: stored as the entry's length
 *
 * With CONFIG_DEBUG_SG the address is checked with virt_addr_valid() first.
 */
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
			      unsigned int buflen)
{
	struct page *pg;
	unsigned int off;

#ifdef CONFIG_DEBUG_SG
	BUG_ON(!virt_addr_valid(buf));
#endif
	pg = virt_to_page(buf);
	off = offset_in_page(buf);

	sg_set_page(sg, pg, buflen, off);
}

sg_init_table初始化一个sg数组(清零并设置结束标记);sg_init_one只初始化一个sg,并把它与传入的buf关联

复制代码
/*
 * sg_init_table - initialise an array of sg entries
 * @sgl:   first entry of the array
 * @nents: number of entries in the array
 *
 * Zeroes all @nents entries, then hands the array to sg_init_marker().
 */
void sg_init_table(struct scatterlist *sgl, unsigned int nents)
{
	size_t bytes = sizeof(*sgl) * nents;

	memset(sgl, 0, bytes);
	sg_init_marker(sgl, nents);
}
EXPORT_SYMBOL(sg_init_table);
/*
 * sg_init_one - initialise a single sg entry and attach a buffer to it
 * @sg:     entry to initialise
 * @buf:    buffer to describe
 * @buflen: length of the buffer
 */
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	const unsigned int single_entry = 1;

	/* Treat the lone entry as a one-element table, then bind the buffer. */
	sg_init_table(sg, single_entry);
	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);

示例

复制代码
/*
 * Excerpt (parts are elided with "......"): fills in the mmc_data
 * descriptor and builds the scatterlist that describes @buf for a transfer
 * of @blocks blocks of @blksz bytes. @write selects MMC_DATA_WRITE or
 * MMC_DATA_READ.
 */
int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn,
        unsigned addr, int incr_addr, u8 *buf, unsigned blocks, unsigned blksz)
{
        struct mmc_request mrq = {NULL};
        struct mmc_command cmd = {0};
        struct mmc_data data = {0};
        struct scatterlist sg, *sg_ptr;
        struct sg_table sgtable;
        unsigned int nents, left_size, i;
        unsigned int seg_size = card->host->max_seg_size;

        ......

        data.blksz = blksz;
        /* Code in host drivers/fwk assumes that "blocks" always is >=1 */
        data.blocks = blocks ? blocks : 1;
        data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ;

        /* Total size of the transfer, and how many seg_size pieces it needs (rounded up). */
        left_size = data.blksz * data.blocks;
        nents = (left_size - 1) / seg_size + 1;
        if (nents > 1) {
                /* More than one segment: allocate a table with one entry per segment. */
                /* NOTE(review): the release of sgtable is not visible in this excerpt - presumably in the elided tail; confirm. */
                if (sg_alloc_table(&sgtable, nents, GFP_KERNEL))
                        return -ENOMEM;

                data.sg = sgtable.sgl;
                data.sg_len = nents;

                /* Entry i covers buf + i * seg_size, at most seg_size long. */
                for_each_sg(data.sg, sg_ptr, data.sg_len, i) {
                        sg_set_page(sg_ptr, virt_to_page(buf + (i * seg_size)),
                                        min(seg_size, left_size),
                                        offset_in_page(buf + (i * seg_size)));
                        /*
                         * left_size is unsigned, so this wraps on the last
                         * iteration when less than seg_size remained; the
                         * value is not read again in the visible code.
                         */
                        left_size = left_size - seg_size;
                }
        } else {
                /* Everything fits in one segment: use the on-stack entry. */
                data.sg = &sg;
                data.sg_len = 1;

                sg_init_one(&sg, buf, left_size);
        }

       ......
}

sg跟DMA

常用api

判断当前sg是否为chain

复制代码
/* Non-zero when the SG_CHAIN bit is set in sg->page_link, i.e. sg is a chain entry. */
#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN) 

判断当前sg是否为last

复制代码
/* Non-zero when the SG_END bit is set in sg->page_link, i.e. sg is the last entry. */
#define sg_is_last(sg)		((sg)->page_link & SG_END)

sg_chain_ptr用来从chain sg中取出它所指向的下一个sg数组的首地址

复制代码
/*
 * Extract the scatterlist pointer stored in a chain entry: sg->page_link
 * with the SG_CHAIN and SG_END flag bits masked off.
 *
 * The continuation backslash must be the very last character on its line;
 * trailing blanks after it are diagnosed by GCC/Clang and are not accepted
 * as a line continuation by every preprocessor.
 */
#define sg_chain_ptr(sg)	\
	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))

获取下一个sg;如果下一项是chain sg,则跳到它指向的下一个sg数组的第一项

复制代码
struct scatterlist *sg_next(struct scatterlist *sg)
{
	if (sg_is_last(sg))
		return NULL;

	sg++;
	if (unlikely(sg_is_chain(sg)))
		sg = sg_chain_ptr(sg);

	return sg;
}
EXPORT_SYMBOL(sg_next);

遍历sg

复制代码
/*
 * Walk nr entries starting at sglist: __i counts from 0 and sg is advanced
 * with sg_next(), so chain entries are followed rather than visited.
 */
#define for_each_sg(sglist, sg, nr, __i)	\
	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))

获取sg关联的页块地址

复制代码
/*
 * sg_page - return the page pointer stored in an sg entry
 * @sg: entry to read
 *
 * The pointer is sg->page_link with the SG_CHAIN and SG_END flag bits
 * masked off. With CONFIG_DEBUG_SG, calling this on a chain entry is a BUG.
 */
static inline struct page *sg_page(struct scatterlist *sg)
{
	unsigned long addr;

#ifdef CONFIG_DEBUG_SG
	BUG_ON(sg_is_chain(sg));
#endif
	addr = sg->page_link & ~(SG_CHAIN | SG_END);

	return (struct page *)addr;
}

示例

这是一个支持sg的DMA控制器驱动示例。mmp_pdma_desc_hw是DMA硬件描述符,每个描述符描述一个buf的传输信息;驱动通过sg_dma_address取得sg的总线地址,作为描述符的传输地址(源地址/目的地址),用来发送数据到设备,或者从设备接收数据

mmp_pdma_prep_slave_sg将下一个描述符的地址,给到上一个描述符的--下一个描述符地址的成员,以实现DMA控制器自动遍历描述符,来传输sg的多个数据块。

复制代码
/*
 * DMA descriptor describing one transfer: link to the next descriptor,
 * source/target address values and the command word. Declared with
 * 32-byte alignment.
 */
struct mmp_pdma_desc_hw {
	u32 ddadr;	/* Points to the next descriptor + flags */
	u32 dsadr;	/* DSADR value for the current transfer */
	u32 dtadr;	/* DTADR value for the current transfer */
	u32 dcmd;	/* DCMD value for the current transfer */
} __aligned(32);

/*
 * mmp_pdma_prep_slave_sg - build a chain of DMA descriptors for a scatterlist
 * @dchan:   channel the transfer is prepared on
 * @sgl:     first scatterlist entry
 * @sg_len:  number of scatterlist entries to walk
 * @dir:     transfer direction; for DMA_MEM_TO_DEV the sg address is the
 *           source and chan->dev_addr the target, otherwise the reverse
 * @flags:   stored in the first descriptor's async_tx.flags
 * @context: not used by this function
 *
 * Every sg entry is cut into pieces of at most PDMA_MAX_DESC_BYTES. One
 * descriptor is allocated per piece and hooked to its predecessor through
 * desc.ddadr. The final descriptor is terminated with DDADR_STOP and has
 * DCMD_ENDIRQEN set.
 *
 * Returns the async_tx of the first descriptor, or NULL when @sgl is NULL,
 * @sg_len is 0, or a descriptor cannot be allocated (descriptors built so
 * far are released in that case).
 */
struct dma_async_tx_descriptor *
mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
                       unsigned int sg_len, enum dma_transfer_direction dir,
                       unsigned long flags, void *context)
{
        struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
        struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL;
        size_t len, avail;
        struct scatterlist *sg;
        dma_addr_t addr;
        int i;

        if ((sgl == NULL) || (sg_len == 0))
                return NULL;

        /*
         * Start out assuming aligned addresses; the loop below switches to
         * byte-align mode as soon as one address is not a multiple of 8.
         * Initialising this to true would turn that check into dead code.
         */
        chan->byte_align = false;

        mmp_pdma_config_write(dchan, &chan->slave_config, dir);

        for_each_sg(sgl, sg, sg_len, i) {
                addr = sg_dma_address(sg);
                avail = sg_dma_len(sg);

                do {
                        len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
                        if (addr & 0x7)
                                chan->byte_align = true;

                        /* allocate and populate the descriptor */
                        new = mmp_pdma_alloc_descriptor(chan);
                        if (!new) {
                                dev_err(chan->dev, "no memory for desc\n");
                                goto fail;
                        }

                        new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len);
                        if (dir == DMA_MEM_TO_DEV) {
                                new->desc.dsadr = addr;
                                new->desc.dtadr = chan->dev_addr;
                        } else {
                                new->desc.dsadr = chan->dev_addr;
                                new->desc.dtadr = addr;
                        }

                        /*
                         * Store this descriptor's address in the previous
                         * descriptor's "next descriptor" field, so the
                         * controller can walk the chain by itself and
                         * transfer all pieces of the scatterlist.
                         */
                        if (!first)
                                first = new;
                        else
                                prev->desc.ddadr = new->async_tx.phys;

                        new->async_tx.cookie = 0;
                        async_tx_ack(&new->async_tx);
                        prev = new;

                        /* Insert the link descriptor to the LD ring */
                        list_add_tail(&new->node, &first->tx_list);

                        /* update metadata */
                        addr += len;
                        avail -= len;
                } while (avail);
        }

        first->async_tx.cookie = -EBUSY;
        first->async_tx.flags = flags;

        /* last desc and fire IRQ */
        new->desc.ddadr = DDADR_STOP;
        new->desc.dcmd |= DCMD_ENDIRQEN;

        chan->dir = dir;
        chan->cyclic_first = NULL;

        return &first->async_tx;

fail:
        if (first)
                mmp_pdma_free_desc_list(chan, &first->tx_list);
        return NULL;
}
相关推荐
sukalot8 小时前
window显示驱动开发—为头装载和专用监视器生成自定义合成器应用(二)
驱动开发
zwhSunday9 小时前
Linux驱动开发(1)概念、环境与代码框架
linux·运维·驱动开发
sukalot20 小时前
window显示驱动开发—为头装载和专用监视器生成自定义合成器应用(三)
驱动开发
sukalot1 天前
window显示驱动开发—为头装载和专用监视器生成自定义合成器应用(一)
驱动开发
cxr8282 天前
基于Claude Code的 规范驱动开发(SDD)指南
人工智能·hive·驱动开发·敏捷流程·智能体
zwhSunday3 天前
Linux驱动开发(2)进一步理解驱动
linux·驱动开发
被遗忘的旋律.3 天前
Linux驱动开发笔记(十)——中断
linux·驱动开发·笔记
路溪非溪3 天前
Linux驱动如何向应用层提供sysfs操作接口
linux·arm开发·驱动开发
sukalot4 天前
window显示驱动开发—监视筛选器驱动程序(三)
驱动开发
墨染天姬4 天前
【android 驱动开发九】生产者-消费者模型
android·驱动开发