Redis源码研读八——listpack.c 1080-1528行

一、listpack合并

c 复制代码
/* Merge two listpacks so that the elements of '*second' follow the elements
 * of '*first'.
 *
 * The listpack with the larger byte size is the one that gets reallocated
 * and kept; the other one is freed. On return the handle of the kept
 * listpack points to the merged result and the handle of the freed one is
 * set to NULL.
 *
 * Returns the merged listpack, or NULL (touching nothing) when any argument
 * or pointed-to listpack is NULL, or when both handles refer to the same
 * listpack. */
unsigned char *lpMerge(unsigned char **first, unsigned char **second) {
    /* Nothing can be merged without two valid listpacks. */
    if (!first || !*first || !second || !*second) return NULL;

    /* A listpack cannot be merged into itself. */
    if (*first == *second) return NULL;

    size_t first_bytes = lpBytes(*first);
    unsigned long first_len = lpLength(*first);

    size_t second_bytes = lpBytes(*second);
    unsigned long second_len = lpLength(*second);

    /* Keep the bigger listpack as the realloc target. If that is 'first',
     * 'second' is appended to it; otherwise 'first' is prepended to
     * 'second'. Ties keep 'first'. */
    int append = (first_bytes >= second_bytes);
    unsigned char *target = append ? *first : *second;
    unsigned char *source = append ? *second : *first;
    size_t target_bytes = append ? first_bytes : second_bytes;
    size_t source_bytes = append ? second_bytes : first_bytes;

    /* The result carries a single header and a single terminator byte, so
     * one header and one terminator are subtracted from the sum. */
    unsigned long long lpbytes = (unsigned long long)first_bytes + second_bytes - LP_HDR_SIZE - 1;
    assert(lpbytes < UINT32_MAX); /* larger values can't be stored */

    /* The element count written to the header is capped at UINT16_MAX. */
    unsigned long lplength = first_len + second_len;
    if (lplength > UINT16_MAX) lplength = UINT16_MAX;

    /* Grow the target first, then splice the source in. */
    target = zrealloc(target, lpbytes);
    if (append) {
        /* Overwrite the target's terminator with the source's entries and
         * terminator:  [TARGET - END, SOURCE - HEADER] */
        memcpy(target + target_bytes - 1,
               source + LP_HDR_SIZE,
               source_bytes - LP_HDR_SIZE);
    } else {
        /* Shift the target's entries (and terminator) right to make room,
         * then copy the source's header and entries in front of them:
         *   [SOURCE - END, TARGET - HEADER] */
        memmove(target + source_bytes - 1,
                target + LP_HDR_SIZE,
                target_bytes - LP_HDR_SIZE);
        memcpy(target, source, source_bytes - 1);
    }

    lpSetNumElements(target, lplength);
    lpSetTotalBytes(target, lpbytes);

    /* Release the listpack that was not reallocated and publish the result
     * through the handle of the one that was. */
    unsigned char **kept = append ? first : second;
    unsigned char **dropped = append ? second : first;
    zfree(*dropped);
    *dropped = NULL;
    *kept = target;

    return target;
}

/* Report how many bytes the listpack occupies in total, as reported by
 * lpGetTotalBytes(). */
size_t lpBytes(unsigned char *lp) {
    size_t total = lpGetTotalBytes(lp);
    return total;
}

和ziplist一样,采用内存复制的方式实现。

二、listpack索引搜索

c 复制代码
/* Return a pointer to the element at position 'index', or NULL when the
 * index is out of range. Negative indexes count from the tail (-1 is the
 * last element).
 *
 * When the header holds a usable element count, the index is range-checked
 * up front and the scan starts from whichever end is closer. When the count
 * is LP_HDR_NUMELE_UNKNOWN, non-negative indexes scan from the head and
 * negative ones scan from the tail. */
unsigned char *lpSeek(unsigned char *lp, long index) {
    int backward = 0; /* Scan left-to-right unless decided otherwise. */
    uint32_t numele = lpGetNumElements(lp);

    if (numele == LP_HDR_NUMELE_UNKNOWN) {
        /* No count available: the sign of the index picks the direction. */
        backward = (index < 0);
    } else {
        long count = (long)numele;

        /* Normalize a negative index, then reject anything out of range. */
        if (index < 0) index += count;
        if (index < 0 || index >= count) return NULL;

        /* Past the middle it is cheaper to walk from the tail. The backward
         * walk expects a negative index, so convert it back. */
        if (index > count/2) {
            backward = 1;
            index -= numele;
        }
    }

    /* Both walks are plain lpNext()/lpPrev() iteration. */
    unsigned char *ele;
    if (backward) {
        for (ele = lpLast(lp); index < -1 && ele; index++)
            ele = lpPrev(lp,ele);
    } else {
        for (ele = lpFirst(lp); index > 0 && ele; index--)
            ele = lpNext(lp,ele);
    }
    return ele;
}

可以看出,listpack不仅支持倒序搜索,并且还有一个优化,即索引在前半部分的时候,采用正序搜索,在后半部分的时候,采用倒序搜索。

三、listpack验证函数

c 复制代码
/* Return a pointer to the byte right after the listpack header, i.e. the
 * first entry, or NULL when that byte is the LP_EOF terminator (the
 * listpack holds no entries). No validation of the entry is performed. */
unsigned char *lpValidateFirst(unsigned char *lp) {
    unsigned char *first = lp + LP_HDR_SIZE; /* Header skipped. */
    return (first[0] == LP_EOF) ? NULL : first;
}
/* Validate the entry that '*pp' points to and advance '*pp' past it.
 *
 * 'lp' is the start of the listpack and 'lpbytes' its total size; every
 * pointer that is about to be dereferenced is first checked to lie between
 * the end of the header and the last byte of the listpack.
 *
 * Returns 1 when the entry is well formed, 0 otherwise. On success '*pp' is
 * moved to the following entry, or set to NULL when '*pp' was already at the
 * LP_EOF terminator. On failure '*pp' is left untouched. */
int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
/* True when (p) lies before the first entry byte or after the last byte of
 * the listpack. Defined here and #undef'ed at the end of the function so the
 * name does not leak into the rest of the file. */
#define OUT_OF_RANGE(p) ( \
        (p) < lp + LP_HDR_SIZE || \
        (p) > lp + lpbytes - 1)
    unsigned char *p = *pp;
    if (!p)
        return 0;

    /* Before accessing p, make sure it's valid. */
    if (OUT_OF_RANGE(p))
        return 0;

    /* Reaching the terminator is a valid end of iteration. */
    if (*p == LP_EOF) {
        *pp = NULL;
        return 1;
    }

    /* check that we can read the encoded size */
    uint32_t lenbytes = lpCurrentEncodedSizeBytes(p);
    if (!lenbytes)
        return 0;

    /* make sure the encoded entry length doesn't reach outside the edge of the listpack */
    if (OUT_OF_RANGE(p + lenbytes))
        return 0;

    /* get the entry length and encoded backlen. */
    unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
    unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen);
    entrylen += encodedBacklen;

    /* make sure the entry doesn't reach outside the edge of the listpack */
    if (OUT_OF_RANGE(p + entrylen))
        return 0;

    /* move to the next entry */
    p += entrylen;

    /* make sure the encoded length at the end matches the one at the beginning:
     * the decoded backlen plus the size of the backlen field itself must equal
     * the full entry length computed above. */
    uint64_t prevlen = lpDecodeBacklen(p-1);
    if (prevlen + encodedBacklen != entrylen)
        return 0;

    *pp = p;
    return 1;
#undef OUT_OF_RANGE
}
/* Assert that lpValidateNext() accepts the entry at 'p'. 'p' is received by
 * value, so the caller's pointer is not advanced.
 * NOTE(review): the validation call sits inside assert(); with the standard
 * <assert.h> macro and NDEBUG defined no check would run at all — confirm
 * which assert implementation this file is built with. */
static inline void lpAssertValidEntry(unsigned char* lp, size_t lpbytes, unsigned char *p) {
    assert(lpValidateNext(lp, &p, lpbytes));
}
/* Check that the buffer 'lp' of 'size' bytes is a structurally sound
 * listpack. Returns 1 when it is, 0 otherwise.
 *
 * The shallow checks always run: the buffer must be able to hold a header
 * plus terminator, the total-bytes header field must equal 'size', and the
 * last byte must be LP_EOF.
 *
 * When 'deep' is non-zero every entry is additionally walked with
 * lpValidateNext(); 'entry_cb', if given, is called for each entry with the
 * entry pointer, the header element count and 'cb_userdata', and may veto
 * the listpack by returning 0. The walk must stop exactly on the final byte
 * and, unless the header count is LP_HDR_NUMELE_UNKNOWN, the number of
 * entries visited must match it. */
int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
                        listpackValidateEntryCB entry_cb, void *cb_userdata) {
    /* The header and the EOF byte must at least be readable. */
    if (size < LP_HDR_SIZE + 1) return 0;

    /* The size stored in the header has to agree with the allocation. */
    size_t bytes = lpGetTotalBytes(lp);
    if (bytes != size) return 0;

    /* A listpack always ends with the terminator. */
    if (lp[size-1] != LP_EOF) return 0;

    /* Shallow validation stops here. */
    if (!deep) return 1;

    uint32_t numele = lpGetNumElements(lp);
    uint32_t seen = 0;
    unsigned char *cur = lp + LP_HDR_SIZE;
    for (; cur && cur[0] != LP_EOF; seen++) {
        unsigned char *entry = cur;

        /* Validate and step over the entry before handing it to the
         * callback, so the callback never sees a corrupt entry. */
        if (!lpValidateNext(lp, &cur, bytes)) return 0;

        /* Give the caller a chance to reject the entry as well. */
        if (entry_cb && !entry_cb(entry, numele, cb_userdata)) return 0;
    }

    /* The walk has to finish exactly on the last byte. */
    if (cur != lp + size - 1) return 0;

    /* A known header count must match what was actually found. */
    return numele == LP_HDR_NUMELE_UNKNOWN || numele == seen;
}

1、lpValidateFirst

功能:跳过 listpack 头部(LP_HDR_SIZE 字节),返回指向第一个条目的指针;如果该位置就是结束符 LP_EOF(即 listpack 为空),则返回 NULL。它本身不对条目做校验,真正的校验由 lpValidateNext 完成。

2、lpValidateNext

功能:验证当前 listpack 条目是否有效,并移动到下一个条目。

(1)函数内宏定义 OUT_OF_RANGE

作用范围受限:C 语言的宏本身没有块作用域,它从 #define 处开始生效,直到遇到 #undef 或文件结束为止。这里在函数开头 #define、在函数末尾 #undef,因此该宏实际上只在这个函数体内可用,不会影响其他函数或文件。

减少污染:如果 OUT_OF_RANGE 是一个通用名称(例如描述边界检查的宏),全局定义可能会导致与其他代码中的宏或变量冲突。

3、lpValidateIntegrity

功能:用于验证 listpack 数据结构完整性的函数,支持快速检查(浅验证)和深度检查(深验证)。

(1)深验证和浅验证

浅验证:直接用存储的长度信息和分配的内存进行比较;

深验证:遍历listpack,以节点数量和总长度进行比较。

(2)应用场景

通过分层检查和可选回调机制,平衡了性能与灵活性。其设计强调安全性(如指针范围检查)和可扩展性(如自定义回调),适合在 Redis 或其他存储系统中用于数据持久化或网络传输前的完整性校验。

四、listpack比较函数

c 复制代码
/* Compare the element at 'p' with the buffer 's' of 'slen' bytes.
 * Returns 1 when they are equal, 0 otherwise (including when 'p' is at the
 * LP_EOF terminator).
 *
 * When lpGet() returns a pointer the element is compared byte by byte after
 * a length check. When it returns NULL, 's' is parsed with
 * lpStringToInt64() and the parsed number is compared with the value lpGet()
 * stored in its second argument; parsing 's' once is cheaper than rendering
 * the element as a string. */
unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen) {
    if (p[0] == LP_EOF) return 0;

    int64_t sz;
    unsigned char *value = lpGet(p, &sz, NULL);

    /* String element: same length and same bytes. */
    if (value != NULL)
        return (slen == sz) && memcmp(value,s,slen) == 0;

    /* Otherwise 's' must parse as an integer equal to 'sz'. */
    int64_t sval;
    if (!lpStringToInt64((const char*)s, slen, &sval)) return 0;
    return sz == sval;
}
/* qsort()-style comparator for objects that start with an unsigned int.
 *
 * Returns a negative value, zero or a positive value when the unsigned int
 * at 'a' is respectively less than, equal to or greater than the one at 'b'.
 *
 * The result is built from two comparisons instead of a subtraction: an
 * unsigned difference converted to int reports the wrong sign whenever the
 * operands are more than INT_MAX apart. */
static int uintCompare(const void *a, const void *b) {
    unsigned int lhs = *(const unsigned int *) a;
    unsigned int rhs = *(const unsigned int *) b;
    return (lhs > rhs) - (lhs < rhs);
}

该函数把字符串比较和整数比较整合在一起:当前节点存储的是字符串时,先比较长度,再用 memcmp 直接比较内存;当前节点存储的是整数时,先用 lpStringToInt64 把传入的字符串 s(长度为 slen)解析为整数,解析成功后再与节点中的整数值比较,解析失败则视为不相等。

1、uintCompare

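传给 qsort 的比较函数。按照 qsort 的约定,返回值为 int:小于 0 表示 a 应排在 b 之前,等于 0 表示两者相等,大于 0 表示 a 应排在 b 之后(C++ 的 std::sort 使用的则是返回 bool 的"小于"谓词)。该函数只读取每个元素开头的 unsigned int,这也是 lpRandomPairs 中要求 rand_pick 结构体的 index 成员必须放在第一位的原因。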

五、listpack随机取值

c 复制代码
/* A value read from a listpack: either a string or an integer.
 * Exactly one of the two representations is meaningful at a time;
 * 'sval' being NULL or not tells which one. */
typedef struct {
    /* When string is used, it is provided with the length (slen). */
    unsigned char *sval;
    uint32_t slen;
    /* When integer is used, 'sval' is NULL, and lval holds the value. */
    long long lval;
} listpackEntry;
/* Fill every field of '*dest' from the given string pointer, string length
 * and integer value. No data is copied: 'dest->sval' simply aliases 'val'. */
static inline void lpSaveValue(unsigned char *val, unsigned int len, int64_t lval, listpackEntry *dest) {
    *dest = (listpackEntry){ .sval = val, .slen = len, .lval = lval };
}
/* Pick one random key/value pair from a listpack that stores keys at even
 * positions and their values right after them.
 *
 * 'total_count' is the number of pairs to choose from and must be non-zero.
 * '*key' receives the chosen key; '*val' receives the matching value unless
 * 'val' is NULL, in which case the value is not read at all.
 *
 * The seek/next calls are kept outside of assert() so that they are still
 * executed if assertions are compiled out. */
void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val) {
    /* Avoid div by zero on corrupt listpack */
    assert(total_count);

    /* Generate even numbers, because listpack saved K-V pair.
     * lpSeek() takes a long, so keep the index in that type. */
    long r = (long)((rand() % total_count) * 2);
    unsigned char *p = lpSeek(lp, r);
    assert(p);
    key->sval = lpGetValue(p, &(key->slen), &(key->lval));

    if (!val)
        return;

    /* The value is the element that follows the key. */
    p = lpNext(lp, p);
    assert(p);
    val->sval = lpGetValue(p, &(val->slen), &(val->lval));
}
/* Pick 'count' random key/value pairs (duplicates allowed) from a listpack
 * that stores keys at even positions and their values right after them.
 *
 * keys[i] / vals[i] receive the i-th pick, in the order the random indexes
 * were drawn. 'vals' may be NULL when only keys are wanted. The listpack
 * must hold at least one pair. A 'count' of zero is a no-op.
 *
 * The picks are sorted by listpack index so that all of them are collected
 * in a single left-to-right walk. The seek/next calls are kept outside of
 * assert() so that they are still executed if assertions are compiled out. */
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
    unsigned char *p, *key, *value;
    unsigned int klen = 0, vlen = 0;
    long long klval = 0, vlval = 0;

    /* Notice: the index member must be first due to the use in uintCompare */
    typedef struct {
        unsigned int index;
        unsigned int order;
    } rand_pick;

    unsigned int total_size = lpLength(lp)/2;

    /* Avoid div by zero on corrupt listpack */
    assert(total_size);

    /* Nothing requested: avoid a zero-sized allocation and the read of
     * picks[0] below. */
    if (count == 0)
        return;

    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);

    /* create a pool of random indexes (some may be duplicate). */
    for (unsigned int i = 0; i < count; i++) {
        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
        /* keep track of the order we picked them */
        picks[i].order = i;
    }

    /* sort by indexes. */
    qsort(picks, count, sizeof(rand_pick), uintCompare);

    /* fetch the elements from the listpack into the output arrays respecting
     * the original pick order. */
    unsigned int lpindex = picks[0].index, pickindex = 0;
    p = lpSeek(lp, lpindex);
    while (p && pickindex < count) {
        key = lpGetValue(p, &klen, &klval);
        p = lpNext(lp, p);
        assert(p);
        value = lpGetValue(p, &vlen, &vlval);

        /* Serve every pick that landed on this pair. */
        while (pickindex < count && lpindex == picks[pickindex].index) {
            unsigned int storeorder = picks[pickindex].order;
            lpSaveValue(key, klen, klval, &keys[storeorder]);
            if (vals)
                lpSaveValue(value, vlen, vlval, &vals[storeorder]);
            pickindex++;
        }
        lpindex += 2;
        p = lpNext(lp, p);
    }

    zfree(picks);
}
/* Pick up to 'count' distinct random key/value pairs from a listpack that
 * stores keys at even positions and their values right after them.
 *
 * 'count' is clamped to the number of pairs in the listpack. Picked pairs
 * are written to keys[0..n-1] (and vals[0..n-1] unless 'vals' is NULL) in
 * listpack order. Returns n, the number of pairs actually picked.
 *
 * The seek/next calls are kept outside of assert() so that they are still
 * executed if assertions are compiled out. */
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals) {
    unsigned char *p, *key, *value;
    unsigned int klen = 0, vlen = 0;
    long long klval = 0, vlval = 0;
    unsigned int total_size = lpLength(lp)/2;
    unsigned int index = 0;
    if (count > total_size)
        count = total_size;

    /* To only iterate once, every time we try to pick a member, the probability
     * we pick it is the quotient of the count left we want to pick and the
     * count still we haven't visited in the dict, this way, we could make every
     * member be equally picked.*/
    p = lpFirst(lp);
    unsigned int picked = 0, remaining = count;
    while (picked < count && p) {
        double randomDouble = ((double)rand()) / RAND_MAX;
        double threshold = ((double)remaining) / (total_size - index);
        if (randomDouble <= threshold) {
            key = lpGetValue(p, &klen, &klval);
            lpSaveValue(key, klen, klval, &keys[picked]);

            /* Step from the key to its value. */
            p = lpNext(lp, p);
            assert(p);
            if (vals) {
                value = lpGetValue(p, &vlen, &vlval);
                lpSaveValue(value, vlen, vlval, &vals[picked]);
            }
            remaining--;
            picked++;
        } else {
            /* Pair skipped: still step from the key to its value. */
            p = lpNext(lp, p);
            assert(p);
        }

        /* Move on to the next key (NULL at the end of the listpack). */
        p = lpNext(lp, p);
        index++;
    }
    return picked;
}

1、lpSaveValue

用于将数据保存到一个 listpackEntry 结构体中。它的作用是 高效地存储字符串和整数值。而从listpackEntry结构体的说明不难看出,使用这类结构体,要么存储字符串,要么存储整数,就会形成内存浪费。

2、listpackEntry结构体

(1) 简化操作,避免分支判断

如果分开存储字符串和整数(例如用 union),每次访问都需要检查当前存储的类型(增加分支判断)。

Redis 的 listpack 常用于高频访问的场景(如 Redis 的 Stream、List 类型),减少分支预测失败对性能更重要。

(2) 内存浪费是可控的

listpackEntry 是一个临时结构体,仅在读取 listpack 条目并把结果返回给调用者时使用(例如 lpRandomPair、lpRandomPairs 通过 lpGetValue / lpSaveValue 填充它)。

实际存储时,listpack 的二进制格式是紧凑的:

字符串和整数是分开编码的(通过头部标识类型)。

持久化存储时不会保留未使用的字段(lval 或 sval)。

(3) 对比 ziplist 的优化

旧版 ziplist 会将字符串和整数混合存储在同一个二进制流中,但解析时需要动态判断类型。

listpack 进一步优化了这一点:每个条目独立存储类型信息,而 listpackEntry 只是解析时的中间表示。

3、lpRandomPair、lpRandomPairs和lpRandomPairsUnique

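三者的区别主要在于选取索引的方式:lpRandomPair 只随机取一个索引,lpRandomPairs 生成一组允许重复的随机索引并排序后一次遍历取出,lpRandomPairsUnique 则在一次遍历中按概率抽样以保证不重复;其余操作大多相同。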

六、debug所用

c 复制代码
/* Debugging helper: print a human readable dump of the listpack to stdout.
 *
 * A summary line (total bytes, number of entries) is followed by one block
 * per entry showing its address, index, offset from the start of the
 * listpack, size breakdown, raw bytes in hex and the value returned by
 * lpGet() (truncated to 40 bytes followed by "..."). */
void lpRepr(unsigned char *lp) {
    unsigned char *p, *vstr;
    int64_t vlen;
    unsigned char intbuf[LP_INTBUF_SIZE];
    int index = 0;

    printf("{total bytes %zu} {num entries %lu}\n", lpBytes(lp), lpLength(lp));

    p = lpFirst(lp);
    while(p) {
        uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p);
        uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);
        unsigned long back_len = lpEncodeBacklen(NULL, encoded_size);
        printf(
            "{\n"
                "\taddr: 0x%08lx,\n"
                "\tindex: %2d,\n"
                "\toffset: %1lu,\n"
                "\thdr+entrylen+backlen: %2lu,\n"
                "\thdrlen: %3u,\n"
                "\tbacklen: %2lu,\n"
                "\tpayload: %1u\n",
            (long unsigned)p,
            index,
            (unsigned long) (p-lp),
            encoded_size + back_len,
            encoded_size_bytes,
            back_len,
            encoded_size - encoded_size_bytes);

        /* Raw bytes of the whole entry, backlen included. */
        printf("\tbytes: ");
        for (unsigned int i = 0; i < (encoded_size + back_len); i++) {
            printf("%02x|",p[i]);
        }
        printf("\n");

        vstr = lpGet(p, &vlen, intbuf);
        printf("\t[str]");
        if (vlen > 40) {
            if (fwrite(vstr, 40, 1, stdout) == 0) perror("fwrite");
            printf("...");
        } else if (vlen > 0) {
            /* fwrite() returns 0 for a zero-sized item even on success, so
             * it is only called (and checked) when there is data to write;
             * otherwise an empty value would report a bogus error. */
            if (fwrite(vstr, vlen, 1, stdout) == 0) perror("fwrite");
        }
        printf("\n}\n");
        index++;
        p = lpNext(lp, p);
    }
    printf("{end}\n\n");
}

七、其他内容

c 复制代码
#ifdef REDIS_TEST
/* Test/debug-only code, omitted here. */
#endif

同样因为没有作者所需的内容,故跳过。

八、listpack和ziplist的区别

| 特性 | Ziplist | Listpack | 胜出方 |
| --- | --- | --- | --- |
| 内存效率 | 高,但存在级联更新开销 | 更高,编码更灵活 | Listpack |
| 性能稳定性 | 级联更新导致性能波动 | 无级联更新,性能稳定 | Listpack |
| 编码复杂度 | 较复杂(prevlen 动态调整) | 更简单(backlen 固定) | Listpack |
| 反向遍历效率 | 依赖 prevlen,略低 | 通过 backlen 快速定位,相当 | 相当 |
| Redis 版本 | ≤6.x 主用 | 7.0+ 完全取代 | Listpack |

代码分析方面:

1、ziplist需级联,而listpack不需要的原因

(1)Ziplist 的每个元素包含三部分:[prevlen][encoding][data]

prevlen:记录前一个元素的长度(1 或 5 字节)。

如果前一个元素长度 < 254,prevlen 用 1 字节存储。

如果前一个元素长度 ≥ 254,prevlen 用 5 字节存储(1 字节标记 0xFE + 4 字节实际长度)。

encoding:动态编码,记录当前元素的数据类型和长度。

data:实际存储的数据(字符串或整数)。

当在 Ziplist 中插入或修改元素时,若导致后续元素的 prevlen 需要扩容(例如从 1 字节变为 5 字节),则会触发连锁反应:

(1)修改当前元素:例如插入一个长字符串,导致后续元素的 prevlen 需要从 1 字节扩容到 5 字节。

(2)后续元素后移:由于 prevlen 占用空间变大,后续所有元素需要向后移动 4 字节(5 - 1)。

(3)递归检查:移动后的元素可能又需要更新自己的 prevlen(如果它前面的元素长度也发生了变化),导致新一轮的移动。

(4)最坏情况:若所有元素的 prevlen 都需要扩容,时间复杂度会从 O(n) 退化为 O(n²),性能急剧下降。

(2)Listpack 的每个元素包含三部分:[encoding][data][backlen]

encoding:记录数据类型和长度(支持更灵活的编码,如小整数直接嵌入编码)。

data:实际存储的数据。

backlen:记录当前元素的总长度(包括 encoding 和 data),采用变长编码(1~5 字节)。

backlen 仅用于反向遍历时快速定位前一个元素,不参与前驱长度的计算。

反向遍历机制:

Listpack 头部维护一个 total-bytes 字段,记录整个 Listpack 的总长度。

反向遍历时,先从当前元素的起始位置向前读取紧邻的前一个元素的 backlen(它存放在前一个元素的末尾),再用 current_element_start - (backlen + backlen 字段自身所占字节数) 得到前一个元素的起始位置(无需像 ziplist 那样在当前元素中保存前驱的 prevlen)。

插入/修改元素:

修改当前元素的 data 或 encoding 时,仅需更新其 backlen,不会影响其他元素。

即使 backlen 的编码长度发生变化(如从 1 字节变为 2 字节),也只是当前元素占用的空间改变:其后的数据需要整体平移(一次 memmove),但后续元素自身的任何字段都不必重新编码,因此不会引发级联更新。

2、理解prevlen是"动态的",而backlen是"固定的"。

(1)backlen:

Listpack 中每个元素末尾存储的字段,表示当前元素 encoding 和 data 两部分的总长度(不包括 backlen 自身;从 lpValidateNext 中 prevlen + encodedBacklen == entrylen 的校验可以看出这一点)。
固定特性:backlen 的编码方式是变长但有明确规则(1~5 字节),但它的值是当前元素长度的精确反映,因此可以视为"固定"于当前元素的结构中。

(2)prevlen:

在 Redis 的 Ziplist 中,prevlen 存放在每个元素的开头,表示前一个元素的长度,用于反向遍历时从当前元素跳转到前一个元素。
动态特性:prevlen 的长度可能因前一个元素的大小而变化(例如,前一个元素很短时用 1 字节,很长时用 5 字节),因此其值是"动态"的。

3、listpack和ziplist的插入函数对比

ziplist 的插入在必要时需要调用 __ziplistCascadeUpdate 进行级联更新,而 listpack 的插入函数则与删除、替换函数合并为同一个实现,不存在级联更新。

相关推荐
隔壁小邓2 小时前
TIDB分布式数据库
数据库·分布式·tidb
wellc2 小时前
redis连接服务
数据库·redis·bootstrap
隔叶听风2 小时前
RocketMQ 与 Kafka 长轮询详解
数据库·kafka·rocketmq
袋鼠云数栈2 小时前
构建金融级数据防线:数栈 DataAPI 的全生命周期管理实践
java·大数据·数据库·人工智能·api
知识分享小能手2 小时前
PostgreSQL 入门学习教程,从入门到精通,PostgreSQL 16 数据备份与还原详解 —语法、案例与实战(16)
数据库·学习·postgresql
四谎真好看2 小时前
Redis学习笔记(实战篇1)
redis·笔记·学习·学习笔记
串口哑火达人2 小时前
(七)RT-Thread物联网实战--MQTT-cJSON-OneNET
c语言·单片机·嵌入式硬件·mcu·物联网
yc_xym2 小时前
Redis哨兵(Sentinel)机制
数据库·redis·sentinel
数据知道2 小时前
MongoDB复制集部署实战:三节点集群搭建完整步骤详解
数据库·mongodb