RGB转换为NV12，查表式算法

原来代码
https://blog.csdn.net/quantum7/article/details/82627937
查表式算法
这里的查表式算法没有使用VALID_COLOR判断，因为该判断会严重影响性能。
循环体中，也尽可能去掉了位置的计算。
复制代码
//Fixed-point RGB -> YUV conversion. Every weight is scaled by 256, hence the >> 8.
//
//RGB_2_x_INT   computes the weighted sum directly and passes the result through
//              VALID_COLOR (defined elsewhere; presumably a clamp to 0..255 - TODO confirm).
//RGB_2_x_TABLE builds the same sum from the BGR_x_y_TABLE lookup tables and does not
//              apply VALID_COLOR. R, G and B are used as array indices, so they
//              must be in the range 0..255.
//
//The arguments of the INT variants are parenthesized so that an expression argument
//(for example a + b) is weighted as a whole.
//
//NOTE: the U and V sums can be negative, and >> applied to a negative int is
//implementation-defined in C; the results assume an arithmetic shift.
#define RGB_2_Y_INT(R, G, B)        VALID_COLOR((( 66*(R) + 129*(G) +  25*(B)) >> 8) +  16)
#define RGB_2_Y_TABLE(R, G, B)      (((BGR_Y_R_TABLE[R] + \
                                       BGR_Y_G_TABLE[G] + \
                                       BGR_Y_B_TABLE[B]) \
                                      >> 8) +  16)

#define RGB_2_U_INT(R, G, B)        VALID_COLOR(((-38*(R) -  74*(G) + 112*(B)) >> 8) + 128)
#define RGB_2_U_TABLE(R, G, B)      (((BGR_U_R_TABLE[R] + \
                                       BGR_U_G_TABLE[G] + \
                                       BGR_U_B_TABLE[B]) \
                                      >> 8) + 128)

#define RGB_2_V_INT(R, G, B)        VALID_COLOR(((112*(R) -  94*(G) -  18*(B)) >> 8) + 128)
#define RGB_2_V_TABLE(R, G, B)      (((BGR_V_R_TABLE[R] + \
                                       BGR_V_G_TABLE[G] + \
                                       BGR_V_B_TABLE[B]) \
                                      >> 8) + 128)

//Set by RGB2NV12 the first time it fills the lookup tables below.
static bool BGR_TO_YUV = false;

//Per-channel products "weight * value" for every 8-bit channel value.
//Naming: BGR_<output component>_<input channel>_TABLE.
//Objects with static storage start out zeroed until RGB2NV12_Init runs.
static int  BGR_Y_R_TABLE[256];
static int  BGR_Y_G_TABLE[256];
static int  BGR_Y_B_TABLE[256];

static int  BGR_U_R_TABLE[256];
static int  BGR_U_G_TABLE[256];
static int  BGR_U_B_TABLE[256];

static int  BGR_V_R_TABLE[256];
static int  BGR_V_G_TABLE[256];
static int  BGR_V_B_TABLE[256];


//Fill all nine lookup tables with weight * level for level = 0..255.
static void RGB2NV12_Init()
{
    int level = 0;

    while (level < 256)
    {
        //Y weights: 66 (R), 129 (G), 25 (B).
        BGR_Y_R_TABLE[level] = level *  66;
        BGR_Y_G_TABLE[level] = level * 129;
        BGR_Y_B_TABLE[level] = level *  25;

        //U weights: -38 (R), -74 (G), 112 (B).
        BGR_U_R_TABLE[level] = level * -38;
        BGR_U_G_TABLE[level] = level * -74;
        BGR_U_B_TABLE[level] = level * 112;

        //V weights: 112 (R), -94 (G), -18 (B).
        BGR_V_R_TABLE[level] = level * 112;
        BGR_V_G_TABLE[level] = level * -94;
        BGR_V_B_TABLE[level] = level * -18;

        ++level;
    }
}

//Convert two adjacent source rows into two Y rows and one row of interleaved U,V.
//
//I0, I1      - two consecutive source rows of packed pixels. Each pixel occupies
//              `step` bytes; its channels are read at COLOR_R_INDEX, COLOR_G_INDEX
//              and COLOR_B_INDEX (defined elsewhere).
//step        - distance in bytes between consecutive pixels of a source row.
//image_width - number of pixels per row. Expected to be even; a trailing odd
//              column is left unconverted so that no row is overrun.
//Y0, Y1      - two consecutive destination rows of the Y plane (one byte per pixel).
//              Y0 -> yyyyyy
//              Y1 -> yyyyyy
//UV0         - destination row of the interleaved UV plane, one U,V pair per
//              2x2 block of source pixels.
//              UV0 -> uvuvuv
static void RGB2NV12_TwoRows(const unsigned char I0[],
                            const unsigned char I1[],
                            int step,
                            const int image_width,
                            unsigned char Y0[],
                            unsigned char Y1[],
                            unsigned char UV0[])
{
    int xoffset0 = 0;   //Byte offset of the left pixel of the current 2x2 block.

    //Process 4 source pixels per iteration (2 pixels of row I0 and 2 pixels of row I1).
    //Column x + 1 is accessed as well, so it must exist before the body runs.
    for (int x = 0; x + 1 < image_width; x += 2)
    {
        //Byte offset of the right pixel of the current 2x2 block.
        const int xoffset1 = xoffset0 + step;

        //Load the channels of the left pixel of the first row.
        const int b00 = I0[xoffset0 + COLOR_B_INDEX];
        const int g00 = I0[xoffset0 + COLOR_G_INDEX];
        const int r00 = I0[xoffset0 + COLOR_R_INDEX];

        //Load the channels of the right pixel of the first row.
        const int b01 = I0[xoffset1 + COLOR_B_INDEX];
        const int g01 = I0[xoffset1 + COLOR_G_INDEX];
        const int r01 = I0[xoffset1 + COLOR_R_INDEX];

        //Load the channels of the left pixel of the second row.
        const int b10 = I1[xoffset0 + COLOR_B_INDEX];
        const int g10 = I1[xoffset0 + COLOR_G_INDEX];
        const int r10 = I1[xoffset0 + COLOR_R_INDEX];

        //Load the channels of the right pixel of the second row.
        const int b11 = I1[xoffset1 + COLOR_B_INDEX];
        const int g11 = I1[xoffset1 + COLOR_G_INDEX];
        const int r11 = I1[xoffset1 + COLOR_R_INDEX];

        //1: table lookup without VALID_COLOR; 0: direct arithmetic with VALID_COLOR.
#if 1
        //Calculate 4 Y elements.
        const int y00 = RGB_2_Y_TABLE(r00, g00, b00);
        const int y01 = RGB_2_Y_TABLE(r01, g01, b01);
        const int y10 = RGB_2_Y_TABLE(r10, g10, b10);
        const int y11 = RGB_2_Y_TABLE(r11, g11, b11);

        //Calculate 4 U elements.
        const int u00 = RGB_2_U_TABLE(r00, g00, b00);
        const int u01 = RGB_2_U_TABLE(r01, g01, b01);
        const int u10 = RGB_2_U_TABLE(r10, g10, b10);
        const int u11 = RGB_2_U_TABLE(r11, g11, b11);

        //Calculate 4 V elements.
        const int v00 = RGB_2_V_TABLE(r00, g00, b00);
        const int v01 = RGB_2_V_TABLE(r01, g01, b01);
        const int v10 = RGB_2_V_TABLE(r10, g10, b10);
        const int v11 = RGB_2_V_TABLE(r11, g11, b11);
#else
        //Calculate 4 Y elements.
        const int y00 = RGB_2_Y_INT(r00, g00, b00);
        const int y01 = RGB_2_Y_INT(r01, g01, b01);
        const int y10 = RGB_2_Y_INT(r10, g10, b10);
        const int y11 = RGB_2_Y_INT(r11, g11, b11);

        //Calculate 4 U elements.
        const int u00 = RGB_2_U_INT(r00, g00, b00);
        const int u01 = RGB_2_U_INT(r01, g01, b01);
        const int u10 = RGB_2_U_INT(r10, g10, b10);
        const int u11 = RGB_2_U_INT(r11, g11, b11);

        //Calculate 4 V elements.
        const int v00 = RGB_2_V_INT(r00, g00, b00);
        const int v01 = RGB_2_V_INT(r01, g01, b01);
        const int v10 = RGB_2_V_INT(r10, g10, b10);
        const int v11 = RGB_2_V_INT(r11, g11, b11);
#endif
        //Destination U element: truncating average of the 2x2 "original" U elements.
        const int u0 = (u00 + u01 + u10 + u11) >> 2;

        //Destination V element: truncating average of the 2x2 "original" V elements.
        const int v0 = (v00 + v01 + v10 + v11) >> 2;

        //Store 4 Y elements (two in first row and two in second row).
        Y0[x + 0]    = (unsigned char)y00;
        Y0[x + 1]    = (unsigned char)y01;
        Y1[x + 0]    = (unsigned char)y10;
        Y1[x + 1]    = (unsigned char)y11;

        //Store destination U element.
        UV0[x + 0]   = (unsigned char)u0;

        //Store destination V element (next to stored U element).
        UV0[x + 1]   = (unsigned char)v0;

        //Advance by two source pixels.
        xoffset0 += 2 * step;
    }
}

//Convert image I from pixel ordered RGB to NV12 format.
//I - Input image in pixel ordered RGB format
//image_width - Number of columns of I
//image_height - Number of rows of I
//J - Destination "image" in NV12 format.

//I is pixel ordered RGB color format (size in bytes is image_width*image_height*3):
//RGBRGBRGBRGBRGBRGB
//RGBRGBRGBRGBRGBRGB
//RGBRGBRGBRGBRGBRGB
//RGBRGBRGBRGBRGBRGB
//
//J is in NV12 format (size in bytes is image_width*image_height*3/2):
//YYYYYY
//YYYYYY
//UVUVUV
//Each element of destination U is average of 2x2 "original" U elements
//Each element of destination V is average of 2x2 "original" V elements
//
//Limitations:
//1. image_width should be a multiple of 2; a trailing odd column is left unconverted.
//2. image_height should be a multiple of 2; a trailing odd row is left unconverted.
//3. I and J must be two separate arrays (in place computation is not supported).
//4. If I or J is a null pointer, or either dimension is smaller than 2, nothing is converted.
//5. Byte offsets are kept in unsigned int, so image_width*image_height*3 must fit in it.
//6. The first-use table initialization below is not synchronized between threads.
void RGB2NV12(const unsigned char I[],
            const int image_width, 
            const int image_height,
            unsigned char J[])
{
    //Fill the lookup tables on the first call.
    if (!BGR_TO_YUV)
    {
        WY_LOG_HERE();
        BGR_TO_YUV = true;
        RGB2NV12_Init();
    }

    //Reject arguments for which not even one 2x2 block can be converted. This also
    //keeps negative dimensions out of the unsigned arithmetic below.
    if (!I || !J || image_width < 2 || image_height < 2)
    {
        return;
    }

    const int step = 3;     //Bytes per source pixel (R,G,B).

    const unsigned int width  = (unsigned int)image_width;
    const unsigned int height = (unsigned int)image_height;

    //Input row width is image_width*3 bytes (each pixel is R,G,B).
    const unsigned int istep  = width * step;

    //Only whole 2x2 blocks are converted, so the row helper gets an even column count.
    const int even_width = image_width - (image_width % 2);

    //In NV12 format, UV plane starts below Y plane.
    unsigned char *UV = &J[width * height];

    unsigned int iline0 = 0;    //Byte offset of the upper source row of the pair.
    unsigned int yline0 = 0;    //Byte offset of the upper Y row of the pair.
    unsigned int uvline = 0;    //Byte offset of the UV row inside the UV plane.

    //In each iteration: process two rows of Y plane, and one row of interleaved UV plane.
    //Row y + 1 is processed as well, so it must exist before the body runs.
    for (unsigned int y = 0; y + 1 < height; y += 2)
    {
        //I0 and I1 point to two sequential source rows.
        const unsigned char *I0 = &I[iline0];           //I0 -> rgbrgbrgbrgbrgbrgb...
        const unsigned char *I1 = &I[iline0 + istep];   //I1 -> rgbrgbrgbrgbrgbrgb...

        //Y0 and Y1 point to two sequential destination rows of Y plane.
        //Output Y row width is image_width bytes (one Y element per pixel).
        unsigned char *Y0 = &J[yline0];                 //Y0 -> yyyyyy
        unsigned char *Y1 = &J[yline0 + width];         //Y1 -> yyyyyy

        //UV0 points to the destination row of interleaved UV plane.
        //Output UV row width is the same as the Y row width.
        unsigned char *UV0 = &UV[uvline];               //UV0 -> uvuvuv

        //Process two source rows into: Two Y destination rows, and one destination interleaved U,V row.
        RGB2NV12_TwoRows(I0,
                        I1,
                        step,
                        even_width,
                        Y0,
                        Y1,
                        UV0);

        iline0  += 2 * istep;
        yline0  += 2 * width;
        uvline  += width;
    }
}