【CPP】数据结构

#1- Arrays

Arrays

A contiguously allocated memory
Fixed number of objects (The array size cannot be changed)
Its element type can be any fundamental type (int, float, bool), structure, class, pointers enumeration

cpp 复制代码

# array.cpp
#include <iostream>
using namespace std;
int main()
{
    int num_array1[5]; //uninitialized array, random values 
    int num_array2[5] = {0, 1, 2, 3, 4}; //initialization

    for(int idx = 0; idx < 5; idx++)
        cout << num_array1[idx] << " ";
    cout << endl;

    for(int idx = 0; idx < 5; idx++)
        cout << num_array2[idx] << " ";
    cout << endl;

    return 0;
}

Variable-length arrays

If the length is not an integer constant expression, the array will be a variable-length one.

cpp 复制代码

#variable-array.cpp
#include <iostream>
using namespace std;
int main()
{
    int num_array1[5] = {0,1}; // fixed length array, initialized to {0,1,0,0,0}  
    cout << "sizeof(num_array1) = " << sizeof(num_array1) << endl;

    int len = 0;
    while ( len < 10 )
    {
        int num_array2[len]; //variable-length array
        cout << "len = " << len;
        cout << ", sizeof(num_array2)) = " << sizeof(num_array2) << endl;
        len ++;
    }
}

Arrays of unknown size

The number is not specified in the declaration
The arguments of a function

cpp 复制代码

#unknown-size-array .cpp
#include <iostream>
using namespace std;

// float array_sum(float *values, size_t length) // equivalent to the next line
float array_sum(float values[], size_t length)
{
    cout << sizeof(values) << endl; // 4 or 8
    float sum = 0.0f;
    for (int i = 0; i < length; i++)
    {
        sum += values[i];
    }
    return sum;
}

int main()
{
    float values[4] = {1.1f, 2.2f, 3.3f, 4.4f};
    float sum = array_sum(values, 4);

    cout << "sum = " << sum << endl;
    return 0;
}

Element accessing

cpp 复制代码

int array1[4] = {9, 8, 7, 6};
int array2[4];
array2 = array1;  // error!
array2[0] = array1[0];  // okay
array2[1] = array1[1];
array2[2] = array1[2];
array2[3] = array1[3];

No bounds-checking in C/C++

cpp 复制代码

# index-bound.cpp
#include <iostream>
using namespace std;
int main()
{
    int num_array[5]; 

    for(int idx = -1; idx <= 5; idx++) //out of bounds
        num_array[idx] = idx * idx;

    for(int idx = -1; idx <= 5; idx++) //out of bounds
        cout << "num_array[" << idx << "] = " << num_array[idx] << endl;

    return 0;
}

超出边界也没事，直接取越界的地址

Arrays are not objects in C/C++ (different with Java)
Arrays can be regarded as address

优点是数据读写很快

Multidimensional arrays

cpp 复制代码

#md-array.cpp
#include <iostream>
using namespace std;

// You must tell the function the bound of an array,
// otherwise, elements cannot be accessed
// if the array is a variable-length one, it may be difficult to know the bound
void init_2d_array(float mat[][4], //error, arrays of unknown bound
                    size_t rows, size_t cols)
{
    for (int r = 0; r < rows; r++)
        for(int c = 0; c < cols; c++)
            mat[r][c] =  r * c;
}

int main()
{
    int mat1[2][3] = {{11,12,13}, {14,15,16}};

    int rows = 5;
    int cols = 4;
    //float mat2[rows][cols]; //uninitialized array
    float mat2[rows][4]; //uninitialized array

    //init_2d_array(mat2, rows, cols);

    for (int r = 0; r < rows; r++)
        for(int c = 0; c < cols; c++)
            mat2[r][c] =  r * c;


    for (int r = 0; r < rows; r++)
    {
        for(int c = 0; c < cols; c++)
            cout << mat2[r][c] << " ";
        cout << endl;
    }
    return 0;
}

const Arrays

Used as function arguments

cpp 复制代码

#const-array
#include <iostream>
using namespace std;

//float array_sum(const float *values, size_t length)
//float array_sum(const float values[4], size_t length)
float array_sum(const float values[], size_t length)
{
    float sum = 0.0f;
    for (int i = 0; i < length; i++)
    {
        sum += values[i];
        //values[i] = 0; //error
    }
    return sum;
}

int main()
{
    // const float PI = 3.1415926f;
    // PI += 1.f; // error
    // const float values[4] = {1.1f, 2.2f, 3.3f, 4.4f};
    // values[0] = 1.0f; // error

    float values[4] = {1.1f, 2.2f, 3.3f, 4.4f};
    float sum = array_sum(values, 4);

    cout << "sum = " << sum << endl;
    return 0;
}

外部是普通数组，进入函数后变成const不能进行修改，可以放心使用

2- Strings

Array-style strings

An array-style string (null-terminated strings/arrays of characters) is a series of characters stored in bytes in memory
This kind of strings can be declared as follows

cpp 复制代码

#initchar.cpp
#include <iostream>
#include <cstring>
using namespace std;

int main()
{
    char rabbit[16] = {'P', 'e', 't', 'e', 'r'};
    cout << "String length is " << strlen(rabbit) << endl;
    for(int i = 0; i < 16; i++)
        cout << i << ":" << +rabbit[i] << "(" << rabbit[i] << ")" << endl;

    char bad_pig[9] = {'P', 'e', 'p', 'p', 'a', ' ', 'P', 'i', 'g'};
    char good_pig[10] = {'P', 'e', 'p', 'p', 'a', ' ', 'P', 'i', 'g', '\0'};

    cout << "Rabbit is (" << rabbit << ")" << endl;
    cout << "Pig's bad name is (" << bad_pig << ")" << endl; 
    cout << "Pig's good name is (" << good_pig << ")" << endl; 

    char name[10] = {'Y', 'u', '\0', 'S', '.', '0'};
    cout << "(" << name << ")" << endl;
    cout << strlen(name) << endl;

    return 0;
}

字符串的结束标志："\0"，碰到就结束

strlen(): 碰到"\0" 才结束计算长度，所以要多存一个字符'\0', 否则会越界

String literals

it isn't convenient to initial a string character by character
String literals can help

cpp 复制代码

char name1[] = "Southern University of Science and Technology";
char name2[] = "Southern University of "  "Science and Technology";
char name3[] = "ABCD";   // how many bytes for the array?

const wchar_t[] s5 = L"ABCD";
const char16_t[] s9 = u"ABCD";  // since C++11
const char32_t[] s6 = U"ABCD";  // since C++11

String manipulation and examination

Copy

cpp 复制代码

char * strcpy(char* dest, const char * src);

strcpy 对于长度不够的字符串会在后面字符串后面字节拷贝，越界不安全

Safer one:

cpp 复制代码

char *strncpy(char *dest, const char *src, size_t count);

count: dest 和 src的较小值

Concatenate: appends a copy of src to dest

cpp 复制代码

char *strcat(char *dest, const char *src);

Compare

cpp 复制代码

int strcmp(const char *lhs, const char *rhs);

cpp 复制代码

# stringop.cpp
#include <iostream>
#include <cstring>
using namespace std;

int main()
{
    char str1[] = "Hello, \0CPP";
    char str2[] = "SUSTech";
    char result[128];

    for(int i = 0; i < 16; i++)
        cout << i << ":" << +str1[i] << "(" << str1[i] << ")" << endl;

    strcpy(result, str1);
    cout << "Result = " <<  result << endl;
    strcat(result, str2);
    cout << "Result = " <<  result << endl;

    cout << "strcmp() = " << strcmp(str1, str2) << endl;

    //strcat(str1, str2); //danger operation!
    //cout << "str1 = " << str1 << endl;
}

String class

Null-terminated strings are easy to be out of bound, and to cause problems.
string class provides functions to manipulate and examine strings.

cpp 复制代码

#stdstring.cpp
#include <iostream>
#include <string>

using namespace std;
int main()
{
    std::string str1 = "Hello";
    std::string str2 = "SUSTech";
    std::string result = str1 + ", " + str2;

    cout << "result = " + result << endl;

    cout << "The length is " << result.length() << endl;

    cout << "str1 < str2 is " << (str1 < str2) << endl;

    cout << "str1[100] = (" << str1[100] << ")" << endl;

    return 0;
}

Different types of strings

cpp 复制代码

std::string
std::wstring
std::u8string // C++20
std::u16string  // C++11
std::u32string   // C++11

string类也是没有越界检查

3- Structures Unions and Enumerations

struct

A struct is a type consisting of a sequence of membes
The members are allocated in an orders sequence.

cpp 复制代码

# struct.c
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

// typedef 
// struct _Student{
//     char name[4];
//     int born;
//     bool male; 
// } Student;

struct Student{
    char name[4];
    int born;
    bool male; 
};
int main()
{
    struct Student stu = {"Yu", 2000, true}; //initialization
    // strcpy(stu.name, "Yu");
    // stu.born = 2000;
    // stu.male = true;

    printf("Student %s, born in %d, gender %s\n", 
        stu.name, 
        stu.born, 
        stu.male ? "male" : "female");

    struct Student students[100];
    students[50].born = 2002; 

    return 0;
}

在C编译器中，声明struct变量一定要在前面加struct，否则编译有错

但在C++编译器中，即使不在变量声明前加struct也不会报错

在C编译器中，除非在struct定义时加上typedef，这样的话就可以在声明时不加struct，这是因为你定义了一个新的类型

cpp 复制代码

typedef 
struct _Student{
    char name[4];
    int born;
    bool male; 
} Student;

Structure padding

In order to align the data in memory, some empty bytes will be padded

cpp 复制代码

# structpadding.cpp
#include <iostream>
using namespace std;

struct Student1{
    int id;
    bool male;
    char label;
    float weight;
};

struct Student2{
    int id;
    bool male;
    float weight;
    char label;
};

int main()
{
    cout << "Size of Student1: " << sizeof(Student1) << endl;
    cout << "Size of Student2: " << sizeof(Student2) << endl;
    return 0;
}

绝对0地址（变量开始的地址）一般是4的倍数或8的倍数

如果8对齐，可以直接装载到寄存器里

如果跨过8，则一次可能装载不完，需要装载两次

所以宁可牺牲两个，也往上放一放

struct in C and C++

struct and class in C++ are identical except for several features
No typedef needed in C++

Union

union declaration is similar to struct declaration
The storage of members overlaps/shared
sizeof(union ipv4address) is 4

cpp 复制代码

#union.cpp
#include <iostream>
using namespace std;

union ipv4address{
    std::uint32_t address32;
    std::uint8_t address8[4];
};

int main()
{
    union ipv4address ip;

    cout << "sizeof(ip) = " << sizeof(ip) << endl;

    ip.address8[3] = 127;
    ip.address8[2] = 0;
    ip.address8[1] = 0;
    ip.address8[0] = 1;

    cout << "The address is " ;
    cout << +ip.address8[3] << ".";
    cout << +ip.address8[2] << ".";
    cout << +ip.address8[1] << ".";
    cout << +ip.address8[0] << endl;

    cout << std::hex;
    cout << "in hex " << ip.address32 << endl;

    return 0;
}

enum

enum makes a new type
It provides an alternative to const for creating symbolic constantt
Its members are integers, but they cannot be operands in arithmetic expressions

cpp 复制代码

enum color {WHITE, BLACK, RED, GREEN, BLUE, YELLOW, NUM_COLORS};
enum color pen_color = RED;
pen_color = color(3); //convert int to enum
cout << "We have " << NUM_COLORS << " pens." << endl;
//pen_color += 1; //error!
int color_index = pen_color;
color_index += 1;
cout << "color_index = " << color_index << endl;

An example with struct, union and enum

cpp 复制代码

#enum.cpp
#include <iostream>
using namespace std;

enum color {WHITE, BLACK, RED, GREEN, BLUE, YELLOW, NUM_COLORS};
enum datatype {TYPE_INT8=1, TYPE_INT16=2, TYPE_INT32=4, TYPE_INT64=8};

struct Point{
    enum datatype type;
    union {
        std::int8_t data8[3];
        std::int16_t data16[3];
        std::int32_t data32[3];
        std::int64_t data64[3];
    };
};

size_t datawidth(struct Point pt)
{
    return size_t(pt.type) * 3;
}

int64_t l1norm(struct Point pt)
{
    int64_t result = 0;
    switch(pt.type)
    {
        case (TYPE_INT8): 
            result = abs(pt.data8[0]) + abs(pt.data8[1]) + abs(pt.data8[2]);
            break;
        case (TYPE_INT16): 
            result = abs(pt.data16[0]) + abs(pt.data16[1]) + abs(pt.data16[2]);
            break;
        case (TYPE_INT32): 
            result = abs(pt.data32[0]) + abs(pt.data32[1]) + abs(pt.data32[2]);
            break;
        case (TYPE_INT64): 
            result = abs(pt.data64[0]) + abs(pt.data64[1]) + abs(pt.data64[2]);
            break;
    }
    return result;
}

int main()
{
    enum color pen_color = RED;
    pen_color = color(3); //convert int to enum
    cout << "We have " << NUM_COLORS << " pens." << endl;
    //pen_color += 1; //error!
    int color_index = pen_color;
    color_index += 1;
    cout << "color_index = " << color_index << endl;

    //declaration and initialization
    struct Point point1 = {.type=TYPE_INT8, .data8={-2,3,4}};
    struct Point point2 = {.type=TYPE_INT32, .data32={1,-2,3}};
    
    cout << "Data width = " << datawidth(point1) << endl;
    cout << "Data width = " << datawidth(point2) << endl;

    cout << "L1 norm = " << l1norm(point1) << endl;
    cout << "L1 norm = " << l1norm(point2) << endl;


    return 0;
}

4-typedef

typedef

typedef can create an alias for a type
It can be used to replace a possibly complex type name.

cpp 复制代码

#typedef.cpp
#include <iostream>
using namespace std;

typedef int myint;
typedef unsigned char vec3b[3];
typedef struct _rgb_struct{ // name _rgb_struct can be omit
    unsigned char r;
    unsigned char g;
    unsigned char b;
}  rgb_struct;

int main()
{
    myint num = 32;

    // the following two lines are identical
    //unsigned char color[3] = {255, 0, 255};
    vec3b color = {255, 0, 255}; 
    cout << hex;
    cout << "R=" << +color[0] << ", ";
    cout << "G=" << +color[1] << ", ";
    cout << "B=" << +color[2] << endl;

    rgb_struct rgb = {0, 255, 128};
    cout << "R=" << +rgb.r << ", ";
    cout << "G=" << +rgb.g << ", ";
    cout << "B=" << +rgb.b << endl;

    cout << sizeof(rgb.r) << endl;
    cout << sizeof(+rgb.r) << endl; //why 4?

    return 0;
}

Typical typedef usages

cpp 复制代码

#_uint8_t.h
#ifndef _UINT8_T
#define _UINT8_T
typedef unsigned char uint8_t;
#endif   /* _UINT8_T. */

cpp 复制代码

# if defined (_LP64)
typedef int wchar_t;
#else
typedef long wchar_t;
#endif