Rust : 数据分析利器polars用法

Polars功能强大、博大精深,即使只是牛刀小试,也能感受到它未来在数据分析领域必将占有重要的一席之地。

下面主要列举一些常见用法。

一、toml

需要说明的是,在Rust中,不少功能都需要在Cargo.toml中启用对应的features才能使用,这一点需要特别注意,否则编译无法通过。

以下polars的版本是0.41.3。

c 复制代码
[package]
name = "my_polars"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars = { version = "0.41.3", features = ["lazy","dtype-struct","polars-io","dtype-datetime","dtype-date","range","temporal","rank","serde","csv","ndarray"] }
rand = "0.8.5"
chrono = "0.4.38"
serde_json = "1.0.124"

二、main.rs

部分函数功能还没有完成,用todo标示,请大家注意。

c 复制代码
use polars::prelude::*;
use std::time::Instant;
use serde_json::*;
use chrono::{DateTime, Local, Datelike, NaiveDate};
/// Entry point: runs the two `DataFrame` construction demos.
///
/// The remaining demo functions in this file are not invoked here; call the
/// ones you want to try from this function.
fn main() {
    create_df_by_series();
    create_df_by_df_macro();
    // Add calls to the other demo functions here as needed; they are not all listed.
}

/// Builds a three-column `DataFrame` from individually constructed `Series`
/// (one from a `Vec`, one from a slice reference, one from an array) and
/// prints it with the `Debug` formatter.
fn create_df_by_series() {
    println!("------------- create_df_by_series test ---------------- ");
    let columns = vec![
        Series::new("from vec", vec![4, 3, 2]),
        Series::new("from slice", &[true, false, true]),
        Series::new("from array", ["rust", "go", "julia"]),
    ];
    let frame = DataFrame::new(columns).unwrap();
    println!("{:?}", &frame);
}

/// Builds a `DataFrame` with the `df!` macro, then uses the lazy API to count
/// all rows of `D1` and the rows of `D1` that are greater than 2.
///
/// The second output column is labelled after the predicate that is actually
/// applied (`D1 > 2`); the previous label claimed `D1 > 3`.
fn create_df_by_df_macro() {
    println!("------------- create_df_by_macro test ---------------- ");
    let df1: DataFrame = df!("D1" => &[1, 3, 1, 5, 6], "D2" => &[3, 2, 3, 5, 3]).unwrap();
    let df2 = df1
        .lazy()
        .select(&[
            col("D1").count().alias("total"),
            // Keep the alias in sync with the literal used in the filter.
            col("D1").filter(col("D1").gt(lit(2))).count().alias("D1 > 2"),
        ])
        .collect()
        .unwrap();
    println!("{}", df2);
}
/// Ranks `class` within each `scores` group and prints the first three rows
/// of the sorted result.
///
/// Note from the original author: this needs the `rank` cargo feature.
fn rank() {
    println!("------------- rank test ---------------- ");
    let frame = df!(
        "scores" => ["A", "A", "A", "B", "C", "B"],
        "class" => [1, 2, 3, 4, 2, 2]
    )
    .unwrap();

    // Ordinal, ascending rank of `class`, evaluated per `scores` partition.
    let rank_in_group = col("class")
        .rank(RankOptions { method: RankMethod::Ordinal, descending: false }, None)
        .over([col("scores")])
        .alias("rank_");

    let ranked = frame
        .lazy()
        .with_column(rank_in_group)
        .sort_by_exprs([col("scores"), col("class"), col("rank_")], Default::default());

    println!("{:?}", ranked.collect().unwrap().head(Some(3)));
}
/// Demonstrates `head`, `tail` and sorting a single column.
///
/// Prints the first three rows, the last three rows, and the `class` column
/// sorted with the default sort options.
///
/// # Panics
/// Panics if the lazy query fails, consistent with the other demos in this
/// file.
fn head_tail_sort() {
    println!("------------------head_tail_sort test-------------------");
    let df = df!(
        "scores" => ["A", "B", "C", "B", "A", "B"],
        "class" => [1, 3, 1, 1, 2, 3]
    )
    .unwrap();
    let head = df.head(Some(3));
    let tail = df.tail(Some(3));
    // Sort the `class` column into a new one-column frame. The frame has no
    // "value" column, so the previous `col("value")` could only ever yield an
    // error, which went unnoticed because the `Result` was never checked.
    let sort = df
        .lazy()
        .select([col("class").sort(Default::default())])
        .collect()
        .unwrap();
    println!("df head :{:?}", head);
    println!("df tail:{:?}", tail);
    println!("df sort:{:?}", sort);
}
/// Demonstrates row filtering and a grouped aggregation on a small frame
/// whose `random` column is filled from the thread-local RNG.
///
/// Prints the rows of group `"A"`, the full frame, and the per-group
/// aggregation result.
fn filter_group_by_agg() {
    println!("----------filter_group_by_agg test--------------");
    use rand::{thread_rng, Rng};
    let mut random_values = [0f64; 5];
    thread_rng().fill(&mut random_values);
    let frame = df!(
        "nrs" => &[Some(1), Some(2), Some(3), None, Some(5)],
        "names" => &[Some("foo"), Some("ham"), Some("spam"), Some("eggs"), None],
        "random" => &random_values,
        "groups" => &["A", "A", "B", "C", "B"],
    )
    .unwrap();

    // The frame is reused below, so the filter runs on a clone.
    let is_group_a = col("groups").eq(lit("A"));
    let group_a = frame.clone().lazy().filter(is_group_a).collect().unwrap();
    println!("df2 :{:?}", group_a);
    println!("{}", &frame);

    let aggregations = [
        // Sum of `nrs` per group.
        sum("nrs"),
        // Number of members per group.
        col("random").count().alias("count"),
        // Sum of `random` over the rows whose `names` is not null.
        col("random")
            .filter(col("names").is_not_null())
            .sum()
            .name()
            .suffix("_sum"),
        col("names").reverse().alias("reversed names"),
    ];
    let out = frame
        .lazy()
        .group_by([col("groups")])
        .agg(aggregations)
        .collect()
        .unwrap();
    println!("{}", out);
}
/// Demonstrates wrapping a column into a list column and selecting all
/// columns except a few.
///
/// Prints a one-element slice of the `close` column converted with
/// `as_list`, then the frame without the `code` and `low` columns.
fn filter_by_exclude() {
    let df = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    ).unwrap();
    // Index an existing column: this frame has no "keys" column, so the
    // previous `df["keys"]` panicked before anything was printed.
    let lst = df["close"].as_list().slice(1, 1);
    println!("s :{:?}", lst);
    let df_filter = df
        .lazy()
        .select([all().exclude(["code", "low"])])
        .collect()
        .unwrap();
    println!("df_filter :{}", df_filter);
}
/// Demonstrates window expressions with `over`.
///
/// `col("value").min().over([col("key")])` partitions the rows by `key` and
/// evaluates the minimum inside each partition; the same is done for the
/// maximum. Prints up to ten rows of the result.
fn windows_over() {
    println!("------------- windows_over test ---------------- ");
    let frame = df!(
        "key" => ["a", "a", "a", "a", "b", "c"],
        "value" => [1, 2, 1, 3, 3, 3]
    )
    .unwrap();

    // Other aggregations such as `.max()` or `.mean()` can be used the same way.
    let min_per_key = col("value").min().over([col("key")]).alias("over_min");
    let max_per_key = col("value").max().over([col("key")]).alias("over_max");

    let windowed = frame
        .lazy()
        .with_column(min_per_key)
        .with_column(max_per_key);
    println!("{:?}", windowed.collect().unwrap().head(Some(10)));
}
//read_csv
/// Reads a CSV file through `LazyCsvReader`, then prints the resulting shape
/// and the elapsed time in seconds.
///
/// Sample rows of the expected file (from the original author):
/// `600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1`
/// `600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1`
///
/// # Panics
/// Panics if the file cannot be scanned or collected.
fn lazy_read_csv() {
    println!("------------- lazy_read_csv test ---------------- ");
    let csv_path = "../my_duckdb/src/test.csv";
    let started = Instant::now();
    let lazy_frame = LazyCsvReader::new(csv_path)
        // Per the original author's note, this needs the `temporal` feature.
        .with_try_parse_dates(true)
        .with_has_header(true)
        .finish()
        .unwrap();
    let frame = lazy_frame.collect().expect("error to dataframe!");
    println!("polars lazy 读出csv的行和列数:{:?}", frame.shape());
    println!("polars lazy 读csv 花时: {:?} 秒!", started.elapsed().as_secs_f32());
}
/// Reads a CSV file eagerly from an opened `File` with `CsvReader`, then
/// prints the resulting shape and the elapsed time in seconds.
///
/// Sample rows of the expected file (from the original author):
/// `600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1`
/// `600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1`
///
/// # Panics
/// Panics if the file cannot be opened or parsed.
fn read_csv() {
    println!("------------- read_csv test ---------------- ");
    use std::fs::File;
    let started = Instant::now();
    let csv_path = "../my_duckdb/src/test.csv";
    let handle = File::open(csv_path).expect("could not read file");
    let frame = CsvReader::new(handle).finish().unwrap();
    println!("读出csv的行和列数:{:?}", frame.shape());
    println!("读csv 花时: {:?} 秒!", started.elapsed().as_secs_f32());
}
/// Reads a CSV file through `CsvReadOptions` (with a header row) and prints
/// the resulting frame.
///
/// Sample rows of the expected file (from the original author):
/// `600036.XSHG,2079/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1`
/// `600036.XSHG,2079/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1`
///
/// # Panics
/// Panics if the reader cannot be created or the file cannot be parsed.
fn read_csv2() {
    println!("------------- read_csv2 test ---------------- ");
    let csv_path = "../my_duckdb/src/test.csv";
    let reader = CsvReadOptions::default()
        .with_has_header(true)
        .try_into_reader_with_file_path(Some(csv_path.into()))
        .unwrap();
    let frame = reader.finish().unwrap();
    println!("read_csv2 => df {:?}", frame)
}
/// Reads a CSV file with date parsing enabled on the parse options and
/// prints the resulting frame.
///
/// Sample rows of the expected file (from the original author):
/// `600036.XSHG,2019/7/24,3345.9,3357.8,3326.7,3357,33589,69181710.57,1`
/// `600036.XSHG,2019/7/25,3346,3357.9,3326.8,3357.1,33590,69184251.47,1`
///
/// # Panics
/// Panics if the reader cannot be created or the file cannot be parsed.
fn parse_date_csv() {
    let csv_path = "../my_duckdb/src/test.csv";
    let options = CsvReadOptions::default()
        .map_parse_options(|parse_options| parse_options.with_try_parse_dates(true));
    let frame = options
        .try_into_reader_with_file_path(Some(csv_path.into()))
        .unwrap()
        .finish()
        .unwrap();
    println!("{}", &frame);
}
/// Builds a small price frame and writes it to `600036SH.csv` in the current
/// working directory.
///
/// Note from the original author: this needs the `csv` cargo feature.
///
/// # Panics
/// Panics if the file cannot be created or written.
fn write_csv_df() {
    let mut frame = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();

    let mut out_file = std::fs::File::create("600036SH.csv").unwrap();
    let mut writer = CsvWriter::new(&mut out_file);
    writer.finish(&mut frame).unwrap();
}
/// Walks a `DataFrame` row by row, gathering each row's values into a `Vec`,
/// and prints how long the traversal took.
///
/// The per-row vectors are built and dropped immediately; only the timing is
/// reported.
fn iter_dataframe_as_row() {
    println!("------------- iter_dataframe_as_row test ---------------- ");
    let started = Instant::now();
    let frame: DataFrame = df!("D1" => &[1, 3, 1, 5, 6],"D2" => &[3, 2, 3, 5, 3]).unwrap();
    let (n_rows, n_cols) = frame.shape();
    for row_idx in 0..n_rows {
        // Pick the value at `row_idx` out of every column, left to right.
        let _row: Vec<_> = (0..n_cols)
            .map(|col_idx| frame[col_idx].get(row_idx).unwrap())
            .collect();
    }
    println!("dataframe按行遍历cost time :{:?} seconds!", started.elapsed().as_secs_f32());
}

/// Demonstrates an inner join of two frames on `Model` and a vertical
/// concatenation of two single-row frames; prints both results.
fn join_concat() {
    println!("------------- df_join_df test ---------------- ");
    // Sales table; the `Comment` column contains nulls.
    let sales = df![
        "Model" => ["iPhone XS", "iPhone 12", "iPhone 13", "iPhone 14", "Samsung S11", "Samsung S12", "Mi A1", "Mi A2"],
        "Company" => ["Apple", "Apple", "Apple", "Apple", "Samsung", "Samsung", "Xiao Mi", "Xiao Mi"],
        "Sales" => [80, 170, 130, 205, 400, 30, 14, 8],
        "Comment" => [None, None, Some("Sold Out"), Some("New Arrival"), None, Some("Sold Out"), None, None],
    ]
    .unwrap();

    let prices = df![
        "Model" => ["iPhone XS", "iPhone 12", "iPhone 13", "iPhone 14", "Samsung S11", "Samsung S12", "Mi A1", "Mi A2"],
        "Price" => [2430, 3550, 5700, 8750, 2315, 3560, 980, 1420],
        "Discount" => [Some(0.85), Some(0.85), Some(0.8), None, Some(0.87), None, Some(0.66), Some(0.8)],
    ]
    .unwrap();

    // `join` is called with four arguments here: the other frame, the left
    // key columns, the right key columns and the join arguments.
    let join_args = JoinArgs::from(JoinType::Inner);
    let joined = sales.join(&prices, ["Model"], ["Model"], join_args).unwrap();
    println!("{:?}", &joined);

    let top = df!(
        "a"=> &[1],
        "b"=> &[3],
    )
    .unwrap();
    let bottom = df!(
        "a"=> &[2],
        "b"=> &[4],
    )
    .unwrap();
    // Stack the two frames on top of each other through the lazy API.
    let stacked = concat([top.lazy(), bottom.lazy()], UnionArgs::default())
        .unwrap()
        .collect()
        .unwrap();
    println!("{}", &stacked);
    // Horizontal concatenation sketch left disabled by the original author:
    // let df_h1 = df!(
    //     "l1"=> &[1, 2],
    //     "l2"=> &[3, 4],
    // ).unwrap();
    // let df_h2 = df!(
    //         "r1"=> &[5, 6],
    //         "r2"=> &[7, 8],
    //         "r3"=> &[9, 10],
    // ).unwrap();
    // //let df_horizontal_concat = polars::functions::concat_df_horizontal(&[df_h1, df_h2], true).unwrap();
    // //println!("{}", &df_horizontal_concat);
}

/// Demonstrates taking a row slice of a frame and reading a single value.
fn get_slice_scalar_from_df() {
    println!("------------- get_slice_scalar_from_df test ---------------- ");
    let frame: DataFrame = df!("D1" => &[1, 2, 3, 4, 5],"D2" => &[3, 2, 3, 5, 3]).unwrap();
    // slice(1, 4): start at row index 1 (the second row) and take four rows.
    let window = frame.slice(1, 4);
    println!("slice :{:?}", window);
    // Second column (index 1), row index 3, i.e. that column's fourth value.
    let value = frame[1].get(3).unwrap();
    println!("saclar :{:?}", value);
}
/// Demonstrates replacing one column and dropping another, then prints the
/// resulting frame.
fn replace_drop_col() {
    println!("------------- replace_drop test ---------------- ");
    let mut frame: DataFrame = df!("D1" => &[1, 2, 3, 4, 5],"D2" => &[3, 2, 3, 5, 3]).unwrap();
    // The replacement series is created with an empty name; per the original
    // author's note the replaced column keeps its name "D1" (confirm against
    // the polars `DataFrame::replace` documentation).
    let replacement = Series::new("", &[2, 3, 4, 5, 6]);
    // Replace column D1 in place, then build a new frame without column D2.
    let without_d2 = frame
        .replace("D1", replacement)
        .unwrap()
        .drop_many(&["D2"]);
    println!("df3:{:?}", without_d2);
}
/// Demonstrates forward-filling nulls and then dropping the rows that still
/// hold a null in `D1`; prints both intermediate frames.
///
/// Fill strategies listed by the original author:
/// - `Forward(Option)` / `Backward(Option)`: fill a null from the neighbouring
///   non-null value in the given direction. NOTE(review): the optional
///   argument appears to be a limit on consecutive fills rather than an
///   index; confirm against the `FillNullStrategy` documentation.
/// - `Mean`: fill with the arithmetic mean
/// - `Min` / `Max`: fill with the minimum / maximum
/// - `Zero` / `One`: fill with 0 / 1
/// - `MaxBound` / `MinBound`: fill with the upper / lower bound of the dtype
fn drop_null_fill_null() {
    println!("------------- drop_null_fill_null test ---------------- ");
    let frame: DataFrame = df!("D1" => &[None, Some(2), Some(3), Some(4), None],"D2" => &[3, 2, 3, 5, 3]).unwrap();

    // Forward fill with no limit.
    let filled = frame.fill_null(FillNullStrategy::Forward(None)).unwrap();
    println!("fill_null :{:?}", filled);

    // Drop the rows whose D1 value is still null.
    let subset = ["D1"];
    let cleaned = filled.drop_nulls(Some(&subset)).unwrap();
    println!("drop_nulls :{:?}", cleaned);
}
/// Adds a `ret` column holding each close price divided by the first close
/// price, minus 1, and prints the resulting frame.
fn compute_return() {
    let prices = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();

    // close / first(close) - 1
    let ret_expr = (col("close") / col("close").first() - lit(1.0)).alias("ret");
    let with_return = prices.lazy().with_columns([ret_expr]).collect().unwrap();
    println!("_df :{}", with_return)
}
/// Scales every column by its maximum and then subtracts the column mean,
/// printing the result.
fn standardlize_center() {
    println!("------------- standardlize test ---------------- ");
    let frame: DataFrame = df!("D1" => &[1, 2, 3, 4, 5],"D2" => &[3, 2, 3, 5, 3]).unwrap();

    // Scaling: for all columns, divide each value by the column maximum.
    // Both operands are cast to Float64 first.
    let as_float = col("*").cast(DataType::Float64);
    let column_max = col("*").cast(DataType::Float64).max();
    let scaled = frame.lazy().select([as_float / column_max]);

    // Centering: subtract the column mean from the scaled values.
    let centered = scaled
        .select([col("*") - col("*").mean()])
        .collect()
        .unwrap();
    println!("standardlize : {:?}", centered);
}

/// Demonstrates a custom aggregation with `apply` inside `group_by`/`agg`.
///
/// For every `keys` group the closure sums `value + 1` over the group's
/// non-null `values` (nulls contribute 0) and returns it as a one-element
/// series; the first list element is then exposed as `aggr_sum_first`.
fn creat_list_in_df_by_apply() {
    println!("----------apply_list test ------------------------");
    let frame = df!(
        "keys" => &["rust", "go", "julia"],
        "values" => &[1032, 222, 42],
    )
    .unwrap();

    let custom_sum = col("values")
        .apply(
            |s| {
                let values = s.i32().unwrap();
                let total: i32 = values
                    .into_iter()
                    .map(|item| item.map_or(0, |n| n + 1))
                    .sum();
                Ok(Some(Series::new("_", [total])))
            },
            GetOutput::default(),
        )
        .alias("aggr_sum");

    let out = frame
        .lazy()
        .group_by([col("keys")])
        .agg([custom_sum])
        .with_column(col("aggr_sum").list().first().alias("aggr_sum_first"))
        .collect()
        .unwrap();
    println!("{}", out);
}

/// Demonstrates returning a struct column from `apply`.
///
/// The closure maps the `values` column to a struct series named `vals` with
/// two fields: `v1` (value * 10) and `v2` (value * 20); nulls stay null.
fn create_struct_in_df_by_apply() {
    use polars::prelude::*;
    let frame = df!(
        "keys" => &["a", "a", "b"],
        "values" => &[10, 7, 1],
    )
    .unwrap();

    let to_struct = col("values").apply(
        |s| {
            let values = s.i32()?;
            let times_ten: Vec<Option<i32>> =
                values.into_iter().map(|v| v.map(|n| n * 10)).collect();
            let times_twenty: Vec<Option<i32>> =
                values.into_iter().map(|v| v.map(|n| n * 20)).collect();
            // Pack both vectors into a frame, then collapse it into one
            // struct series.
            let packed = df!(
                "v1" => &times_ten,
                "v2" => &times_twenty,
            )
            .unwrap()
            .into_struct("vals")
            .into_series();
            Ok(Some(packed))
        },
        GetOutput::default(),
    );

    let out = frame.lazy().with_column(to_struct).collect().unwrap();
    println!("{}", out);
}

/// Prints a ratings frame and the value counts of its `Theatre` column.
fn field_value_counts() {
    println!("--------------value_counts test---------------");
    let ratings = df!(
        "Movie"=> &["Cars", "IT", "ET", "Cars", "Up", "IT", "Cars", "ET", "Up", "ET"],
        "Theatre"=> &["NE", "ME", "IL", "ND", "NE", "SD", "NE", "IL", "IL", "SD"],
        "Avg_Rating"=> &[4.5, 4.4, 4.6, 4.3, 4.8, 4.7, 4.7, 4.9, 4.7, 4.6],
        "Count"=> &[30, 27, 26, 29, 31, 28, 28, 26, 33, 26],
    )
    .unwrap();
    println!("{}", &ratings);

    // The ratings frame is consumed only after it has been printed above.
    let theatre_counts = col("Theatre").value_counts(true, true, "count".to_string(), false);
    let out = ratings.lazy().select([theatre_counts]).collect().unwrap();
    println!("{}", &out);
}

/// Converts an iterable of structs into a `DataFrame` via `df!`.
///
/// `$input` is consumed with `into_iter()`; each listed field is moved out of
/// every element into its own `Vec`, and the vectors become columns named
/// after the fields. Expands to whatever `df!` returns.
macro_rules! structs_to_dataframe {
    ($input:expr, [$($field:ident),+]) => {{
        // One column buffer per requested field.
        $(let mut $field = Vec::new();)+
        for item in $input.into_iter() {
            $($field.push(item.$field);)+
        }
        df! {
            $(stringify!($field) => $field,)+
        }
    }};
}
/// Placeholder for the reverse conversion (`DataFrame` -> `Vec<Bar>`).
///
/// Not implemented yet: the arguments are ignored and an empty `Vec<Bar>` is
/// produced. `Bar` must be defined at the call site.
macro_rules! dataframe_to_structs_todo {
    ($df:expr, [$($field:ident),+]) => {{
        // todo!
        let placeholder: Vec<Bar> = Vec::new();
        placeholder
    }};
}
/// Work-in-progress demo for converting a `DataFrame` into `Vec<Bar>` with
/// the `dataframe_to_structs_todo!` macro, which is still a stub; only the
/// source frame is printed.
fn df_to_structs_by_macro_todo() {
    struct Bar {
        date: NaiveDate,
        close: f64,
        open: f64,
        high: f64,
        low: f64,
    }
    impl Bar {
        fn bar(date: NaiveDate, close: f64, open: f64, high: f64, low: f64) -> Self {
            Bar { date, close, open, high, low }
        }
    }

    let frame = df!(
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();
    // The stub macro currently yields an empty vector regardless of its input.
    let _bars: Vec<Bar> = dataframe_to_structs_todo!(frame, [date, close, open, high, low]);
    println!("df:{:?}", frame);
}

/// Builds two `Bar` values and converts them into a `DataFrame` with the
/// `structs_to_dataframe!` macro, then prints the frame.
fn structs_to_df() {
    struct Bar {
        date: NaiveDate,
        close: f64,
        open: f64,
        high: f64,
        low: f64,
    }
    impl Bar {
        fn new(date: NaiveDate, close: f64, open: f64, high: f64, low: f64) -> Self {
            Bar { date, close, open, high, low }
        }
    }

    let first_day = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
    let second_day = NaiveDate::from_ymd_opt(2024, 1, 2).unwrap();
    let test_bars: Vec<Bar> = vec![
        Bar::new(first_day, 10.1, 10.12, 10.2, 9.99),
        Bar::new(second_day, 10.2, 10.22, 10.3, 10.1),
    ];
    let frame = structs_to_dataframe!(test_bars, [date, close, open, high, low]).unwrap();
    println!("df:{:?}", frame);
}

/// Work-in-progress serde demo: only builds a one-column frame for now.
fn serde_df_todo() {
    let _frame = df! {
        "a" => [1, 2, 3, 4, 5],
    }
    .unwrap();
    // Serialization step left disabled by the original author:
    //let df_json = serde_json::to_value(&df).unwrap();
    //println!("df_json {df_json}");
}

/// Converts a `DataFrame` into `Vec<Bar>` by turning it into a struct column
/// and iterating its rows; prints the elapsed time and the resulting bars.
///
/// Date values arrive as `AnyValue::Date(days)`, treated here (as the
/// previous arithmetic did) as a day count relative to 1970-01-01. The
/// conversion adds chrono's day number of 1970-01-01 once, instead of
/// recomputing a 0001-01-01 / 1970-01-02 offset on every row.
///
/// As before, a row whose `code` is not a string yields an empty code, and a
/// row whose `date` is not a date yields 2000-01-01.
///
/// # Panics
/// Panics if a price field cannot be extracted as `f64` or a date is out of
/// chrono's representable range.
fn df_to_structs_by_iter() {
    let now = Instant::now();
    #[derive(Debug, Clone)]
    struct Bar {
        code: String,
        date: NaiveDate,
        close: f64,
        open: f64,
        high: f64,
        low: f64,
    }
    impl Bar {
        fn new(code: String, date: NaiveDate, close: f64, open: f64, high: f64, low: f64) -> Self {
            Bar { code, date, close, open, high, low }
        }
    }
    let df = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();

    // Loop-invariant: chrono's day number (days from 0001-01-01 = day 1) of
    // the epoch the stored day counts are relative to.
    let epoch_days_from_ce = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap().num_days_from_ce();
    let fallback_date = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();

    let mut bars: Vec<Bar> = Vec::with_capacity(df.height());
    let rows_data = df.into_struct("bars");
    // Field order inside each row follows the column order of the frame:
    // code, date, close, open, high, low.
    for row_data in &rows_data {
        let new_code = match &row_data[0] {
            AnyValue::String(value) => value.to_string(),
            _ => String::new(),
        };
        let new_date = match &row_data[1] {
            AnyValue::Date(days) => {
                NaiveDate::from_num_days_from_ce_opt(*days + epoch_days_from_ce).unwrap()
            }
            _ => fallback_date,
        };
        let close = row_data[2].extract::<f64>().unwrap();
        let open = row_data[3].extract::<f64>().unwrap();
        let high = row_data[4].extract::<f64>().unwrap();
        let low = row_data[5].extract::<f64>().unwrap();
        bars.push(Bar::new(new_code, new_date, close, open, high, low));
    }
    println!("df_to_structs2 => structchunk : cost time :{:?}", now.elapsed().as_secs_f32());
    println!("bars :{:?}", bars);
}
/// Converts a `DataFrame` into `Vec<Bar>` by zipping typed column iterators;
/// prints the elapsed time and the resulting bars.
///
/// The same approach works for building tuples or other structs from a frame.
///
/// # Panics
/// Panics if a column has an unexpected dtype or contains a null.
fn df_to_structs_by_zip() {
    let started = Instant::now();
    #[derive(Debug, Clone)]
    struct Bar {
        code: String,
        date: NaiveDate,
        close: f64,
        open: f64,
        high: f64,
        low: f64,
    }
    impl Bar {
        fn new(code: String, date: NaiveDate, close: f64, open: f64, high: f64, low: f64) -> Self {
            Bar { code, date, close, open, high, low }
        }
    }
    let frame = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();

    // One typed iterator per column.
    let codes = frame["code"].str().unwrap().iter();
    let dates = frame["date"].date().unwrap().as_date_iter();
    let closes = frame["close"].f64().unwrap().iter();
    let opens = frame["open"].f64().unwrap().iter();
    let highs = frame["high"].f64().unwrap().iter();
    let lows = frame["low"].f64().unwrap().iter();

    let bars: Vec<Bar> = codes
        .zip(dates)
        .zip(closes)
        .zip(opens)
        .zip(highs)
        .zip(lows)
        .map(|(((((code, date), close), open), high), low)| {
            Bar::new(
                code.unwrap().to_string(),
                date.unwrap(),
                close.unwrap(),
                open.unwrap(),
                high.unwrap(),
                low.unwrap(),
            )
        })
        .collect();
    println!("df_to_struct3 => zip : cost time :{:?} seconds!", started.elapsed().as_secs_f32());
    println!("bars :{:?}", bars);
    // Alternative from the original author: `izip!` from itertools avoids the
    // nested tuple pattern.
    //use itertools::izip;
    //izip!(code, date, close, open,high,low).collect::<Vec<_>>() // Vec of 6-tuples
}

/// Collects a date column into a `Vec<Option<NaiveDate>>` and prints it.
fn series_to_vec() {
    let frame = df!(
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
    )
    .unwrap();
    let date_column = frame["date"].date().unwrap();
    let dates: Vec<Option<NaiveDate>> = date_column.as_date_iter().collect();
    println!("vec :{:?}", dates)
}
/// Collects a string column into a `Vec<Option<&str>>` and prints it.
///
/// # Panics
/// Panics if the `lang` column is missing or is not a string column.
fn series_to_vec2() {
    let df = df!("lang" =>&["rust","go","julia"],).unwrap();
    // Read the column that actually exists: the frame only has "lang", so the
    // previous `df["date"]` panicked. The iterator already yields
    // `Option<&str>`, so no per-item mapping is needed.
    let vec: Vec<Option<&str>> = df["lang"].str().unwrap().into_iter().collect();
    println!("vec:{:?}", vec);
}
/// Demonstrates working with a struct column: packs a frame into a struct
/// series, extracts one field as a series, and prints every row's fields.
///
/// Note from the original author: this needs the `dtype-struct` feature.
fn structs_in_df() {
    println!("-----------structs_in_df test -----------------");
    let bars_series = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap()
    .into_struct("bars")
    .into_series();
    println!("{}", &bars_series);

    // Pull a single field out of the struct column as its own series.
    let close = bars_series.struct_().unwrap().field_by_name("close").unwrap();
    println!("out :{}", close);

    // Visit the struct values row by row; fields are indexed in column order.
    for row in bars_series.struct_().unwrap() {
        println!("code :{} date :{} close:{},open:{},high:{},low:{}", row[0], row[1], row[2], row[3], row[4], row[5]);
    }
}
/// Demonstrates converting a column into a list column with `as_list` and
/// prints the result.
///
/// # Panics
/// Panics if the frame cannot be built.
fn list_in_df() {
    let df = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();

    // Use an existing column: there is no "keys" column in this frame, so the
    // previous `df["keys"]` panicked. The result is printed so the demo shows
    // something.
    let lst = df["close"].as_list();
    println!("lst :{:?}", lst);
}

/// Serializes a frame to a `serde_json` value and prints it.
///
/// Note from the original author: this needs the `serde` cargo feature.
fn serialize_df_to_json() {
    let frame = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();
    let json = serde_json::to_value(&frame).unwrap();
    println!("df_json {}", json);
}

/// Work-in-progress demo for binary serialization: only builds the frame.
///
/// Note from the original author: this needs the `serde` cargo feature.
fn serialize_df_to_binary_todo() {
    let _frame = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();
    // todo: the serialization step below was left disabled by the original author.
    //let df_binary = serde_json::to_value(&df).unwrap();
    //println!("df_json {df_binary}");
}

/// Drops the `code` and `low` columns and converts the remaining columns into
/// a `Float64` ndarray (Fortran index order), then prints it.
///
/// Note from the original author: this needs the `ndarray` cargo feature.
fn df_to_ndarray() {
    let frame = df!(
        "code" => &["600036.SH".to_string(),"600036.SH".to_string(),"600036.SH".to_string()],
        "date" =>&[NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 15).unwrap(),
                   NaiveDate::from_ymd_opt(2015, 3, 16).unwrap(),],
        "close" => &[1.21,1.22,1.23],
        "open" => &[1.22,1.21,1.23],
        "high" => &[1.22,1.25,1.24],
        "low" => &[1.19, 1.20,1.21],
    )
    .unwrap();
    let kept_columns = all().exclude(["code", "low"]);
    let numeric = frame.lazy().select([kept_columns]).collect().unwrap();
    let matrix = numeric
        .to_ndarray::<Float64Type>(IndexOrder::Fortran)
        .unwrap();
    println!("ndarray :{}", matrix);
}
相关推荐
versatile_zpc3 分钟前
C++初阶:类和对象(上)
开发语言·c++
尘浮生4 分钟前
Java项目实战II基于微信小程序的移动学习平台的设计与实现(开发文档+数据库+源码)
java·开发语言·数据库·spring boot·学习·微信小程序·小程序
娅娅梨1 小时前
C++ 错题本--not found for architecture x86_64 问题
开发语言·c++
汤米粥1 小时前
小皮PHP连接数据库提示could not find driver
开发语言·php
冰淇淋烤布蕾1 小时前
EasyExcel使用
java·开发语言·excel
Leo.yuan1 小时前
数据量大Excel卡顿严重?选对报表工具提高10倍效率
数据库·数据分析·数据可视化·powerbi
拾荒的小海螺1 小时前
JAVA:探索 EasyExcel 的技术指南
java·开发语言
马剑威(威哥爱编程)2 小时前
哇喔!20种单例模式的实现与变异总结
java·开发语言·单例模式
白-胖-子2 小时前
【蓝桥等考C++真题】蓝桥杯等级考试C++组第13级L13真题原题(含答案)-统计数字
开发语言·c++·算法·蓝桥杯·等考·13级
好睡凯2 小时前
c++写一个死锁并且自己解锁
开发语言·c++·算法