Spark SQL----内置函数Aggregate Functions

Spark SQL----内置函数Aggregate Functions

sql 复制代码
-- any
SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col);
+--------+
|any(col)|
+--------+
|    true|
+--------+

SELECT any(col) FROM VALUES (NULL), (true), (false) AS tab(col);
+--------+
|any(col)|
+--------+
|    true|
+--------+

SELECT any(col) FROM VALUES (false), (false), (NULL) AS tab(col);
+--------+
|any(col)|
+--------+
|   false|
+--------+

-- any_value
SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col);
+--------------+
|any_value(col)|
+--------------+
|            10|
+--------------+

SELECT any_value(col) FROM VALUES (NULL), (5), (20) AS tab(col);
+--------------+
|any_value(col)|
+--------------+
|          NULL|
+--------------+

SELECT any_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
+--------------+
|any_value(col)|
+--------------+
|             5|
+--------------+

-- approx_count_distinct
SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1);
+---------------------------+
|approx_count_distinct(col1)|
+---------------------------+
|                          3|
+---------------------------+

-- approx_percentile
SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);
+-------------------------------------------------+
|approx_percentile(col, array(0.5, 0.4, 0.1), 100)|
+-------------------------------------------------+
|                                        [1, 1, 0]|
+-------------------------------------------------+

SELECT approx_percentile(col, 0.5, 100) FROM VALUES (0), (6), (7), (9), (10) AS tab(col);
+--------------------------------+
|approx_percentile(col, 0.5, 100)|
+--------------------------------+
|                               7|
+--------------------------------+

SELECT approx_percentile(col, 0.5, 100) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '1' MONTH), (INTERVAL '2' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+--------------------------------+
|approx_percentile(col, 0.5, 100)|
+--------------------------------+
|              INTERVAL '1' MONTH|
+--------------------------------+

SELECT approx_percentile(col, array(0.5, 0.7), 100) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '1' SECOND), (INTERVAL '2' SECOND), (INTERVAL '10' SECOND) AS tab(col);
+--------------------------------------------+
|approx_percentile(col, array(0.5, 0.7), 100)|
+--------------------------------------------+
|                        [INTERVAL '01' SE...|
+--------------------------------------------+

-- array_agg
SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col);
+-----------------+
|collect_list(col)|
+-----------------+
|        [1, 2, 1]|
+-----------------+

-- avg
SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);
+--------+
|avg(col)|
+--------+
|     2.0|
+--------+

SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col);
+--------+
|avg(col)|
+--------+
|     1.5|
+--------+

-- bit_and
SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col);
+------------+
|bit_and(col)|
+------------+
|           1|
+------------+

-- bit_or
SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col);
+-----------+
|bit_or(col)|
+-----------+
|          7|
+-----------+

-- bit_xor
SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);
+------------+
|bit_xor(col)|
+------------+
|           6|
+------------+

-- bitmap_construct_agg
SELECT substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6) FROM VALUES (1), (2), (3) AS tab(col);
+--------------------------------------------------------------------+
|substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6)|
+--------------------------------------------------------------------+
|                                                              070000|
+--------------------------------------------------------------------+

SELECT substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6) FROM VALUES (1), (1), (1) AS tab(col);
+--------------------------------------------------------------------+
|substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6)|
+--------------------------------------------------------------------+
|                                                              010000|
+--------------------------------------------------------------------+

-- bitmap_or_agg
SELECT substring(hex(bitmap_or_agg(col)), 0, 6) FROM VALUES (X '10'), (X '20'), (X '40') AS tab(col);
+----------------------------------------+
|substring(hex(bitmap_or_agg(col)), 0, 6)|
+----------------------------------------+
|                                  700000|
+----------------------------------------+

SELECT substring(hex(bitmap_or_agg(col)), 0, 6) FROM VALUES (X '10'), (X '10'), (X '10') AS tab(col);
+----------------------------------------+
|substring(hex(bitmap_or_agg(col)), 0, 6)|
+----------------------------------------+
|                                  100000|
+----------------------------------------+

-- bool_and
SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);
+-------------+
|bool_and(col)|
+-------------+
|         true|
+-------------+

SELECT bool_and(col) FROM VALUES (NULL), (true), (true) AS tab(col);
+-------------+
|bool_and(col)|
+-------------+
|         true|
+-------------+

SELECT bool_and(col) FROM VALUES (true), (false), (true) AS tab(col);
+-------------+
|bool_and(col)|
+-------------+
|        false|
+-------------+

-- bool_or
SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);
+------------+
|bool_or(col)|
+------------+
|        true|
+------------+

SELECT bool_or(col) FROM VALUES (NULL), (true), (false) AS tab(col);
+------------+
|bool_or(col)|
+------------+
|        true|
+------------+

SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col);
+------------+
|bool_or(col)|
+------------+
|       false|
+------------+

-- collect_list
SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col);
+-----------------+
|collect_list(col)|
+-----------------+
|        [1, 2, 1]|
+-----------------+

-- collect_set
SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col);
+----------------+
|collect_set(col)|
+----------------+
|          [1, 2]|
+----------------+

-- corr
SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2);
+------------------+
|      corr(c1, c2)|
+------------------+
|0.8660254037844387|
+------------------+

-- count
SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
+--------+
|count(1)|
+--------+
|       4|
+--------+

SELECT count(col) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
+----------+
|count(col)|
+----------+
|         3|
+----------+

SELECT count(DISTINCT col) FROM VALUES (NULL), (5), (5), (10) AS tab(col);
+-------------------+
|count(DISTINCT col)|
+-------------------+
|                  2|
+-------------------+

-- count_if
SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
+-------------------------+
|count_if(((col % 2) = 0))|
+-------------------------+
|                        2|
+-------------------------+

SELECT count_if(col IS NULL) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
+-----------------------+
|count_if((col IS NULL))|
+-----------------------+
|                      1|
+-----------------------+

-- count_min_sketch
SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col);
+---------------------------------------+
|hex(count_min_sketch(col, 0.5, 0.5, 1))|
+---------------------------------------+
|                   00000001000000000...|
+---------------------------------------+

-- covar_pop
SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
+------------------+
| covar_pop(c1, c2)|
+------------------+
|0.6666666666666666|
+------------------+

-- covar_samp
SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
+------------------+
|covar_samp(c1, c2)|
+------------------+
|               1.0|
+------------------+

-- every
SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col);
+----------+
|every(col)|
+----------+
|      true|
+----------+

SELECT every(col) FROM VALUES (NULL), (true), (true) AS tab(col);
+----------+
|every(col)|
+----------+
|      true|
+----------+

SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col);
+----------+
|every(col)|
+----------+
|     false|
+----------+

-- first
SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);
+----------+
|first(col)|
+----------+
|        10|
+----------+

SELECT first(col) FROM VALUES (NULL), (5), (20) AS tab(col);
+----------+
|first(col)|
+----------+
|      NULL|
+----------+

SELECT first(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
+----------+
|first(col)|
+----------+
|         5|
+----------+

-- first_value
SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);
+----------------+
|first_value(col)|
+----------------+
|              10|
+----------------+

SELECT first_value(col) FROM VALUES (NULL), (5), (20) AS tab(col);
+----------------+
|first_value(col)|
+----------------+
|            NULL|
+----------------+

SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
+----------------+
|first_value(col)|
+----------------+
|               5|
+----------------+

-- grouping
SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
+-----+--------------+--------+
| name|grouping(name)|sum(age)|
+-----+--------------+--------+
| NULL|             1|       7|
|Alice|             0|       2|
|  Bob|             0|       5|
+-----+--------------+--------+

-- grouping_id
SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
+-----+-------------+--------+-----------+
| name|grouping_id()|sum(age)|avg(height)|
+-----+-------------+--------+-----------+
| NULL|            2|       2|      165.0|
|Alice|            0|       2|      165.0|
|Alice|            1|       2|      165.0|
| NULL|            3|       7|      172.5|
|  Bob|            1|       5|      180.0|
|  Bob|            0|       5|      180.0|
| NULL|            2|       5|      180.0|
+-----+-------------+--------+-----------+

-- histogram_numeric
SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col);
+-------------------------+
|histogram_numeric(col, 5)|
+-------------------------+
|     [{0, 1.0}, {1, 1....|
+-------------------------+

-- hll_sketch_agg
SELECT hll_sketch_estimate(hll_sketch_agg(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col);
+--------------------------------------------+
|hll_sketch_estimate(hll_sketch_agg(col, 12))|
+--------------------------------------------+
|                                           3|
+--------------------------------------------+

-- hll_union_agg
SELECT hll_sketch_estimate(hll_union_agg(sketch, true)) FROM (SELECT hll_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT hll_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col));
+------------------------------------------------+
|hll_sketch_estimate(hll_union_agg(sketch, true))|
+------------------------------------------------+
|                                               1|
+------------------------------------------------+

-- kurtosis
SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
+-------------------+
|      kurtosis(col)|
+-------------------+
|-0.7014368047529618|
+-------------------+

SELECT kurtosis(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col);
+-------------------+
|      kurtosis(col)|
+-------------------+
|0.19432323191698986|
+-------------------+

-- last
SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col);
+---------+
|last(col)|
+---------+
|       20|
+---------+

SELECT last(col) FROM VALUES (10), (5), (NULL) AS tab(col);
+---------+
|last(col)|
+---------+
|     NULL|
+---------+

SELECT last(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
+---------+
|last(col)|
+---------+
|        5|
+---------+

-- last_value
SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col);
+---------------+
|last_value(col)|
+---------------+
|             20|
+---------------+

SELECT last_value(col) FROM VALUES (10), (5), (NULL) AS tab(col);
+---------------+
|last_value(col)|
+---------------+
|           NULL|
+---------------+

SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
+---------------+
|last_value(col)|
+---------------+
|              5|
+---------------+

-- max
SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col);
+--------+
|max(col)|
+--------+
|      50|
+--------+

-- max_by
SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y);
+------------+
|max_by(x, y)|
+------------+
|           b|
+------------+

-- mean
SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col);
+---------+
|mean(col)|
+---------+
|      2.0|
+---------+

SELECT mean(col) FROM VALUES (1), (2), (NULL) AS tab(col);
+---------+
|mean(col)|
+---------+
|      1.5|
+---------+

-- median
SELECT median(col) FROM VALUES (0), (10) AS tab(col);
+-----------+
|median(col)|
+-----------+
|        5.0|
+-----------+

SELECT median(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+--------------------+
|         median(col)|
+--------------------+
|INTERVAL '0-5' YE...|
+--------------------+

-- min
SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col);
+--------+
|min(col)|
+--------+
|      -1|
+--------+

-- min_by
SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y);
+------------+
|min_by(x, y)|
+------------+
|           a|
+------------+

-- mode
SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col);
+---------+
|mode(col)|
+---------+
|       10|
+---------+

SELECT mode(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+-------------------+
|          mode(col)|
+-------------------+
|INTERVAL '10' MONTH|
+-------------------+

SELECT mode(col) FROM VALUES (0), (10), (10), (null), (null), (null) AS tab(col);
+---------+
|mode(col)|
+---------+
|       10|
+---------+

-- percentile
SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col);
+-----------------------+
|percentile(col, 0.3, 1)|
+-----------------------+
|                    3.0|
+-----------------------+

SELECT percentile(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col);
+-------------------------------------+
|percentile(col, array(0.25, 0.75), 1)|
+-------------------------------------+
|                           [2.5, 7.5]|
+-------------------------------------+

SELECT percentile(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+-----------------------+
|percentile(col, 0.5, 1)|
+-----------------------+
|   INTERVAL '0-5' YE...|
+-----------------------+

SELECT percentile(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '10' SECOND) AS tab(col);
+-----------------------------------+
|percentile(col, array(0.2, 0.5), 1)|
+-----------------------------------+
|               [INTERVAL '0 00:0...'|
+-----------------------------------+

-- percentile_approx
SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);
+-------------------------------------------------+
|percentile_approx(col, array(0.5, 0.4, 0.1), 100)|
+-------------------------------------------------+
|                                        [1, 1, 0]|
+-------------------------------------------------+

SELECT percentile_approx(col, 0.5, 100) FROM VALUES (0), (6), (7), (9), (10) AS tab(col);
+--------------------------------+
|percentile_approx(col, 0.5, 100)|
+--------------------------------+
|                               7|
+--------------------------------+

SELECT percentile_approx(col, 0.5, 100) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '1' MONTH), (INTERVAL '2' MONTH), (INTERVAL '10' MONTH) AS tab(col);
+--------------------------------+
|percentile_approx(col, 0.5, 100)|
+--------------------------------+
|              INTERVAL '1' MONTH|
+--------------------------------+

SELECT percentile_approx(col, array(0.5, 0.7), 100) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '1' SECOND), (INTERVAL '2' SECOND), (INTERVAL '10' SECOND) AS tab(col);
+--------------------------------------------+
|percentile_approx(col, array(0.5, 0.7), 100)|
+--------------------------------------------+
|                        [INTERVAL '01' SE...|
+--------------------------------------------+

-- regr_avgx
SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+---------------+
|regr_avgx(y, x)|
+---------------+
|           2.75|
+---------------+

SELECT regr_avgx(y, x) FROM VALUES (1, null) AS tab(y, x);
+---------------+
|regr_avgx(y, x)|
+---------------+
|           NULL|
+---------------+

SELECT regr_avgx(y, x) FROM VALUES (null, 1) AS tab(y, x);
+---------------+
|regr_avgx(y, x)|
+---------------+
|           NULL|
+---------------+

SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+---------------+
|regr_avgx(y, x)|
+---------------+
|            3.0|
+---------------+

SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+---------------+
|regr_avgx(y, x)|
+---------------+
|            3.0|
+---------------+

-- regr_avgy
SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+---------------+
|regr_avgy(y, x)|
+---------------+
|           1.75|
+---------------+

SELECT regr_avgy(y, x) FROM VALUES (1, null) AS tab(y, x);
+---------------+
|regr_avgy(y, x)|
+---------------+
|           NULL|
+---------------+

SELECT regr_avgy(y, x) FROM VALUES (null, 1) AS tab(y, x);
+---------------+
|regr_avgy(y, x)|
+---------------+
|           NULL|
+---------------+

SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|   regr_avgy(y, x)|
+------------------+
|1.6666666666666667|
+------------------+

SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+---------------+
|regr_avgy(y, x)|
+---------------+
|            1.5|
+---------------+

-- regr_count
SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+----------------+
|regr_count(y, x)|
+----------------+
|               4|
+----------------+

SELECT regr_count(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+----------------+
|regr_count(y, x)|
+----------------+
|               3|
+----------------+

SELECT regr_count(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+----------------+
|regr_count(y, x)|
+----------------+
|               2|
+----------------+

-- regr_intercept
SELECT regr_intercept(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);
+--------------------+
|regr_intercept(y, x)|
+--------------------+
|                 0.0|
+--------------------+

SELECT regr_intercept(y, x) FROM VALUES (1, null) AS tab(y, x);
+--------------------+
|regr_intercept(y, x)|
+--------------------+
|                NULL|
+--------------------+

SELECT regr_intercept(y, x) FROM VALUES (null, 1) AS tab(y, x);
+--------------------+
|regr_intercept(y, x)|
+--------------------+
|                NULL|
+--------------------+

-- regr_r2
SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|     regr_r2(y, x)|
+------------------+
|0.2727272727272726|
+------------------+

SELECT regr_r2(y, x) FROM VALUES (1, null) AS tab(y, x);
+-------------+
|regr_r2(y, x)|
+-------------+
|         NULL|
+-------------+

SELECT regr_r2(y, x) FROM VALUES (null, 1) AS tab(y, x);
+-------------+
|regr_r2(y, x)|
+-------------+
|         NULL|
+-------------+

SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|     regr_r2(y, x)|
+------------------+
|0.7500000000000001|
+------------------+

SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+-------------+
|regr_r2(y, x)|
+-------------+
|          1.0|
+-------------+

-- regr_slope
SELECT regr_slope(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);
+----------------+
|regr_slope(y, x)|
+----------------+
|             1.0|
+----------------+

SELECT regr_slope(y, x) FROM VALUES (1, null) AS tab(y, x);
+----------------+
|regr_slope(y, x)|
+----------------+
|            NULL|
+----------------+

SELECT regr_slope(y, x) FROM VALUES (null, 1) AS tab(y, x);
+----------------+
|regr_slope(y, x)|
+----------------+
|            NULL|
+----------------+

-- regr_sxx
SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|    regr_sxx(y, x)|
+------------------+
|2.7499999999999996|
+------------------+

SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+--------------+
|regr_sxx(y, x)|
+--------------+
|           2.0|
+--------------+

SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+--------------+
|regr_sxx(y, x)|
+--------------+
|           2.0|
+--------------+

-- regr_sxy
SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|    regr_sxy(y, x)|
+------------------+
|0.7499999999999998|
+------------------+

SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+--------------+
|regr_sxy(y, x)|
+--------------+
|           1.0|
+--------------+

SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+--------------+
|regr_sxy(y, x)|
+--------------+
|           1.0|
+--------------+

-- regr_syy
SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|    regr_syy(y, x)|
+------------------+
|0.7499999999999999|
+------------------+

SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
+------------------+
|    regr_syy(y, x)|
+------------------+
|0.6666666666666666|
+------------------+

SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
+--------------+
|regr_syy(y, x)|
+--------------+
|           0.5|
+--------------+

-- skewness
SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
+------------------+
|     skewness(col)|
+------------------+
|1.1135657469022013|
+------------------+

SELECT skewness(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col);
+-------------------+
|      skewness(col)|
+-------------------+
|-1.1135657469022011|
+-------------------+

-- some
SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col);
+---------+
|some(col)|
+---------+
|     true|
+---------+

SELECT some(col) FROM VALUES (NULL), (true), (false) AS tab(col);
+---------+
|some(col)|
+---------+
|     true|
+---------+

SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col);
+---------+
|some(col)|
+---------+
|    false|
+---------+

-- std
SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col);
+--------+
|std(col)|
+--------+
|     1.0|
+--------+

-- stddev
SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col);
+-----------+
|stddev(col)|
+-----------+
|        1.0|
+-----------+

-- stddev_pop
SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
+-----------------+
|  stddev_pop(col)|
+-----------------+
|0.816496580927726|
+-----------------+

-- stddev_samp
SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
+----------------+
|stddev_samp(col)|
+----------------+
|             1.0|
+----------------+

-- sum
SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col);
+--------+
|sum(col)|
+--------+
|      30|
+--------+

SELECT sum(col) FROM VALUES (NULL), (10), (15) AS tab(col);
+--------+
|sum(col)|
+--------+
|      25|
+--------+

SELECT sum(col) FROM VALUES (NULL), (NULL) AS tab(col);
+--------+
|sum(col)|
+--------+
|    NULL|
+--------+

-- try_avg
SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col);
+------------+
|try_avg(col)|
+------------+
|         2.0|
+------------+

SELECT try_avg(col) FROM VALUES (1), (2), (NULL) AS tab(col);
+------------+
|try_avg(col)|
+------------+
|         1.5|
+------------+

SELECT try_avg(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col);
+------------+
|try_avg(col)|
+------------+
|        NULL|
+------------+

-- try_sum
SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col);
+------------+
|try_sum(col)|
+------------+
|          30|
+------------+

SELECT try_sum(col) FROM VALUES (NULL), (10), (15) AS tab(col);
+------------+
|try_sum(col)|
+------------+
|          25|
+------------+

SELECT try_sum(col) FROM VALUES (NULL), (NULL) AS tab(col);
+------------+
|try_sum(col)|
+------------+
|        NULL|
+------------+

SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col);
+------------+
|try_sum(col)|
+------------+
|        NULL|
+------------+

-- var_pop
SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
+------------------+
|      var_pop(col)|
+------------------+
|0.6666666666666666|
+------------------+

-- var_samp
SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
+-------------+
|var_samp(col)|
+-------------+
|          1.0|
+-------------+

-- variance
SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col);
+-------------+
|variance(col)|
+-------------+
|          1.0|
+-------------+
相关推荐
憶巷11 分钟前
MyBatis中动态SQL执行原理
java·sql·mybatis
重生之绝世牛码11 分钟前
Java设计模式 —— 【结构型模式】享元模式(Flyweight Pattern) 详解
java·大数据·开发语言·设计模式·享元模式·设计原则
喝醉酒的小白12 分钟前
ElasticSearch 的核心功能
大数据·elasticsearch·jenkins
神秘打工猴1 小时前
Kafka 监控都有哪些?
分布式·kafka
知识的宝藏1 小时前
Django models中的增删改查与MySQL SQL的对应关系
sql·mysql·django·django models
Kobebryant-Manba2 小时前
kafka基本概念
分布式·学习·kafka
蚂蚁数据AntData2 小时前
流批一体向量化计算引擎 Flex 在蚂蚁的探索和实践
大数据·数据仓库·spark·数据库架构
rainoway3 小时前
CRDT宝典 - yata算法
前端·分布式·算法
hanbarger3 小时前
分布式通信,微服务协调组件,zookeeper
分布式·zookeeper·中间件
~央千澈~3 小时前
如果你的网站是h5网站,如何将h5网站变成小程序-除开完整重做方法如何快速h5转小程序-h5网站转小程序的办法-优雅草央千澈
前端·apache