板凳-------MySQL Cookbook学习 (十一--------9)

13.2 分组描述统计

sql 复制代码
mysql> select age, count(score) as n,
    -> sum(score) as sum,
    -> min(score) as minimum,
    -> max(score) as maximum,
    -> avg(score) as mean,
    -> stddev_samp(score) as 'std. dev.',
    -> var_samp(score) as 'variance'
    -> from testscore
    -> group by age;
+-----+---+------+---------+---------+--------+--------------------+--------------------+
| age | n | sum  | minimum | maximum | mean   | std. dev.          | variance           |
+-----+---+------+---------+---------+--------+--------------------+--------------------+
|   5 | 4 |   22 |       4 |       7 | 5.5000 | 1.2909944487358056 | 1.6666666666666667 |
|   6 | 4 |   27 |       4 |       9 | 6.7500 |  2.217355782608345 |  4.916666666666667 |
|   7 | 4 |   30 |       6 |       9 | 7.5000 | 1.2909944487358056 | 1.6666666666666667 |
|   8 | 4 |   32 |       6 |      10 | 8.0000 | 1.8257418583505538 | 3.3333333333333335 |
|   9 | 4 |   35 |       7 |      10 | 8.7500 | 1.2583057392117918 |  1.583333333333334 |
+-----+---+------+---------+---------+--------+--------------------+--------------------+
5 rows in set (0.03 sec)

mysql> select sex, count(score) as n,
    -> sum(score) as sum,
    -> min(score) as minimum,
    -> max(score) as maximum,
    -> avg(score) as mean,
    -> stddev_samp(score) as 'std. dev.',
    -> var_samp(score) as 'variance'
    -> from testscore
    -> group by sex;
+-----+----+------+---------+---------+--------+--------------------+--------------------+
| sex | n  | sum  | minimum | maximum | mean   | std. dev.          | variance           |
+-----+----+------+---------+---------+--------+--------------------+--------------------+
| M   | 10 |   71 |       4 |       9 | 7.1000 | 1.7919573407620817 | 3.2111111111111112 |
| F   | 10 |   75 |       4 |      10 | 7.5000 | 1.9578900207451218 | 3.8333333333333335 |
+-----+----+------+---------+---------+--------+--------------------+--------------------+
2 rows in set (0.00 sec)

mysql> select age, sex,  count(score) as n,
    -> sum(score) as sum,
    -> min(score) as minimum,
    -> max(score) as maximum,
    -> avg(score) as mean,
    -> stddev_samp(score) as 'std. dev.',
    -> var_samp(score) as 'variance'
    -> from testscore
    -> group by age, sex;
+-----+-----+---+------+---------+---------+--------+--------------------+----------+
| age | sex | n | sum  | minimum | maximum | mean   | std. dev.          | variance |
+-----+-----+---+------+---------+---------+--------+--------------------+----------+
|   5 | M   | 2 |    9 |       4 |       5 | 4.5000 | 0.7071067811865476 |      0.5 |
|   5 | F   | 2 |   13 |       6 |       7 | 6.5000 | 0.7071067811865476 |      0.5 |
|   6 | M   | 2 |   17 |       8 |       9 | 8.5000 | 0.7071067811865476 |      0.5 |
|   6 | F   | 2 |   10 |       4 |       6 | 5.0000 | 1.4142135623730951 |        2 |
|   7 | M   | 2 |   14 |       6 |       8 | 7.0000 | 1.4142135623730951 |        2 |
|   7 | F   | 2 |   16 |       7 |       9 | 8.0000 | 1.4142135623730951 |        2 |
|   8 | M   | 2 |   15 |       6 |       9 | 7.5000 | 2.1213203435596424 |      4.5 |
|   8 | F   | 2 |   17 |       7 |      10 | 8.5000 | 2.1213203435596424 |      4.5 |
|   9 | M   | 2 |   16 |       7 |       9 | 8.0000 | 1.4142135623730951 |        2 |
|   9 | F   | 2 |   19 |       9 |      10 | 9.5000 | 0.7071067811865476 |      0.5 |
+-----+-----+---+------+---------+---------+--------+--------------------+----------+
10 rows in set (0.00 sec)

13.3 产生频率分布

sql 复制代码
mysql> select score, count(score) as occurrences
    -> from testscore group by score;
+-------+-------------+
| score | occurrences |
+-------+-------------+
|     5 |           1 |
|     4 |           2 |
|     6 |           4 |
|     7 |           4 |
|     8 |           2 |
|     9 |           5 |
|    10 |           2 |
+-------+-------------+
7 rows in set (0.00 sec)

mysql> select @n := count(score) from  testscore;
+--------------------+
| @n := count(score) |
+--------------------+
|                 20 |
+--------------------+
1 row in set, 1 warning (0.01 sec)

mysql> select score, (count(score) * 100)/@n as percent
    -> from testscore group by score;
+-------+---------+
| score | percent |
+-------+---------+
|     5 |  5.0000 |
|     4 | 10.0000 |
|     6 | 20.0000 |
|     7 | 20.0000 |
|     8 | 10.0000 |
|     9 | 25.0000 |
|    10 | 10.0000 |
+-------+---------+
7 rows in set (0.00 sec)

mysql> select score, repeat('*', count(score)) as occurrences
    -> from testscore group by score;
+-------+-------------+
| score | occurrences |
+-------+-------------+
|     5 | *           |
|     4 | **          |
|     6 | ****        |
|     7 | ****        |
|     8 | **          |
|     9 | *****       |
|    10 | **          |
+-------+-------------+
7 rows in set (0.00 sec)

mysql> select @n := count(score) from  testscore;
+--------------------+
| @n := count(score) |
+--------------------+
|                 20 |
+--------------------+
1 row in set, 1 warning (0.00 sec)

mysql> select score, repeat('*', (count(score)*100)/@n) as percent
    -> from testscore group by score;
+-------+---------------------------+
| score | percent                   |
+-------+---------------------------+
|     5 | *****                     |
|     4 | **********                |
|     6 | ********************      |
|     7 | ********************      |
|     8 | **********                |
|     9 | ************************* |
|    10 | **********                |
+-------+---------------------------+
7 rows in set (0.00 sec)

mysql> drop table if exists ref;
Query OK, 0 rows affected (0.03 sec)

mysql> create table ref(score int);
Query OK, 0 rows affected (0.04 sec)

mysql> insert into ref(score)
    -> values(0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
Query OK, 11 rows affected (0.02 sec)
Records: 11  Duplicates: 0  Warnings: 0

mysql> select ref.score, count(testscore.score) as occurrences
    -> from ref left join testscore on ref.score = testscore.score
    -> group by ref.score;
+-------+-------------+
| score | occurrences |
+-------+-------------+
|     0 |           0 |
|     1 |           0 |
|     2 |           0 |
|     3 |           0 |
|     4 |           2 |
|     5 |           1 |
|     6 |           4 |
|     7 |           4 |
|     8 |           2 |
|     9 |           5 |
|    10 |           2 |
+-------+-------------+
11 rows in set (0.00 sec)

mysql> select ref.score, (count(testscore.score)*100)/@n as percent
    -> from ref left join testscore on ref.score = testscore.score
    -> group by ref.score;
+-------+---------+
| score | percent |
+-------+---------+
|     0 |  0.0000 |
|     1 |  0.0000 |
|     2 |  0.0000 |
|     3 |  0.0000 |
|     4 | 10.0000 |
|     5 |  5.0000 |
|     6 | 20.0000 |
|     7 | 20.0000 |
|     8 | 10.0000 |
|     9 | 25.0000 |
|    10 | 10.0000 |
+-------+---------+
11 rows in set (0.00 sec)

13.4 计数缺失值

sql 复制代码
mysql> -- 创建表
mysql> CREATE TABLE subject_scores (
    ->   subject INT,
    ->   score INT NULL
    -> );
Query OK, 0 rows affected (0.06 sec)

mysql>
mysql> -- 插入示例数据
mysql> INSERT INTO subject_scores VALUES
    ->   (1, 38), (2, NULL), (3, 47),
    ->   (4, 82), (5, NULL), (6, 65),
    ->   (7, 90), (8, 73), (9, NULL),
    ->   (10, 55), (11, 68), (12, 79);
Query OK, 12 rows affected (0.01 sec)
Records: 12  Duplicates: 0  Warnings: 0

mysql>
mysql> -- 查询
mysql> SELECT * FROM subject_scores ORDER BY subject;
+---------+-------+
| subject | score |
+---------+-------+
|       1 |    38 |
|       2 |  NULL |
|       3 |    47 |
|       4 |    82 |
|       5 |  NULL |
|       6 |    65 |
|       7 |    90 |
|       8 |    73 |
|       9 |  NULL |
|      10 |    55 |
|      11 |    68 |
|      12 |    79 |
+---------+-------+
12 rows in set (0.00 sec)

mysql> SELECT
    ->   COUNT(*) AS 'n (total)',
    ->   COUNT(score) AS 'n (nonmissing)',
    ->   COUNT(*) - COUNT(score) AS 'n (missing)',
    ->   ((COUNT(*) - COUNT(score)) * 100) / COUNT(*) AS '% missing'
    -> FROM subject_scores;
+-----------+----------------+-------------+-----------+
| n (total) | n (nonmissing) | n (missing) | % missing |
+-----------+----------------+-------------+-----------+
|        12 |              9 |           3 |   25.0000 |
+-----------+----------------+-------------+-----------+
1 row in set (0.00 sec)

mysql> select count(*) as 'n (total)',
    -> count(score) as  'n (nonmissing)',
    -> sum(isnull(score)) as 'n (missing)',
    -> (sum(isnull(score))* 100) / count(*) as '% missing'
    -> from subject_scores;
+-----------+----------------+-------------+-----------+
| n (total) | n (nonmissing) | n (missing) | % missing |
+-----------+----------------+-------------+-----------+
|        12 |              9 |           3 |   25.0000 |
+-----------+----------------+-------------+-----------+
1 row in set (0.00 sec)

如果需要按某个条件分组统计缺失值，首先要确定分组依据。例如，假设：
•	科目 1-6 属于 A 组
•	科目 7-12 属于 B 组
则可以用 CASE 表达式生成分组列，再按该列分组：
sql
mysql> SELECT
    ->   CASE WHEN subject BETWEEN 1 AND 6 THEN 'A' ELSE 'B' END AS group_name,
    ->   COUNT(*) AS 'n (total)',
    ->   COUNT(score) AS 'n (nonmissing)',
    ->   COUNT(*) - COUNT(score) AS 'n (missing)',
    ->   ((COUNT(*) - COUNT(score)) * 100) / COUNT(*) AS '% missing'
    -> FROM subject_scores
    -> GROUP BY group_name;
+------------+-----------+----------------+-------------+-----------+
| group_name | n (total) | n (nonmissing) | n (missing) | % missing |
+------------+-----------+----------------+-------------+-----------+
| A          |         6 |              4 |           2 |   33.3333 |
| B          |         6 |              5 |           1 |   16.6667 |
+------------+-----------+----------------+-------------+-----------+
2 rows in set (0.00 sec)

13.5 计算线性回归和相关系数

sql 复制代码
mysql> select age, score from testscore;
+-----+-------+
| age | score |
+-----+-------+
|   5 |     5 |
|   5 |     4 |
|   5 |     6 |
|   5 |     7 |
|   6 |     8 |
|   6 |     9 |
|   6 |     4 |
|   6 |     6 |
|   7 |     8 |
|   7 |     6 |
|   7 |     9 |
|   7 |     7 |
|   8 |     9 |
|   8 |     6 |
|   8 |     7 |
|   8 |    10 |
|   9 |     9 |
|   9 |     7 |
|   9 |    10 |
|   9 |     9 |
+-----+-------+
20 rows in set (0.00 sec)

mysql> SELECT
    ->   @n := COUNT(score) AS n,
    ->   @meanx := AVG(age) AS 'x mean',
    ->   @sumx := SUM(age) AS 'x sum',
    ->   @sumxx := SUM(age * age) AS 'x sum of squares',
    ->   @meany := AVG(score) AS 'y mean',
    ->   @sumy := SUM(score) AS 'y sum',
    ->   @sumyy := SUM(score * score) AS 'y sum of squares',
    ->   @sumxy := SUM(age * score) AS 'x*y sum'
    -> FROM testscore\G
*************************** 1. row ***************************
               n: 20
          x mean: 7.0000
           x sum: 140
x sum of squares: 1020
          y mean: 7.3000
           y sum: 146
y sum of squares: 1130
         x*y sum: 1053
1 row in set, 8 warnings (0.00 sec)

mysql> select
    -> @b := (@n * @sumxy - @sumx * @sumy)/ (@n * @sumxx - @sumx * @sumx)
    -> as slope;
+-------------+
| slope       |
+-------------+
| 0.775000000 |
+-------------+
1 row in set, 1 warning (0.00 sec)

mysql> select @a := (@meany - @b * @meanx) as intercept;
+----------------------+
| intercept            |
+----------------------+
| 1.875000000000000000 |
+----------------------+
1 row in set, 1 warning (0.00 sec)

mysql> select concat('y = ', @b, 'x + ', @a) as 'least-squares regression';
+-----------------------------------------+
| least-squares regression                |
+-----------------------------------------+
| y = 0.775000000x + 1.875000000000000000 |
+-----------------------------------------+
1 row in set (0.00 sec)

mysql> select
    -> (@n * @sumxy - @sumx * @sumy)
    -> /sqrt((@n * @sumxx - @sumx * @sumx) * (@n * @sumyy - @sumy * @sumy ))
    -> as correlation;
+--------------------+
| correlation        |
+--------------------+
| 0.6117362044219903 |
+--------------------+
1 row in set (0.00 sec)
相关推荐
超浪的晨25 分钟前
Java 内部类详解:从基础到实战,掌握嵌套类、匿名类与局部类的使用技巧
java·开发语言·后端·学习·个人开发
刺客xs1 小时前
MYSQL数据库----DCL语句
android·数据库·mysql
胖墩的IT1 小时前
在高并发场景下,仅依赖数据库机制(如行锁、版本控制)无法完全避免数据异常的问题
数据库·mysql
iReaShare1 小时前
如何将数据从一部手机传输到另一部手机?
android
慢行的骑兵1 小时前
Android音视频探索之旅 | C++层使用OpenGL ES实现视频渲染
android·音视频·ndk
iReaShare2 小时前
将CSV联系人导入安卓手机的3种简单方法
android
DKPT2 小时前
Java设计模式之行为型模式(命令模式)介绍与说明
java·笔记·学习·设计模式
yingtianhaoxuan3 小时前
学习笔记-Excel统计分析——描述统计量的计算
笔记·学习
叁沐3 小时前
MySQL 14 count(*)这么慢,我该怎么办?
mysql