Python爬虫

Numpy

创建数组

import numpy as np

创建数组

arr1=np.array([1,2,3])

arr2=np.array([[3,4,5],[2,4,1]])

print(arr1)

print(arr2)

print(type(arr1))

1 2 3

\[3 4 5

2 4 1\]

<class 'numpy.ndarray'>

查看数组的基础属性

print(arr1.shape)

print(arr1.ndim)

print(arr1.dtype)

print(arr2.shape)

print(arr2.ndim)

print(arr2.dtype)

(3,)

1

int32

(2, 3)

2

int32

初识数组特点

list1=([0.3,0.5,4.2])

arr1=np.array([0.3,0.5,4.2])

print(list1)

print(arr1)

print(list1 ** 2)

print([i**2 for i in list1])

print(arr1 ** 2)

0.3, 0.5, 4.2

0.3 0.5 4.2

0.09, 0.25, 17.64

0.09 0.25 17.64

创建常见数组

arr3=np.arange(0,10)

arr4=np.arange(10)

arr5=np.arange(0,1,0.1)

print(arr3)

print(arr4)

print(arr5)

0 1 2 3 4 5 6 7 8 9

0 1 2 3 4 5 6 7 8 9

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9

arr6=np.linspace(0,1,10)

print(arr6)

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.

arr7=np.zeros([3,4,5])

print(arr7)

\[\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]\]

arr8=np.ones([3,4,5])

print(arr8)

\[\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]\]

数组的数据类型

arr8=np.array([3,4,5],dtype=np.float) # 声明数组的数据类型

print(arr8)

print(arr8.dtype)

arr8[0]=1.2

print(arr8)

print(np.int32(arr8)) # 转换数组的数据类型

3. 4. 5.

float64

1.2 4. 5.

1 4 5

生成随机数

print(np.random.random(10))

0.73744669 0.80080002 0.69015703 0.51743988 0.05570415 0.34901843 0.0734643 0.86541768 0.18830607 0.07036505

print(np.random.rand(10))

0.27580848 0.69278954 0.12912278 0.53180506 0.21617686 0.4357371 0.92604774 0.19278177 0.81396217 0.15550441

print(np.random.rand(3,4))

print(np.random.randn(3,4))

\[ 0.03654586 0.23675581 0.35543946 0.31476077

0.12875907 0.78566879 0.87653987 0.78687239

0.79805071 0.46032083 0.08375826 0.60476404\]

\[ 0.22623064 -0.34199973 -0.50766523 0.01726679

0.71462127 -1.19509683 -0.3916739 0.67375221

-1.02150652 0.98995901 1.64691806 0.81784057\]

数组的索引

arr1=np.array([0.3,0.78,0.24,5,3.2])

print(arr1)

print(arr1[0])

print(arr1[-5])

print(arr1[1:2])

print(arr1[-4:-2])

0.3 0.78 0.24 5. 3.2

0.3

0.3

0.78

0.78 0.24

逻辑型索引

arr2=np.array([2.3,1.8,4.5])

print(arr2)

print(arr2[[False,False,True]])

index=arr2>2

print(arr2[index])

2.3 1.8 4.5

4.5

2.3 4.5

多维数组的索引

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

print(arr3[2,3])

print(arr3[2,0:])

print(arr3[:,3])

print(arr3[1:,1:3])

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

12

9 10 11 12

4 8 12

\[ 6 7

10 11\]

\[ 5 6 7 8

9 10 11 12\]

False True True

修改数组中的元素

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

arr3[0,0]=15

print(arr3)

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

\[15 2 3 4

5 6 7 8

9 10 11 12\]

求解距离矩阵

n=10 # 样本个数

x=np.linspace(1,100,n) # 样本的横坐标

y=np.linspace(1,100,n) # 样本的纵坐标

dist=np.sqrt((x[0]-x[1])**2+(y[0]-y[1])**2)

dist = np.zeros([n, n]) # 初始距离矩阵

for i in range(n):

for j in range(n):

dist[i, j] = np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2) # 计算欧式距离

print(x)

print(y)

print(dist)

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

\[ 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349 140.00714267

15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349

31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443

46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512

62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593

77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674

93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756

108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837

124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919

140.00714267 124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. \]

数组形态变化

arr4=np.arange(1,13)

print(arr4)

print(arr4.reshape([3,4]))

arr5=arr4.reshape([3,4])

print(arr5.ravel()) # 数组的展平

print(arr5.flatten('F')) #数组的纵向展平

arr6=arr4.reshape([3,4])

print(np.hstack((arr5,arr6))) # 数组的横向拼接

print(np.vstack((arr5,arr6))) # 数组的纵向拼接

1 2 3 4 5 6 7 8 9 10 11 12

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

1 2 3 4 5 6 7 8 9 10 11 12

1 5 9 2 6 10 3 7 11 4 8 12

\[ 1 2 3 4 1 2 3 4

5 6 7 8 5 6 7 8

9 10 11 12 9 10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12

1 2 3 4

5 6 7 8

9 10 11 12\]

掌握 NumPy 矩阵与通用函数

import numpy as np

matr1 = np.mat("1 2 3;4 5 6;7 8 9")

print(type(matr1))

matr2 = np.matrix([[1, 2, 3],[4, 5, 6],[7, 8, 9]])

np.bmat('matr1 matr2; matr1, matr2')

<class 'numpy.matrix'>

matrix([[1, 2, 3, 1, 2, 3],

4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\], \[1, 2, 3, 1, 2, 3\], \[4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\]\]) # 通用函数 arr1 = np.array(\[0.2, 0.4, 0.6\]) arr2=np.array(\[0.2,0.6,0.78\]) list1 = \[0.2, 0.4, 0.6

print(arr1+1)

print(arr1-1)

print(arr1*2)

print(arr1/2)

print(list1*2)

print(arr1+arr2)

print(arr1>0)

print(arr1<arr2)

print(np.any(arr1==0.2))

print(np.all(arr2==0.2))

1.2 1.4 1.6

-0.8 -0.6 -0.4

0.4 0.8 1.2

0.1 0.2 0.3

0.2, 0.4, 0.6, 0.2, 0.4, 0.6

0.4 1. 1.38

True True True

False True True

True

False

广播机制

arr3 = np.arange(1, 13).reshape([4, 3])

arr4 = np.array([1, 2, 3])

arr5 = np.array([[1], [2], [3], [4]])

print(arr3)

print(arr4)

print(arr5)

print(arr3+arr4)

print(arr3+arr5)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

1 2 3

\[1

2

3

4\]

\[ 2 4 6

5 7 9

8 10 12

11 13 15\]

\[ 2 3 4

6 7 8

10 11 12

14 15 16\]

利用 NumPy 进行统计分析

import numpy as np

读写二进制文件

arr1 = np.arange(1, 13).reshape([4, 3])

arr2 = np.arange(1, 13).reshape([3, 4])

print(arr1)

print(arr2)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

读写txt文件

np.savetxt('tmp/arr1.txt', arr1, delimiter=',') # 保存数据

np.loadtxt('tmp/arr1.txt', delimiter=',') # 读取数据

array([[ 1., 2., 3.],

4., 5., 6.\], \[ 7., 8., 9.\], \[10., 11., 12.\]\]) # 使用数组进行简单统计分析 arr3 = np.random.randint(1, 10, (3, 4)) print(arr3) arr3.sort(axis=0) print(arr3) print(arr3.argsort(axis=0)) print(np.tile(arr3, 2)) print(np.repeat(arr3, 2, axis=1)) print(arr3.mean()) print(arr3.mean(axis=0)) print(arr3.max(axis=0)) print(arr3.argmax(axis=0)) \[\[8 6 7 3

2 9 9 4

2 5 2 3\]

\[2 5 2 3

2 6 7 3

8 9 9 4\]

\[0 0 0 0

1 1 1 1

2 2 2 2\]

\[2 5 2 3 2 5 2 3

2 6 7 3 2 6 7 3

8 9 9 4 8 9 9 4\]

\[2 2 5 5 2 2 3 3

2 2 6 6 7 7 3 3

8 8 9 9 9 9 4 4\]

5.0

4. 6.66666667 6. 3.33333333

8 9 9 4

2 2 2 2

Pandas

import pandas as pd

读取文本数据

pd.read_csv?

data_txt = pd.read_csv('data/meal_order_info.txt',sep=' ')

data_csv = pd.read_csv('data/meal_order_info.csv', encoding='gbk', header=0)

data_csv

将数据框存储为文本文件数据

data_csv.to_csv('tmp/data_csv.csv',index=None, encoding='gbk')

data_csv

读取Excel文件

data_excel = pd.read_excel('data/meal_order_detail.xlsx',sheet_name='meal_order_detail2')

data_excel

data_excel.to_excel('tmp/data_excel.xlsx', index=None, sheet_name='test1')

掌握DataFrame的常用操作

import pandas as pd

Series系列

ser1 = pd.Series([1,2,'a'],index=['a','b','c'])

print(ser1)

ser2 = pd.Series({'a':[1,2,3],'b':['1','2','3']})

print(ser2)

a 1

b 2

c a

dtype: object

a [1, 2, 3]

b [1, 2, 3]

dtype: object

构造数据框(DataFrame)

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

print(d)

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

d={'color':['blue','green','yellow','red','white'],

'object':['ball','pen','pencil','paper','mug'],

'price':[1.2,1.0,0.6,0.9,1.7]}

frame = pd.DataFrame(d,index=['a','b','c','d','e'])

print(frame)

print(pd.DataFrame(index=[1, 2], columns=[1, 2]))

print(pd.DataFrame(1, index=[1, 2], columns=[1, 2]))

\[1.3, 2.0, 3, 4\], \[2, 4, 1, 4\], \[2, 5, 1.9, 7\], \[3, 1, 0, 11\]

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

color object price

a blue ball 1.2

b green pen 1.0

c yellow pencil 0.6

d red paper 0.9

e white mug 1.7

1 2

1 NaN NaN

2 NaN NaN

1 2

1 1 1

2 1 1

数据框的常用属性

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.values)

print(df.index)

print(df.shape)

print(df.dtypes)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

\[ 1.3 2. 3. 4.

2. 4. 1. 4.

2. 5. 1.9 7.

3. 1. 0. 11. \]

Index(['a', 'b', 'c', 'd'], dtype='object')

(4, 4)

A float64

B float64

C float64

D int64

dtype: object

数据框的查改增删操作

import pandas as pd

访问数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df['A']) # 单列数据访问

print(df[['A', 'C']]) # 多列数据访问

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A C

a 1.3 3.0

b 2.0 1.0

c 2.0 1.9

d 3.0 0.0

print(df.head(3)) # 访问某几行数据

print(df.tail(3))

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

A B C D

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

print(df)

print(df.iloc[0, 0]) # 按照行列顺序进行数据访问

print(df.iloc[0:3, 0])

print(df.iloc[:, 0])

print(df.iloc[0, :])

print(df.iloc[1:3, 1:3])

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

<class 'pandas.core.series.Series'>

print(df.loc['a', 'A']) # 按照行列名称进行数据访问

print(df.loc['a':'c', 'A'])

print(df.loc[:, 'A'])

print(df.loc['a', :])

print(df.loc[['b','c'], ['B', 'C']])

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

注意如下方式返回值的区别

print(df.iloc[:, 0])

print(df.iloc[:, 0:1])

print(type(df.iloc[:, 0]))

print(type(df.iloc[:, 0:1]))

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A

a 1.3

b 2.0

c 2.0

d 3.0

<class 'pandas.core.series.Series'>

<class 'pandas.core.frame.DataFrame'>

修改数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df.loc['a', 'A'] = 101

df.loc[:, 'B'] = 0.25

df.loc[:, 'C'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

a 101.0 0.25 1 4

b 2.0 0.25 2 4

c 2.0 0.25 3 7

d 3.0 0.25 4 11

C:\Users\Administrator\AppData\Local\Temp\ipykernel_70148\2679651701.py:7: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`

df.loc[:, 'C'] = [1, 2, 3, 4]

为数据框增添数据

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df['E'] = 5

df['F'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D E F

a 1.3 2.0 3.0 4 5 1

b 2.0 4.0 1.0 4 5 2

c 2.0 5.0 1.9 7 5 3

d 3.0 1.0 0.0 11 5 4

import pandas as pd

删除数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.drop('D'))

print(df.drop('D', axis=1, inplace=False)) # 删除数据框的列元素

print(df)

print(df.drop(['a', 'c'], axis=0)) # 输出数据框的行元素

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C

a 1.3 2.0 3.0

b 2.0 4.0 1.0

c 2.0 5.0 1.9

d 3.0 1.0 0.0

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

b 2.0 4.0 1.0 4

d 3.0 1.0 0.0 11

描述分析DataFrame数据

import numpy as np

import pandas as pd

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(np.mean(df, axis=1))

print(df.mean(axis=1))

print(df.std())

print(df.describe())

print(df.T.describe())

df['A'].value_counts()

转换与处理时间序列数据

import pandas as pd

order = pd.read_csv('data/meal_order_info.csv', encoding='gbk')

print(order)

print(order['lock_time'].dtypes)

order['lock_time'] = pd.to_datetime(order['lock_time'])

print(order['lock_time'].dtypes)

print(pd.DatetimeIndex(order['lock_time']))

print(pd.PeriodIndex(order['lock_time'], freq='H'))

order['lock_time']

print(order['lock_time'][0].year) # 获取数据年份信息

print(order['lock_time'].dt.year) # 获取数据年份信息

print(order['lock_time'].dt.month) # 获取数据月份信息

print(order['lock_time'].dt.week) # 获取数据周次信息

print(order['lock_time'] + pd.Timedelta(days=1)) # 时间平移

print(order['lock_time'][1] - order['lock_time'][0]) # 求时间差别

使用分组聚合进行组内计算

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

detail_group = detail[['order_id', 'counts', 'amounts']].groupby(by='order_id') # 分组操作

detail_group.agg('mean').head(3) # 对分组数据的所有列都执行mean操作

detail_group.agg(['mean', 'sum']).head(3) # 对分组数据的所有列都执行mean和sum操作

detail_group.agg({'counts': ['mean', np.max], 'amounts': 'std'}).head(3) # 对分组数据的不同列执行不同操作

detail_group.agg({'counts': lambda x: sum(x)**2}).head(3) # 将自定义函数放入聚合操作中

创建透视表与交叉表

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

pd.pivot_table(detail[['order_id', 'counts', 'amounts']], index='order_id', aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',values='counts', fill_value=0).head()

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name']).head(3)

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name'], values=detail['counts'], aggfunc='sum').fillna(0).head(3)

Matplotlib

python 复制代码
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.show()          # 第三环节,显示图形
python 复制代码
plt.figure

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title('lines')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(['y=x^2', 'y=x^4'])

plt.savefig('tmp/examplt.png')
plt.show()          # 第三环节,显示图形
python 复制代码
import numpy as np
import matplotlib.pyplot as plt

data = np.load('国民经济核算季度数据.npz', allow_pickle=True)
columns = data['columns']
values = data['values']
print(columns)
print(values)
data['values'].shape


# 绘制散点图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.scatter(values[:, 1], values[:, 3], marker='o')
plt.scatter(values[:, 1], values[:, 4], marker='*')
plt.scatter(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值散点图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值散点图.png')
plt.show()
python 复制代码
# 绘制折线图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.plot(values[:, 1], values[:, 3], linestyle='solid')
plt.plot(values[:, 1], values[:, 4], marker='*')
plt.plot(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值折线图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值折线图.png')
plt.show()
python 复制代码
# 绘制直方图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.title('2017年第一季度各产业生产总值直方图')
plt.ylabel('生产总值(亿元)')
plt.bar(columns[3:6], values[-1, 3:6])
my_height = values[-1, 3:6]
for i in range(len(my_height)):
    plt.text(i, my_height[i]+1000, my_height[i], va='bottom', ha='center')

plt.show()
python 复制代码
# 绘制饼图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']

plt.pie(values[-1, 3:6], explode=[0.01, 0.01, 0.01], labels=labels, autopct='%1.1f%%')
plt.title('2017年第一季度各产业生产总值饼图')
plt.show()
python 复制代码
# 绘制箱线图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3:6], notch=True, labels=labels)
plt.show()
python 复制代码
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3], notch=True)
plt.show()

Requests库

相关推荐
子竹聆风2 小时前
Feapder框架UpdateItem使用技巧:如何优雅地实现"只更新有值字段"
爬虫
数据智能老司机7 小时前
精通 Python 设计模式——分布式系统模式
python·设计模式·架构
数据智能老司机8 小时前
精通 Python 设计模式——并发与异步模式
python·设计模式·编程语言
数据智能老司机8 小时前
精通 Python 设计模式——测试模式
python·设计模式·架构
数据智能老司机8 小时前
精通 Python 设计模式——性能模式
python·设计模式·架构
c8i8 小时前
drf初步梳理
python·django
每日AI新事件8 小时前
python的异步函数
python
这里有鱼汤9 小时前
miniQMT下载历史行情数据太慢怎么办?一招提速10倍!
前端·python
databook18 小时前
Manim实现脉冲闪烁特效
后端·python·动效
程序设计实验室19 小时前
2025年了,在 Django 之外,Python Web 框架还能怎么选?
python