Python爬虫

Numpy

创建数组

import numpy as np

创建数组

arr1=np.array([1,2,3])

arr2=np.array([[3,4,5],[2,4,1]])

print(arr1)

print(arr2)

print(type(arr1))

1 2 3

\[3 4 5

2 4 1\]

<class 'numpy.ndarray'>

查看数组的基础属性

print(arr1.shape)

print(arr1.ndim)

print(arr1.dtype)

print(arr2.shape)

print(arr2.ndim)

print(arr2.dtype)

(3,)

1

int32

(2, 3)

2

int32

初识数组特点

list1=([0.3,0.5,4.2])

arr1=np.array([0.3,0.5,4.2])

print(list1)

print(arr1)

print(list1 ** 2)

print([i**2 for i in list1])

print(arr1 ** 2)

0.3, 0.5, 4.2

0.3 0.5 4.2

0.09, 0.25, 17.64

0.09 0.25 17.64

创建常见数组

arr3=np.arange(0,10)

arr4=np.arange(10)

arr5=np.arange(0,1,0.1)

print(arr3)

print(arr4)

print(arr5)

0 1 2 3 4 5 6 7 8 9

0 1 2 3 4 5 6 7 8 9

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9

arr6=np.linspace(0,1,10)

print(arr6)

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.

arr7=np.zeros([3,4,5])

print(arr7)

\[\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]\]

arr8=np.ones([3,4,5])

print(arr8)

\[\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]\]

数组的数据类型

arr8=np.array([3,4,5],dtype=np.float) # 声明数组的数据类型

print(arr8)

print(arr8.dtype)

arr8[0]=1.2

print(arr8)

print(np.int32(arr8)) # 转换数组的数据类型

3. 4. 5.

float64

1.2 4. 5.

1 4 5

生成随机数

print(np.random.random(10))

0.73744669 0.80080002 0.69015703 0.51743988 0.05570415 0.34901843 0.0734643 0.86541768 0.18830607 0.07036505

print(np.random.rand(10))

0.27580848 0.69278954 0.12912278 0.53180506 0.21617686 0.4357371 0.92604774 0.19278177 0.81396217 0.15550441

print(np.random.rand(3,4))

print(np.random.randn(3,4))

\[ 0.03654586 0.23675581 0.35543946 0.31476077

0.12875907 0.78566879 0.87653987 0.78687239

0.79805071 0.46032083 0.08375826 0.60476404\]

\[ 0.22623064 -0.34199973 -0.50766523 0.01726679

0.71462127 -1.19509683 -0.3916739 0.67375221

-1.02150652 0.98995901 1.64691806 0.81784057\]

数组的索引

arr1=np.array([0.3,0.78,0.24,5,3.2])

print(arr1)

print(arr1[0])

print(arr1[-5])

print(arr1[1:2])

print(arr1[-4:-2])

0.3 0.78 0.24 5. 3.2

0.3

0.3

0.78

0.78 0.24

逻辑型索引

arr2=np.array([2.3,1.8,4.5])

print(arr2)

print(arr2[[False,False,True]])

index=arr2>2

print(arr2[index])

2.3 1.8 4.5

4.5

2.3 4.5

多维数组的索引

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

print(arr3[2,3])

print(arr3[2,0:])

print(arr3[:,3])

print(arr3[1:,1:3])

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

12

9 10 11 12

4 8 12

\[ 6 7

10 11\]

\[ 5 6 7 8

9 10 11 12\]

False True True

修改数组中的元素

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

arr3[0,0]=15

print(arr3)

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

\[15 2 3 4

5 6 7 8

9 10 11 12\]

求解距离矩阵

n=10 # 样本个数

x=np.linspace(1,100,n) # 样本的横坐标

y=np.linspace(1,100,n) # 样本的纵坐标

dist=np.sqrt((x[0]-x[1])**2+(y[0]-y[1])**2)

dist = np.zeros([n, n]) # 初始距离矩阵

for i in range(n):

for j in range(n):

dist[i, j] = np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2) # 计算欧式距离

print(x)

print(y)

print(dist)

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

\[ 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349 140.00714267

15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349

31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443

46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512

62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593

77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674

93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756

108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837

124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919

140.00714267 124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. \]

数组形态变化

arr4=np.arange(1,13)

print(arr4)

print(arr4.reshape([3,4]))

arr5=arr4.reshape([3,4])

print(arr5.ravel()) # 数组的展平

print(arr5.flatten('F')) #数组的纵向展平

arr6=arr4.reshape([3,4])

print(np.hstack((arr5,arr6))) # 数组的横向拼接

print(np.vstack((arr5,arr6))) # 数组的纵向拼接

1 2 3 4 5 6 7 8 9 10 11 12

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

1 2 3 4 5 6 7 8 9 10 11 12

1 5 9 2 6 10 3 7 11 4 8 12

\[ 1 2 3 4 1 2 3 4

5 6 7 8 5 6 7 8

9 10 11 12 9 10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12

1 2 3 4

5 6 7 8

9 10 11 12\]

掌握 NumPy 矩阵与通用函数

import numpy as np

matr1 = np.mat("1 2 3;4 5 6;7 8 9")

print(type(matr1))

matr2 = np.matrix([[1, 2, 3],[4, 5, 6],[7, 8, 9]])

np.bmat('matr1 matr2; matr1, matr2')

<class 'numpy.matrix'>

matrix([[1, 2, 3, 1, 2, 3],

4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\], \[1, 2, 3, 1, 2, 3\], \[4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\]\]) # 通用函数 arr1 = np.array(\[0.2, 0.4, 0.6\]) arr2=np.array(\[0.2,0.6,0.78\]) list1 = \[0.2, 0.4, 0.6

print(arr1+1)

print(arr1-1)

print(arr1*2)

print(arr1/2)

print(list1*2)

print(arr1+arr2)

print(arr1>0)

print(arr1<arr2)

print(np.any(arr1==0.2))

print(np.all(arr2==0.2))

1.2 1.4 1.6

-0.8 -0.6 -0.4

0.4 0.8 1.2

0.1 0.2 0.3

0.2, 0.4, 0.6, 0.2, 0.4, 0.6

0.4 1. 1.38

True True True

False True True

True

False

广播机制

arr3 = np.arange(1, 13).reshape([4, 3])

arr4 = np.array([1, 2, 3])

arr5 = np.array([[1], [2], [3], [4]])

print(arr3)

print(arr4)

print(arr5)

print(arr3+arr4)

print(arr3+arr5)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

1 2 3

\[1

2

3

4\]

\[ 2 4 6

5 7 9

8 10 12

11 13 15\]

\[ 2 3 4

6 7 8

10 11 12

14 15 16\]

利用 NumPy 进行统计分析

import numpy as np

读写二进制文件

arr1 = np.arange(1, 13).reshape([4, 3])

arr2 = np.arange(1, 13).reshape([3, 4])

print(arr1)

print(arr2)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

读写txt文件

np.savetxt('tmp/arr1.txt', arr1, delimiter=',') # 保存数据

np.loadtxt('tmp/arr1.txt', delimiter=',') # 读取数据

array([[ 1., 2., 3.],

4., 5., 6.\], \[ 7., 8., 9.\], \[10., 11., 12.\]\]) # 使用数组进行简单统计分析 arr3 = np.random.randint(1, 10, (3, 4)) print(arr3) arr3.sort(axis=0) print(arr3) print(arr3.argsort(axis=0)) print(np.tile(arr3, 2)) print(np.repeat(arr3, 2, axis=1)) print(arr3.mean()) print(arr3.mean(axis=0)) print(arr3.max(axis=0)) print(arr3.argmax(axis=0)) \[\[8 6 7 3

2 9 9 4

2 5 2 3\]

\[2 5 2 3

2 6 7 3

8 9 9 4\]

\[0 0 0 0

1 1 1 1

2 2 2 2\]

\[2 5 2 3 2 5 2 3

2 6 7 3 2 6 7 3

8 9 9 4 8 9 9 4\]

\[2 2 5 5 2 2 3 3

2 2 6 6 7 7 3 3

8 8 9 9 9 9 4 4\]

5.0

4. 6.66666667 6. 3.33333333

8 9 9 4

2 2 2 2

Pandas

import pandas as pd

读取文本数据

pd.read_csv?

data_txt = pd.read_csv('data/meal_order_info.txt',sep=' ')

data_csv = pd.read_csv('data/meal_order_info.csv', encoding='gbk', header=0)

data_csv

将数据框存储为文本文件数据

data_csv.to_csv('tmp/data_csv.csv',index=None, encoding='gbk')

data_csv

读取Excel文件

data_excel = pd.read_excel('data/meal_order_detail.xlsx',sheet_name='meal_order_detail2')

data_excel

data_excel.to_excel('tmp/data_excel.xlsx', index=None, sheet_name='test1')

掌握DataFrame的常用操作

import pandas as pd

Series系列

ser1 = pd.Series([1,2,'a'],index=['a','b','c'])

print(ser1)

ser2 = pd.Series({'a':[1,2,3],'b':['1','2','3']})

print(ser2)

a 1

b 2

c a

dtype: object

a [1, 2, 3]

b [1, 2, 3]

dtype: object

构造数据框(DataFrame)

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

print(d)

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

d={'color':['blue','green','yellow','red','white'],

'object':['ball','pen','pencil','paper','mug'],

'price':[1.2,1.0,0.6,0.9,1.7]}

frame = pd.DataFrame(d,index=['a','b','c','d','e'])

print(frame)

print(pd.DataFrame(index=[1, 2], columns=[1, 2]))

print(pd.DataFrame(1, index=[1, 2], columns=[1, 2]))

\[1.3, 2.0, 3, 4\], \[2, 4, 1, 4\], \[2, 5, 1.9, 7\], \[3, 1, 0, 11\]

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

color object price

a blue ball 1.2

b green pen 1.0

c yellow pencil 0.6

d red paper 0.9

e white mug 1.7

1 2

1 NaN NaN

2 NaN NaN

1 2

1 1 1

2 1 1

数据框的常用属性

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.values)

print(df.index)

print(df.shape)

print(df.dtypes)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

\[ 1.3 2. 3. 4.

2. 4. 1. 4.

2. 5. 1.9 7.

3. 1. 0. 11. \]

Index(['a', 'b', 'c', 'd'], dtype='object')

(4, 4)

A float64

B float64

C float64

D int64

dtype: object

数据框的查改增删操作

import pandas as pd

访问数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df['A']) # 单列数据访问

print(df[['A', 'C']]) # 多列数据访问

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A C

a 1.3 3.0

b 2.0 1.0

c 2.0 1.9

d 3.0 0.0

print(df.head(3)) # 访问某几行数据

print(df.tail(3))

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

A B C D

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

print(df)

print(df.iloc[0, 0]) # 按照行列顺序进行数据访问

print(df.iloc[0:3, 0])

print(df.iloc[:, 0])

print(df.iloc[0, :])

print(df.iloc[1:3, 1:3])

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

<class 'pandas.core.series.Series'>

print(df.loc['a', 'A']) # 按照行列名称进行数据访问

print(df.loc['a':'c', 'A'])

print(df.loc[:, 'A'])

print(df.loc['a', :])

print(df.loc[['b','c'], ['B', 'C']])

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

注意如下方式返回值的区别

print(df.iloc[:, 0])

print(df.iloc[:, 0:1])

print(type(df.iloc[:, 0]))

print(type(df.iloc[:, 0:1]))

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A

a 1.3

b 2.0

c 2.0

d 3.0

<class 'pandas.core.series.Series'>

<class 'pandas.core.frame.DataFrame'>

修改数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df.loc['a', 'A'] = 101

df.loc[:, 'B'] = 0.25

df.loc[:, 'C'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

a 101.0 0.25 1 4

b 2.0 0.25 2 4

c 2.0 0.25 3 7

d 3.0 0.25 4 11

C:\Users\Administrator\AppData\Local\Temp\ipykernel_70148\2679651701.py:7: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`

df.loc[:, 'C'] = [1, 2, 3, 4]

为数据框增添数据

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df['E'] = 5

df['F'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D E F

a 1.3 2.0 3.0 4 5 1

b 2.0 4.0 1.0 4 5 2

c 2.0 5.0 1.9 7 5 3

d 3.0 1.0 0.0 11 5 4

import pandas as pd

删除数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.drop('D'))

print(df.drop('D', axis=1, inplace=False)) # 删除数据框的列元素

print(df)

print(df.drop(['a', 'c'], axis=0)) # 输出数据框的行元素

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C

a 1.3 2.0 3.0

b 2.0 4.0 1.0

c 2.0 5.0 1.9

d 3.0 1.0 0.0

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

b 2.0 4.0 1.0 4

d 3.0 1.0 0.0 11

描述分析DataFrame数据

import numpy as np

import pandas as pd

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(np.mean(df, axis=1))

print(df.mean(axis=1))

print(df.std())

print(df.describe())

print(df.T.describe())

df['A'].value_counts()

转换与处理时间序列数据

import pandas as pd

order = pd.read_csv('data/meal_order_info.csv', encoding='gbk')

print(order)

print(order['lock_time'].dtypes)

order['lock_time'] = pd.to_datetime(order['lock_time'])

print(order['lock_time'].dtypes)

print(pd.DatetimeIndex(order['lock_time']))

print(pd.PeriodIndex(order['lock_time'], freq='H'))

order['lock_time']

print(order['lock_time'][0].year) # 获取数据年份信息

print(order['lock_time'].dt.year) # 获取数据年份信息

print(order['lock_time'].dt.month) # 获取数据月份信息

print(order['lock_time'].dt.week) # 获取数据周次信息

print(order['lock_time'] + pd.Timedelta(days=1)) # 时间平移

print(order['lock_time'][1] - order['lock_time'][0]) # 求时间差别

使用分组聚合进行组内计算

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

detail_group = detail[['order_id', 'counts', 'amounts']].groupby(by='order_id') # 分组操作

detail_group.agg('mean').head(3) # 对分组数据的所有列都执行mean操作

detail_group.agg(['mean', 'sum']).head(3) # 对分组数据的所有列都执行mean和sum操作

detail_group.agg({'counts': ['mean', np.max], 'amounts': 'std'}).head(3) # 对分组数据的不同列执行不同操作

detail_group.agg({'counts': lambda x: sum(x)**2}).head(3) # 将自定义函数放入聚合操作中

创建透视表与交叉表

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

pd.pivot_table(detail[['order_id', 'counts', 'amounts']], index='order_id', aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',values='counts', fill_value=0).head()

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name']).head(3)

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name'], values=detail['counts'], aggfunc='sum').fillna(0).head(3)

Matplotlib

python 复制代码
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.show()          # 第三环节,显示图形
python 复制代码
plt.figure

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title('lines')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(['y=x^2', 'y=x^4'])

plt.savefig('tmp/examplt.png')
plt.show()          # 第三环节,显示图形
python 复制代码
import numpy as np
import matplotlib.pyplot as plt

data = np.load('国民经济核算季度数据.npz', allow_pickle=True)
columns = data['columns']
values = data['values']
print(columns)
print(values)
data['values'].shape


# 绘制散点图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.scatter(values[:, 1], values[:, 3], marker='o')
plt.scatter(values[:, 1], values[:, 4], marker='*')
plt.scatter(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值散点图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值散点图.png')
plt.show()
python 复制代码
# 绘制折线图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.plot(values[:, 1], values[:, 3], linestyle='solid')
plt.plot(values[:, 1], values[:, 4], marker='*')
plt.plot(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值折线图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值折线图.png')
plt.show()
python 复制代码
# 绘制直方图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.title('2017年第一季度各产业生产总值直方图')
plt.ylabel('生产总值(亿元)')
plt.bar(columns[3:6], values[-1, 3:6])
my_height = values[-1, 3:6]
for i in range(len(my_height)):
    plt.text(i, my_height[i]+1000, my_height[i], va='bottom', ha='center')

plt.show()
python 复制代码
# 绘制饼图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']

plt.pie(values[-1, 3:6], explode=[0.01, 0.01, 0.01], labels=labels, autopct='%1.1f%%')
plt.title('2017年第一季度各产业生产总值饼图')
plt.show()
python 复制代码
# 绘制箱线图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3:6], notch=True, labels=labels)
plt.show()
python 复制代码
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3], notch=True)
plt.show()

Requests库

相关推荐
扯淡的闲人2 分钟前
多语言编码Agent解决方案(2)-后端服务实现
开发语言·python·深度学习
蒋星熠6 分钟前
深度学习实战指南:从神经网络基础到模型优化的完整攻略
人工智能·python·深度学习·神经网络·机器学习·卷积神经网络·transformer
万粉变现经纪人35 分钟前
如何解决pip安装报错ModuleNotFoundError: No module named ‘cuml’问题
python·scrapy·beautifulsoup·pandas·ai编程·pip·scipy
吴秋霖35 分钟前
主流反爬虫、反作弊防护与风控对抗手段
爬虫·算法·反爬虫技术
IT学长编程38 分钟前
计算机毕业设计 基于Hadoop豆瓣电影数据可视化分析设计与实现 Python 大数据毕业设计 Hadoop毕业设计选题【附源码+文档报告+安装调试
大数据·hadoop·python·django·毕业设计·毕业论文·豆瓣电影数据可视化分析
java1234_小锋1 小时前
Scikit-learn Python机器学习 - 分类算法 - K-近邻(KNN)算法
python·算法·机器学习
大翻哥哥1 小时前
Python上下文管理器进阶指南:不仅仅是with语句
前端·javascript·python
QiZhang | UESTC1 小时前
JAVA算法练习题day11
java·开发语言·python·算法·hot100
PyHaVolask1 小时前
Python进阶教程:随机数、正则表达式与异常处理
python·正则表达式·异常处理·随机数生成
折翼的恶魔2 小时前
数据分析:合并二
python·数据分析·pandas