Python爬虫

Numpy

创建数组

import numpy as np

创建数组

arr1=np.array([1,2,3])

arr2=np.array([[3,4,5],[2,4,1]])

print(arr1)

print(arr2)

print(type(arr1))

1 2 3

\[3 4 5

2 4 1\]

<class 'numpy.ndarray'>

查看数组的基础属性

print(arr1.shape)

print(arr1.ndim)

print(arr1.dtype)

print(arr2.shape)

print(arr2.ndim)

print(arr2.dtype)

(3,)

1

int32

(2, 3)

2

int32

初识数组特点

list1=([0.3,0.5,4.2])

arr1=np.array([0.3,0.5,4.2])

print(list1)

print(arr1)

print(list1 ** 2)

print([i**2 for i in list1])

print(arr1 ** 2)

0.3, 0.5, 4.2

0.3 0.5 4.2

0.09, 0.25, 17.64

0.09 0.25 17.64

创建常见数组

arr3=np.arange(0,10)

arr4=np.arange(10)

arr5=np.arange(0,1,0.1)

print(arr3)

print(arr4)

print(arr5)

0 1 2 3 4 5 6 7 8 9

0 1 2 3 4 5 6 7 8 9

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9

arr6=np.linspace(0,1,10)

print(arr6)

0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.

arr7=np.zeros([3,4,5])

print(arr7)

\[\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]

\[ 0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.

0. 0. 0. 0. 0.\]\]

arr8=np.ones([3,4,5])

print(arr8)

\[\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]

\[ 1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.

1. 1. 1. 1. 1.\]\]

数组的数据类型

arr8=np.array([3,4,5],dtype=np.float) # 声明数组的数据类型

print(arr8)

print(arr8.dtype)

arr8[0]=1.2

print(arr8)

print(np.int32(arr8)) # 转换数组的数据类型

3. 4. 5.

float64

1.2 4. 5.

1 4 5

生成随机数

print(np.random.random(10))

0.73744669 0.80080002 0.69015703 0.51743988 0.05570415 0.34901843 0.0734643 0.86541768 0.18830607 0.07036505

print(np.random.rand(10))

0.27580848 0.69278954 0.12912278 0.53180506 0.21617686 0.4357371 0.92604774 0.19278177 0.81396217 0.15550441

print(np.random.rand(3,4))

print(np.random.randn(3,4))

\[ 0.03654586 0.23675581 0.35543946 0.31476077

0.12875907 0.78566879 0.87653987 0.78687239

0.79805071 0.46032083 0.08375826 0.60476404\]

\[ 0.22623064 -0.34199973 -0.50766523 0.01726679

0.71462127 -1.19509683 -0.3916739 0.67375221

-1.02150652 0.98995901 1.64691806 0.81784057\]

数组的索引

arr1=np.array([0.3,0.78,0.24,5,3.2])

print(arr1)

print(arr1[0])

print(arr1[-5])

print(arr1[1:2])

print(arr1[-4:-2])

0.3 0.78 0.24 5. 3.2

0.3

0.3

0.78

0.78 0.24

逻辑型索引

arr2=np.array([2.3,1.8,4.5])

print(arr2)

print(arr2[[False,False,True]])

index=arr2>2

print(arr2[index])

2.3 1.8 4.5

4.5

2.3 4.5

多维数组的索引

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

print(arr3[2,3])

print(arr3[2,0:])

print(arr3[:,3])

print(arr3[1:,1:3])

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

12

9 10 11 12

4 8 12

\[ 6 7

10 11\]

\[ 5 6 7 8

9 10 11 12\]

False True True

修改数组中的元素

arr3=np.arange(1,13).reshape([3,4])

print(arr3)

arr3[0,0]=15

print(arr3)

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

\[15 2 3 4

5 6 7 8

9 10 11 12\]

求解距离矩阵

n=10 # 样本个数

x=np.linspace(1,100,n) # 样本的横坐标

y=np.linspace(1,100,n) # 样本的纵坐标

dist=np.sqrt((x[0]-x[1])**2+(y[0]-y[1])**2)

dist = np.zeros([n, n]) # 初始距离矩阵

for i in range(n):

for j in range(n):

dist[i, j] = np.sqrt((x[i] - x[j])**2 + (y[i] - y[j])**2) # 计算欧式距离

print(x)

print(y)

print(dist)

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

1. 12. 23. 34. 45. 56. 67. 78. 89. 100.

\[ 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349 140.00714267

15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443 124.45079349

31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512 108.8944443

46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593 93.33809512

62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674 77.78174593

77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756 62.22539674

93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837 46.66904756

108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919 31.11269837

124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. 15.55634919

140.00714267 124.45079349 108.8944443 93.33809512 77.78174593 62.22539674 46.66904756 31.11269837 15.55634919 0. \]

数组形态变化

arr4=np.arange(1,13)

print(arr4)

print(arr4.reshape([3,4]))

arr5=arr4.reshape([3,4])

print(arr5.ravel()) # 数组的展平

print(arr5.flatten('F')) #数组的纵向展平

arr6=arr4.reshape([3,4])

print(np.hstack((arr5,arr6))) # 数组的横向拼接

print(np.vstack((arr5,arr6))) # 数组的纵向拼接

1 2 3 4 5 6 7 8 9 10 11 12

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

1 2 3 4 5 6 7 8 9 10 11 12

1 5 9 2 6 10 3 7 11 4 8 12

\[ 1 2 3 4 1 2 3 4

5 6 7 8 5 6 7 8

9 10 11 12 9 10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12

1 2 3 4

5 6 7 8

9 10 11 12\]

掌握 NumPy 矩阵与通用函数

import numpy as np

matr1 = np.mat("1 2 3;4 5 6;7 8 9")

print(type(matr1))

matr2 = np.matrix([[1, 2, 3],[4, 5, 6],[7, 8, 9]])

np.bmat('matr1 matr2; matr1, matr2')

<class 'numpy.matrix'>

matrix([[1, 2, 3, 1, 2, 3],

4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\], \[1, 2, 3, 1, 2, 3\], \[4, 5, 6, 4, 5, 6\], \[7, 8, 9, 7, 8, 9\]\]) # 通用函数 arr1 = np.array(\[0.2, 0.4, 0.6\]) arr2=np.array(\[0.2,0.6,0.78\]) list1 = \[0.2, 0.4, 0.6

print(arr1+1)

print(arr1-1)

print(arr1*2)

print(arr1/2)

print(list1*2)

print(arr1+arr2)

print(arr1>0)

print(arr1<arr2)

print(np.any(arr1==0.2))

print(np.all(arr2==0.2))

1.2 1.4 1.6

-0.8 -0.6 -0.4

0.4 0.8 1.2

0.1 0.2 0.3

0.2, 0.4, 0.6, 0.2, 0.4, 0.6

0.4 1. 1.38

True True True

False True True

True

False

广播机制

arr3 = np.arange(1, 13).reshape([4, 3])

arr4 = np.array([1, 2, 3])

arr5 = np.array([[1], [2], [3], [4]])

print(arr3)

print(arr4)

print(arr5)

print(arr3+arr4)

print(arr3+arr5)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

1 2 3

\[1

2

3

4\]

\[ 2 4 6

5 7 9

8 10 12

11 13 15\]

\[ 2 3 4

6 7 8

10 11 12

14 15 16\]

利用 NumPy 进行统计分析

import numpy as np

读写二进制文件

arr1 = np.arange(1, 13).reshape([4, 3])

arr2 = np.arange(1, 13).reshape([3, 4])

print(arr1)

print(arr2)

\[ 1 2 3

4 5 6

7 8 9

10 11 12\]

\[ 1 2 3 4

5 6 7 8

9 10 11 12\]

读写txt文件

np.savetxt('tmp/arr1.txt', arr1, delimiter=',') # 保存数据

np.loadtxt('tmp/arr1.txt', delimiter=',') # 读取数据

array([[ 1., 2., 3.],

4., 5., 6.\], \[ 7., 8., 9.\], \[10., 11., 12.\]\]) # 使用数组进行简单统计分析 arr3 = np.random.randint(1, 10, (3, 4)) print(arr3) arr3.sort(axis=0) print(arr3) print(arr3.argsort(axis=0)) print(np.tile(arr3, 2)) print(np.repeat(arr3, 2, axis=1)) print(arr3.mean()) print(arr3.mean(axis=0)) print(arr3.max(axis=0)) print(arr3.argmax(axis=0)) \[\[8 6 7 3

2 9 9 4

2 5 2 3\]

\[2 5 2 3

2 6 7 3

8 9 9 4\]

\[0 0 0 0

1 1 1 1

2 2 2 2\]

\[2 5 2 3 2 5 2 3

2 6 7 3 2 6 7 3

8 9 9 4 8 9 9 4\]

\[2 2 5 5 2 2 3 3

2 2 6 6 7 7 3 3

8 8 9 9 9 9 4 4\]

5.0

4. 6.66666667 6. 3.33333333

8 9 9 4

2 2 2 2

Pandas

import pandas as pd

读取文本数据

pd.read_csv?

data_txt = pd.read_csv('data/meal_order_info.txt',sep=' ')

data_csv = pd.read_csv('data/meal_order_info.csv', encoding='gbk', header=0)

data_csv

将数据框存储为文本文件数据

data_csv.to_csv('tmp/data_csv.csv',index=None, encoding='gbk')

data_csv

读取Excel文件

data_excel = pd.read_excel('data/meal_order_detail.xlsx',sheet_name='meal_order_detail2')

data_excel

data_excel.to_excel('tmp/data_excel.xlsx', index=None, sheet_name='test1')

掌握DataFrame的常用操作

import pandas as pd

Series系列

ser1 = pd.Series([1,2,'a'],index=['a','b','c'])

print(ser1)

ser2 = pd.Series({'a':[1,2,3],'b':['1','2','3']})

print(ser2)

a 1

b 2

c a

dtype: object

a [1, 2, 3]

b [1, 2, 3]

dtype: object

构造数据框(DataFrame)

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

print(d)

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

d={'color':['blue','green','yellow','red','white'],

'object':['ball','pen','pencil','paper','mug'],

'price':[1.2,1.0,0.6,0.9,1.7]}

frame = pd.DataFrame(d,index=['a','b','c','d','e'])

print(frame)

print(pd.DataFrame(index=[1, 2], columns=[1, 2]))

print(pd.DataFrame(1, index=[1, 2], columns=[1, 2]))

\[1.3, 2.0, 3, 4\], \[2, 4, 1, 4\], \[2, 5, 1.9, 7\], \[3, 1, 0, 11\]

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

color object price

a blue ball 1.2

b green pen 1.0

c yellow pencil 0.6

d red paper 0.9

e white mug 1.7

1 2

1 NaN NaN

2 NaN NaN

1 2

1 1 1

2 1 1

数据框的常用属性

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.values)

print(df.index)

print(df.shape)

print(df.dtypes)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

\[ 1.3 2. 3. 4.

2. 4. 1. 4.

2. 5. 1.9 7.

3. 1. 0. 11. \]

Index(['a', 'b', 'c', 'd'], dtype='object')

(4, 4)

A float64

B float64

C float64

D int64

dtype: object

数据框的查改增删操作

import pandas as pd

访问数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df['A']) # 单列数据访问

print(df[['A', 'C']]) # 多列数据访问

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A C

a 1.3 3.0

b 2.0 1.0

c 2.0 1.9

d 3.0 0.0

print(df.head(3)) # 访问某几行数据

print(df.tail(3))

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

A B C D

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

print(df)

print(df.iloc[0, 0]) # 按照行列顺序进行数据访问

print(df.iloc[0:3, 0])

print(df.iloc[:, 0])

print(df.iloc[0, :])

print(df.iloc[1:3, 1:3])

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

<class 'pandas.core.series.Series'>

print(df.loc['a', 'A']) # 按照行列名称进行数据访问

print(df.loc['a':'c', 'A'])

print(df.loc[:, 'A'])

print(df.loc['a', :])

print(df.loc[['b','c'], ['B', 'C']])

1.3

a 1.3

b 2.0

c 2.0

Name: A, dtype: float64

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A 1.3

B 2.0

C 3.0

D 4.0

Name: a, dtype: float64

B C

b 4.0 1.0

c 5.0 1.9

注意如下方式返回值的区别

print(df.iloc[:, 0])

print(df.iloc[:, 0:1])

print(type(df.iloc[:, 0]))

print(type(df.iloc[:, 0:1]))

a 1.3

b 2.0

c 2.0

d 3.0

Name: A, dtype: float64

A

a 1.3

b 2.0

c 2.0

d 3.0

<class 'pandas.core.series.Series'>

<class 'pandas.core.frame.DataFrame'>

修改数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df.loc['a', 'A'] = 101

df.loc[:, 'B'] = 0.25

df.loc[:, 'C'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

a 101.0 0.25 1 4

b 2.0 0.25 2 4

c 2.0 0.25 3 7

d 3.0 0.25 4 11

C:\Users\Administrator\AppData\Local\Temp\ipykernel_70148\2679651701.py:7: DeprecationWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`

df.loc[:, 'C'] = [1, 2, 3, 4]

为数据框增添数据

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

df['E'] = 5

df['F'] = [1, 2, 3, 4]

print(df)

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D E F

a 1.3 2.0 3.0 4 5 1

b 2.0 4.0 1.0 4 5 2

c 2.0 5.0 1.9 7 5 3

d 3.0 1.0 0.0 11 5 4

import pandas as pd

删除数据框中的元素

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(df.drop('D'))

print(df.drop('D', axis=1, inplace=False)) # 删除数据框的列元素

print(df)

print(df.drop(['a', 'c'], axis=0)) # 输出数据框的行元素

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C

a 1.3 2.0 3.0

b 2.0 4.0 1.0

c 2.0 5.0 1.9

d 3.0 1.0 0.0

A B C D

a 1.3 2.0 3.0 4

b 2.0 4.0 1.0 4

c 2.0 5.0 1.9 7

d 3.0 1.0 0.0 11

A B C D

b 2.0 4.0 1.0 4

d 3.0 1.0 0.0 11

描述分析DataFrame数据

import numpy as np

import pandas as pd

d=[[1.3,2.0,3,4],[2,4,1,4],[2,5,1.9,7],[3,1,0,11]]

df = pd.DataFrame(d, index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

print(df)

print(np.mean(df, axis=1))

print(df.mean(axis=1))

print(df.std())

print(df.describe())

print(df.T.describe())

df['A'].value_counts()

转换与处理时间序列数据

import pandas as pd

order = pd.read_csv('data/meal_order_info.csv', encoding='gbk')

print(order)

print(order['lock_time'].dtypes)

order['lock_time'] = pd.to_datetime(order['lock_time'])

print(order['lock_time'].dtypes)

print(pd.DatetimeIndex(order['lock_time']))

print(pd.PeriodIndex(order['lock_time'], freq='H'))

order['lock_time']

print(order['lock_time'][0].year) # 获取数据年份信息

print(order['lock_time'].dt.year) # 获取数据年份信息

print(order['lock_time'].dt.month) # 获取数据月份信息

print(order['lock_time'].dt.week) # 获取数据周次信息

print(order['lock_time'] + pd.Timedelta(days=1)) # 时间平移

print(order['lock_time'][1] - order['lock_time'][0]) # 求时间差别

使用分组聚合进行组内计算

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

detail_group = detail[['order_id', 'counts', 'amounts']].groupby(by='order_id') # 分组操作

detail_group.agg('mean').head(3) # 对分组数据的所有列都执行mean操作

detail_group.agg(['mean', 'sum']).head(3) # 对分组数据的所有列都执行mean和sum操作

detail_group.agg({'counts': ['mean', np.max], 'amounts': 'std'}).head(3) # 对分组数据的不同列执行不同操作

detail_group.agg({'counts': lambda x: sum(x)**2}).head(3) # 将自定义函数放入聚合操作中

创建透视表与交叉表

import pandas as pd

detail = pd.read_excel('data/meal_order_detail.xlsx')

detail.head()

pd.pivot_table(detail[['order_id', 'counts', 'amounts']], index='order_id', aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',aggfunc='sum').head(3)

pd.pivot_table(detail[['order_id', 'dishes_name', 'counts']], index='order_id', columns='dishes_name',values='counts', fill_value=0).head()

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name']).head(3)

pd.crosstab(index=detail['order_id'], columns=detail['dishes_name'], values=detail['counts'], aggfunc='sum').fillna(0).head(3)

Matplotlib

python 复制代码
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.show()          # 第三环节,显示图形
python 复制代码
plt.figure

x = np.arange(0, 1.1, 0.1)
print(x)
plt.figure()        # 第一环节,创建画布
plt.plot(x, x**2)   # 第二环节,绘制图形
plt.plot(x, x**4)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.title('lines')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(['y=x^2', 'y=x^4'])

plt.savefig('tmp/examplt.png')
plt.show()          # 第三环节,显示图形
python 复制代码
import numpy as np
import matplotlib.pyplot as plt

data = np.load('国民经济核算季度数据.npz', allow_pickle=True)
columns = data['columns']
values = data['values']
print(columns)
print(values)
data['values'].shape


# 绘制散点图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.scatter(values[:, 1], values[:, 3], marker='o')
plt.scatter(values[:, 1], values[:, 4], marker='*')
plt.scatter(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值散点图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值散点图.png')
plt.show()
python 复制代码
# 绘制折线图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.plot(values[:, 1], values[:, 3], linestyle='solid')
plt.plot(values[:, 1], values[:, 4], marker='*')
plt.plot(values[:, 1], values[:, 5], marker='D')

plt.xticks(range(0, 70, 4), values[range(0, 70, 4), 1], rotation=45)
plt.legend(['第一产业生产总值', '第二产业生产总值', '第三产业生产总值'])
plt.title('2000-2017年各产业生产总值折线图')
plt.ylabel('生产总值(亿元)')
plt.savefig('tmp/2000-2017年各产业生产总值折线图.png')
plt.show()
python 复制代码
# 绘制直方图
plt.figure(figsize=(8, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

plt.title('2017年第一季度各产业生产总值直方图')
plt.ylabel('生产总值(亿元)')
plt.bar(columns[3:6], values[-1, 3:6])
my_height = values[-1, 3:6]
for i in range(len(my_height)):
    plt.text(i, my_height[i]+1000, my_height[i], va='bottom', ha='center')

plt.show()
python 复制代码
# 绘制饼图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']

plt.pie(values[-1, 3:6], explode=[0.01, 0.01, 0.01], labels=labels, autopct='%1.1f%%')
plt.title('2017年第一季度各产业生产总值饼图')
plt.show()
python 复制代码
# 绘制箱线图
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3:6], notch=True, labels=labels)
plt.show()
python 复制代码
plt.figure(figsize=(6, 6))
plt.rcParams['font.sans-serif'] = 'SimHei'  # 设置中文显示
plt.rcParams['axes.unicode_minus'] = False

labels = ['第一产业', '第二产业', '第三产业']
plt.boxplot(values[:, 3], notch=True)
plt.show()

Requests库

相关推荐
Juchecar21 分钟前
分析:将现代开源浏览器的JavaScript引擎更换为Python的可行性与操作
前端·javascript·python
科大饭桶23 分钟前
昇腾AI自学Day2-- 深度学习基础工具与数学
人工智能·pytorch·python·深度学习·numpy
天才测试猿2 小时前
常见的Jmeter压测问题
自动化测试·软件测试·python·测试工具·jmeter·职场和发展·压力测试
mortimer2 小时前
一次与“顽固”外部程序的艰难交锋:subprocess 调用exe踩坑实录
windows·python·ai编程
来自天蝎座的孙孙4 小时前
洛谷P1595讲解(加强版)+错排讲解
python·算法
张子夜 iiii5 小时前
机器学习算法系列专栏:主成分分析(PCA)降维算法(初学者)
人工智能·python·算法·机器学习
跟橙姐学代码6 小时前
学Python像学做人:从基础语法到人生哲理的成长之路
前端·python
Keying,,,,6 小时前
力扣hot100 | 矩阵 | 73. 矩阵置零、54. 螺旋矩阵、48. 旋转图像、240. 搜索二维矩阵 II
python·算法·leetcode·矩阵
桃源学社(接毕设)6 小时前
基于人工智能和物联网融合跌倒监控系统(LW+源码+讲解+部署)
人工智能·python·单片机·yolov8
yunhuibin7 小时前
pycharm2025导入anaconda创建的各个AI环境
人工智能·python