@浙大疏锦行 PythonDay8.
内容:
-
字典(键值对)
* `dict_test = dict(name='zhangsan', age=18)`
-
标签编码(适用于有顺序的离散特征)
-
连续特征的归一化和标准化处理
代码:
python
# Question 1: dictionary
# Named `scores` rather than `dict`: binding the name `dict` at module level
# hides the builtin type and makes every later `dict(...)` call in this file
# raise "TypeError: 'dict' object is not callable".
scores = {'Alice': 100, 'Bob': 200, 'Charlie': 300}
# Question 2: label encoding and scaling of continuous features
import pandas as pd
import numpy as np
# One-hot encoding
def one_hot(data, columns):
    """One-hot encode the given columns of ``data``.

    Args:
        data: DataFrame to encode.
        columns: Labels of the columns to expand into indicator columns.

    Returns:
        The result of ``pandas.get_dummies`` for ``data`` and ``columns``.
    """
    return pd.get_dummies(data=data, columns=columns)
# Label encoding
def label_encoder(data, columns):
    """Replace the values of each listed column with integer codes, in place.

    Each column is encoded independently: its distinct non-missing values
    are numbered 0, 1, 2, ... in order of first appearance, and every cell
    is replaced by the code of its value. Missing values stay missing.

    Args:
        data: DataFrame holding the columns to encode; it is modified in place.
        columns: Labels of the columns to encode. May be empty.

    Returns:
        The same ``data`` object that was passed in.
    """
    # Explicit length check (not truthiness) so array-like inputs such as a
    # pandas Index are accepted as well as plain lists.
    if len(columns) == 0:
        return data
    for column in columns:
        categories = data[column].dropna().unique()
        # Built with a comprehension rather than dict(zip(...)) so the encoder
        # keeps working even when the name `dict` has been rebound.
        mapping = {category: code for code, category in enumerate(categories)}
        data[column] = data[column].map(mapping)
    return data
# Min-max normalization
def min_max(data, columns):
    """Rescale each listed column of ``data`` to the [0, 1] range, in place.

    Every value ``x`` becomes ``(x - min) / (max - min)``, using that
    column's own minimum and maximum.

    Args:
        data: DataFrame holding the columns to rescale; it is modified in place.
        columns: Labels of the numeric columns to rescale. May be empty.

    Returns:
        The same ``data`` object that was passed in.
    """
    for column in columns:
        column_data = data[column]
        min_val = column_data.min()
        value_range = column_data.max() - min_val
        shifted = column_data - min_val
        if value_range == 0:
            # Constant column: dividing would give 0/0 = NaN for every row.
            # Map it to 0.0 instead, which keeps the column usable.
            data[column] = shifted.astype(float)
        else:
            data[column] = shifted / value_range
    return data
data = pd.read_csv("./data/heart.csv")

# Split the columns by dtype: object-typed columns are treated as discrete
# features, every other column as continuous.
discrete_columns = [
    name for name in data.columns if data[name].dtype == 'object'
]
continuous_columns = [
    name for name in data.columns if data[name].dtype != 'object'
]

# Scale the continuous features first, then encode the discrete ones.
data = min_max(data, continuous_columns)  # min-max normalization
# Alternative encoding: data = one_hot(data, discrete_columns)  # one-hot
data = label_encoder(data, discrete_columns)  # label encoding

print(data.head())