1 字典类型的特征抽取(类别型特征转换为 one-hot 编码,数值型特征保持原值)
API:
from sklearn.feature_extraction import DictVectorizer
语法:
dv = DictVectorizer(sparse=False) #实例化
dv.fit_transform() # 字典 --> one hot编码
dv.inverse_transform() # one hot编码 --> 字典
dv.get_feature_names_out() # 获取特征的名称(scikit-learn 1.0 之前的旧版本使用 dv.get_feature_names())
例子
# 字典特征抽取
from sklearn.feature_extraction import DictVectorizer
def dict_extraciton():
    """Demonstrate dictionary feature extraction with ``DictVectorizer``.

    Fits a dense (``sparse=False``) ``DictVectorizer`` on three sample
    records, then prints, in order:

    1. the generated feature names,
    2. the encoded matrix returned by ``fit_transform``,
    3. the result of ``inverse_transform`` applied to that matrix.

    Returns:
        None. All results are written to standard output.
    """
    data_dict = [
        {'city': '北京', 'temperature': 32},
        {'city': '上海', 'temperature': 22},
        {'city': '深圳', 'temperature': 17},
    ]

    dict_vectorizer = DictVectorizer(sparse=False)
    one_hot_data = dict_vectorizer.fit_transform(data_dict)

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2. Prefer get_feature_names_out() and fall back to the old method
    # only when running against an older release.
    if hasattr(dict_vectorizer, "get_feature_names_out"):
        # tolist() so the names print as a plain list, like the old API did.
        feature_names = dict_vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = dict_vectorizer.get_feature_names()
    print(feature_names)
    print(one_hot_data)

    # Convert the encoded matrix back into a list of dict-like mappings.
    mydict = dict_vectorizer.inverse_transform(one_hot_data)
    print(mydict)


dict_extraciton()
['city=上海', 'city=北京', 'city=深圳', 'temperature']
[[ 0. 1. 0. 32.]
[ 1. 0. 0. 22.]
[ 0. 0. 1