环境:Ubuntu 20.10,Python 3.8
代码如下:
#coding:utf-8
from sklearn.feature_extraction import DictVectorizer, stop_words
from sklearn.feature_extraction.text import CountVectorizer
import jieba
def dict_demo():
    """Demonstrate dictionary feature extraction with DictVectorizer.

    One-hot encodes the categorical "city" field and passes the numeric
    "temperature" field through unchanged, then prints the learned feature
    names and the transformed matrix.

    :return: None
    """
    data = [{"city": "北京", "temperature": 100},
            {"city": "上海", "temperature": 60},
            {"city": "深圳", "temperature": 30}]
    # 1. Instantiate the vectorizer. sparse=False returns a dense ndarray
    #    instead of the default scipy sparse matrix, so it prints readably.
    transfer = DictVectorizer(sparse=False)
    # 2. Fit on the list of dicts and transform it in one step.
    trans_data = transfer.fit_transform(data)
    # NOTE(review): get_feature_names() is deprecated/removed in newer
    # scikit-learn; on recent versions use get_feature_names_out() instead.
    print("特征名字是:\n", transfer.get_feature_names())
    print(trans_data)
def english_count_text_demo():
#"&#