1.1python——理解数据集
python入门——理解数据集(特征类型及统计)
涵盖了基础问题的架构、术语、机器学习数据集的特性
首先导入所需的 Python 内置模块(标准库)
# Author metadata for this script.
# NOTE(review): the conventional module attribute is `__author__` (double
# underscores); confirm whether this single-underscore name is intentional.
_author_ = 'mike_blowels'
import urllib.request
import sys
从网页载入数据
# Read the sonar data set from the UCI repository.
target_url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "undocumented/connectionist-bench/sonar/sonar.all-data"
)

# Collect the data as a list of rows; each row is a list of string fields.
xlist = []
labels = []  # not filled in this section; kept for code further down the file
# The context manager closes the HTTP response once every line has been read,
# so the connection is released even if decoding a line raises.
with urllib.request.urlopen(target_url) as data:
    for line in data:
        # Lines arrive as bytes: decode, trim the trailing newline, then
        # split on commas to get the individual fields.
        row = line.decode().strip().split(",")
        xlist.append(row)
识别各列属性的类型(数值或字符)
nrow = len(xlist)  # number of rows
# Number of columns is taken from the first row. Indexing the second row
# raises IndexError when only one row was loaded; empty data now yields
# zero columns instead of crashing.
ncol = len(xlist[0]) if xlist else 0
# sys.stdout.write("Number of rows"+str(len(xlist))+'\n')  # debug output
# sys.stdout.write("number of columes"+str(len(xlist[1])))
# NOTE(review): `type` shadows the builtin of the same name. It is kept
# because the loop that follows this section presumably updates these three
# counters — confirm before renaming.
type = [0]*3
colcounts = []
for col in range(ncol):
for row in xlist:
try:
a = float(row[col])