####实验八####
#第一题
#import random
#with open("suiji.txt", 'w+', encoding="utf-8") as pytxt:
#    for i in range(100000):
#        pytxt.write(str(random.randint(1, 100)))
#        pytxt.write('\n')
#pytxt.close()
####第二题
#importcsv#headers=['SNO','NAME','SCORE']#rows=[('004','zhangqi','98'),
# ('005','lijiu','95'),
# ('006','wangyi','92')]
#
#withopen('tongji.csv','w', encoding='utf8', newline='') as f:#writer= csv.writer(f)#writer.writerow(headers)#writer.writerows(rows)
#
#
#print('tongji的csv文件内容为:')#withopen('tongji.csv',"r", encoding='utf8', newline='') as f:#reader= csv.reader(f)#forrow in reader:#print(row)
#
#withopen("tongji.csv",'r') as pycsv:#count1=0#count2=0#count3=0#fori in pycsv.read():#ifi.isdigit():#count1+=1#ifi.islower():#count2+=1#ifi.isupper():#count3+=1#print("数字出现次数:",count1)#print("小写字母出现次数:",count2)#print("大写字母出现次数:",count3)
#
#pycsv.close()
###第三题
#importcsv#importrandom#headers=['SNO','NAME','SCORE']#rows=[('004','zhangqi',98),
# ('005','lijiu',95),
# ('006','wangyi',92)]#withopen(r'./pingjun.csv','w',encoding='utf-8', newline='') as f:#w=csv.writer(f)#w.writerow(headers)#w.writerows(rows)
#
#sum=0#n=0#withopen("pingjun.csv",'r',encoding='utf-8') as f:#r=csv.reader(f,dialect='excel',delimiter=',')#not_head=next(r)#fori in r:#sum=sum+int(i[2])#n+=1#print(i)
#
#print(f'数值的总分为:{sum},平均分为{sum/n}')#f.close()
###第四题
#withopen(r'./test-row.txt','w',encoding='utf8') as f:#fori in range(10):#ifi%2==0:#f.write('#nihao')#else:#f.write('没有#')#f.write('\n')
#
#withopen(r'./test-row.txt','r',encoding='utf8') as f:#s= f.readlines()#forrow in s:#ifnot row.startswith('#'):#print(row,end='')
###第五题
#importos#oldname=os.listdir('./')#os.chdir('./')#forname in oldname:#os.rename(name,"文件操做-"+name)
###第六题
#
#withopen(r'./yingwen.txt','w',encoding='utf-8') as f:#fori in range(5):#f.write('ABabYZ')#f.write('\n')
#
#
#withopen("yingwen.txt","r") as f:#content=f.read()#print("原文件的内容:",'\n'+content)
#
#newstr=''#fori in content:#ifi.islower():#ifi =='a'or i =='b':#i=chr(ord(i)+1)#ifi.isupper():#ifi =='A'or i =='B'or i =='Y':#i=chr(ord(i)+1)#elifi =='Z':#i=chr(65)#newstr=''.join([newstr,i])#print("修改后文件中的内容:",'\n'+ newstr)
#
#withopen("yingwen_new.txt","w+") as fp:#fp.write(newstr)
####第七题
#importrequests
# # url ='https://img12.360buyimg.com/n1/s450x450_jfs/t1/142383/17/13825/73504/5fa8b1c8E6a34854c/91c7d2bb5ea7f9aa.jpg'#url= 'http://img10.360buyimg.com/n1/s450x450_jfs/t1/70270/22/10661/109832/5d834285E50d7c841/ad61e639c4fb32d5.jpg'#response= requests.get(url).content
#
#withopen(r'./文件操作-ad61e639c4fb32d5.jpg','wb') as f:#f.write(response)#f.close()
###########################实验十一########################
import csv

import numpy as np
import pandas as pd
# Raw records for the exercise, one list per column; np.nan marks a missing value.
data = {
    'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
    'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
    'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    'priority': ['yes', np.nan, 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no'],
}
# Row labels 'a' .. 'j', one per record.
labels = list('abcdefghij')
# 1) Build the DataFrame df, indexed by the row labels; the columns follow
#    the key order of `data`.
column_order = list(data)
df = pd.DataFrame(data, index=labels, columns=column_order)
# 2) Print the first three rows of df, then every row whose 'visits'
#    value is greater than 2.
print(df.iloc[0:3], "\n")
print(df.loc[df['visits'] > 2])
# 3) Print the rows that contain at least one missing value, then the
#    'age' and 'animal' columns.
# any(axis=1) collapses the cell-level null mask to one flag per row, so
# each affected row is selected exactly once.
rows_with_missing = df[df.isnull().any(axis=1)]
print(rows_with_missing, '\n')
print(df[['age', 'animal']])
# 4) Print every row with animal == 'cat' and age < 3, then set the 'age'
#    value of row 'f' to 1.5.
df1 = df.loc[(df['animal'] == 'cat') & (df['age'] < 3)]
print(df1, '\n')
# Address the cell by label instead of by position so the update does not
# depend on the row/column order of the frame.
df.loc['f', 'age'] = 1.5
# 5) Count how many times each distinct value occurs in the 'animal' column.
# Named animal_counts rather than `sum` so the builtin is not shadowed.
animal_counts = df['animal'].value_counts()
print(animal_counts)
# 6) Replace every 'snake' entry in the 'animal' column with 'tangyudi'.
df['animal'] = df['animal'].replace('snake', 'tangyudi')
print(df['animal'])
# 7) Print df sorted by the values of the 'animal' column.
print(df.sort_values(by='animal'))
# Also print df with its columns arranged in sorted label order.
print(df.sort_index(axis=1))
# 8) Append a column named 'No.' after the last column, numbering the rows
#    0, 1, 2, ... (0-9 for the ten rows built above).
# Deriving the numbers from len(df) keeps the column valid for any row count.
df['No.'] = range(len(df))
print(df)
# 9) Compute the mean, product and sum of the 'visits' column.
# Unlike cumprod()/cumsum(), which return the running result at every
# element, prod()/sum() return a single aggregate.
visits_mean = df.visits.mean()
visits_product = df.visits.prod()
visits_sum = df.visits.sum()
print(visits_mean)
print(visits_product)
print(visits_sum)
# 10) Convert every string in the 'animal' column to upper case.
df['animal'] = df['animal'].str.upper()
print(df)
# 11) Make a shallow copy df2 of df and fill all of its missing values with 3.
# copy() is deep by default; deep=False gives the shallow copy the task asks for.
df2 = df.copy(deep=False)
# fillna returns a new frame (no inplace=True), so df keeps its missing values.
df2 = df2.fillna(3)
print(df2)
# 12) Make a shallow copy df3 of df and drop the rows that contain missing values.
# copy() is deep by default; deep=False gives the shallow copy the task asks for.
df3 = df.copy(deep=False)
# Default axis=0 drops rows; axis=1 would drop the columns containing NaN instead.
df3 = df3.dropna()
print(df3)
# 13) Write df to the file animal.csv in the current working directory.
csv_path = "animal.csv"
df.to_csv(csv_path)
## Question 2
## 1) In the " Class" column replace "negative" with 0 and "positive" with 1,
##    then count how often 0 and 1 occur.
dft = pd.read_csv('haberman-kmes.dat')
# The header and the values are matched with a leading space, exactly as
# they are spelled in this script (' Class', ' negative', ' positive').
dft.loc[dft[' Class'] == ' negative', ' Class'] = 0
dft.loc[dft[' Class'] == ' positive', ' Class'] = 1
print(dft)
# Frequency of each class label after recoding.
print(dft[' Class'].value_counts())
# 2) Create df2 as a copy of dft holding every column except the last one.
# Slicing by position avoids hard-coding the column names; .copy() detaches
# df2 so later changes to it do not touch dft.
df2 = dft.iloc[:, :-1].copy()
# 3) Min-max normalise every column of df2: (x - x_min) / (x_max - x_min),
#    where x_min and x_max are the smallest and largest value of the column.
col_min = df2.min()
col_max = df2.max()
df2 = (df2 - col_min) / (col_max - col_min)
# 4) Compute the Euclidean distance between every pair of rows (samples) of
#    df2 and collect the results in the distance array df3.
from numpy.linalg import norm

samples = df2.to_numpy(dtype=float)
# Broadcasting (n, 1, k) against (1, n, k) yields all row-to-row differences
# as an (n, n, k) array; the norm over the last axis reduces it to (n, n).
pairwise_diffs = samples[:, np.newaxis, :] - samples[np.newaxis, :, :]
df3 = norm(pairwise_diffs, axis=-1)
print(df3)
# 5) Sort the values inside every row of df3 in ascending order.
df3 = np.sort(df3, axis=1)
###### Question 3
df = pd.read_csv('adult.dat')
## 1) Drop every row that contains a missing value.
# dropna returns a new frame, so the result has to be assigned back.
df = df.dropna(axis=0, how='any')
## 2) Drop duplicated rows (equivalent to df[~df.duplicated()]).
df = df.drop_duplicates()
## 3) Group the data by the ' Class' column, print the per-group mean and
##    variance of the numeric columns listed in y, and for every other column
##    print the number of distinct values and its share of the row count.
x = list(df.columns)
print(x)
y = ['Age', ' Education-num', ' Capital-gain', ' Capital-loss', ' Hours-p-week']
df1 = df.groupby(' Class')
for i in y:
    print("{}的平均值{}".format(i, df1[i].mean()))
for i in y:
    print("{}的方差{}".format(i, df1[i].var()))
for i in x:
    if i not in y:
        # NOTE(review): this adds up the per-group distinct counts, so a value
        # that occurs in several groups is counted once per group - confirm
        # that this is the intended figure.
        distinct_total = df1[i].nunique().sum()
        print("{0:}不同元素有{1:}个,所占比例:{2:.2f}%".format(
            i, distinct_total, distinct_total * 100 / df[i].size))
## 4) Split 'Age' into three bands - youth (0-18), middle-aged (19-45) and
##    elderly (45-100) - group the data by band, print the per-group mean and
##    variance of the numeric columns listed in a, and for every other column
##    print the number of distinct values and its share of the counted rows.
print(df.columns)
a = [' Education-num', ' Capital-gain', ' Capital-loss', ' Hours-p-week']
bins = [0, 18, 45, 100]
labels = ['青年人', '中年人', '老年人']
# Right-closed bins with include_lowest give [0, 18], (18, 45], (45, 100],
# i.e. 18 falls in the first band and 19-45 in the second, as the task states.
cut_groups = pd.cut(df['Age'], bins=bins, right=True, include_lowest=True, labels=labels)
# observed=False keeps all three bands in the result even if one is empty.
Age_groups = df.groupby(cut_groups, observed=False)
# The statistics are taken on the grouped column values themselves; each
# result is a Series with one entry per age band, printed on its own lines.
for i in a:
    print("{0:} 的平均值\n{1:}".format(i, Age_groups[i].mean()))
for i in a:
    print("{0:} 的方差\n{1:}".format(i, Age_groups[i].var()))
for i in df.columns:
    if i not in a:
        # NOTE(review): the distinct counts are summed over the bands, so a
        # value present in several bands is counted once per band - confirm.
        distinct_total = Age_groups[i].nunique().sum()
        # Number of non-null entries of this column across all bands.
        counted_rows = Age_groups[i].count().sum()
        print("{0:}不同元素有{1:}个,所占比例:{2:.2f}%".format(
            i, distinct_total, distinct_total * 100 / counted_rows))
##############################实验十二##############
#importmatplotlib.pyplot as plt#importnumpy as np
#
#plt.rcParams['font.sans-serif']=['SimHei']#中文乱码问题解决
######第一题,用点加线的方式画出 x=(0,10) 间 sin 的图像
#x= np.linspace(0,10)#y= np.sin(x)#plt.plot(x, y,'o-') # 用点加线的方式画出x=(0,10)间sin的图像#plt.show()
#######第二题, 利用以下数据分别制作水平和垂直柱状图
#x=[1,2,3,4,5,6,7,8]#y=[3,1,4,5,8,9,7,2]#label=['A','B','C','D','E','F','G','H']#plt.barh(x,y,tick_label = label) #水平#plt.show()#plt.bar(x,y,tick_label = label) #垂直#plt.show()
#######第三题,自定义图表元素
##1) 绘制 x=(0,10)间 sin 的图像,设置线性为虚线
#x= np.linspace(0,10)#y= np.sin(x)#plt.plot(x,y,'--')
# ##2) 设置 y 轴显示范围为(-1.5,1.5)#plt.ylim(-1.5,1.5)
# ##3)设置 x,y 轴标签分别为“variable x”,“value y”
#plt.xlabel('variable x')#plt.ylabel('value y')
# ##4)设置图表标题“三角函数”
#plt.title('三角函数')
# ##5)显示网格
#plt.grid()
# ##6) 绘制平行于 x轴 y=0.8的水平参考线(提示:使用.axhline)
#plt.axhline(y =0.8,ls='--',c='red')
# ##7)添加注释文字 sin(x)#plt.text(1,1,'y=sin(x)',weight='bold', color='black')#plt.show()
########第四题, 多子图
##在一个 10×10 的画布中,制作 2 个子图,分别显示 sin(x) 和 cos(x)的图像,设置相同行和列共享 x,y 轴
#x= np.linspace(0,20,2000)#y1= np.sin(x)#y2= np.cos(x)
#
#figure,(ax1,ax2)= plt.subplots(2,1,figsize=(10,10),dpi=100,sharex=True,sharey=True)#ax1.plot(x,y1,c='blue',linestyle='-')#ax2.plot(x,y2,c='orange',linestyle='-')#figure.subplots_adjust(hspace=0.1)
##方法二
#fig= plt.figure(figsize=(10,10))#axe1= fig.add_axes([0.5,0.5,0.4,0.2], ylim=(-1.1,1.1))#axe2= fig.add_axes([0.05,0.5,0.4,0.2], ylim=(-1.1,1.1))#x= np.linspace(0,20)#axe1.plot(np.sin(x))#axe2.plot(np.cos(x))
##########第五题#######
#text='''Hooray! It's snowing! It's time to make a snowman.\#James runs out. He makes a big pile of snow. He puts a big snowball on top.\#He adds a scarf and a hat. He adds an orange for the nose.\#He adds coal for the eyes and buttons. In the evening, James opens the door.\#What does he see ? The snowman is moving! James invites him in.\#The snowman has never been inside a house. He says hello to the cat.\#He plays with paper towels. A moment later, the snowman takes James's hand and goes out.\#They go up, up, up into the air! They are flying! What a wonderful night!\#The next morning, James jumps out of bed. He runs to the door.\#He wants to thank the snowman. But he's gone.'''
#
#text= text.replace(',','').replace('.','').replace('!','').replace('?','')#text= text.split() # 默认会以空格,回车符,空格符等作为分割条件。#date={}#words=[]#counts=[]#setword=set(text)#fori in setword:#count= text.count(i)#words.append(i)#counts.append(count)
#
#date=dict(zip(words, counts))
#
#date=sorted(date.items(), key = lambda x:x[1], reverse=1)
#
#words5=[]#count5=[]#fori in range(5):#words5.append(date[i][0])#count5.append(date[i][1])
#
#plt.pie(np.array(count5), labels=words5, autopct='%1.1f%%')#plt.title('WordsCount')#plt.show()