作业1
结合Matplotlib分别绘制英国和美国视频评论数量的分布图形,体现其评论数主要分布在哪个区间。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator as mul #重新设置坐标轴刻度
# Text rendering setup for the plots below, whose axis labels are Chinese:
# disable the 'axes.unicode_minus' option and make SimHei the sans-serif font.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.sans-serif'] = ['SimHei']
def load_csv_to_array(filename: str) -> np.ndarray:
    """Load a comma-separated numeric file into a 2-D float array.

    Args:
        filename: Path of the CSV file; every field must parse as a float
            and no header row is skipped.

    Returns:
        A two-dimensional ``float`` array with one row per line of the file.
    """
    # ndmin=2 keeps a one-line file two-dimensional, so callers that index
    # columns with ``arr[:, k]`` do not fail on a 1-D result.
    return np.loadtxt(filename, delimiter=',', skiprows=0, dtype=float, ndmin=2)
def comment_hist(ax_set, np_arr, graph_name, interval: int, color: str):
    """Draw a histogram of the comment-count column on the given axes.

    Args:
        ax_set: Matplotlib axes that receives the labels, ticks and bars.
        np_arr: 2-D array; column index 3 is read as the comment counts.
        graph_name: Title placed above the axes.
        interval: Spacing of the major x ticks; the number of bins is the
            value range divided by this interval, rounded.
        color: Bar colour passed to ``hist``.
    """
    np_data = np_arr[:, 3]  # comment-count column
    difference_com = np.max(np_data) - np.min(np_data)  # value range
    # hist() rejects zero bins, which the rounding yields whenever the range
    # is smaller than half an interval; int() keeps the count a plain integer.
    bins = max(1, int(round(difference_com / interval)))
    print(bins)
    ax_set.set_xlabel('评论数')
    ax_set.set_ylabel('频数')
    ax_set.set_title(graph_name)
    # Configure the axes that was passed in rather than whatever pyplot
    # currently regards as the "current" axes.
    ax_set.xaxis.set_major_locator(mul(interval))
    ax_set.tick_params(axis='x', labelrotation=60)
    ax_set.hist(np_data, bins=bins, color=color, alpha=0.8, rwidth=0.8)
if __name__ == '__main__':
    # Assignment 1: one histogram per dataset, stacked vertically with shared
    # x and y axes so both distributions are drawn on the same scale.
    fig, axes = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(14, 8))
    # plt.subplots already attached both axes to the figure, so unpack them
    # instead of passing them through fig.add_subplot again.
    ax1, ax2 = axes
    np_arr = load_csv_to_array('GB_video_data_numbers.csv')
    comment_hist(ax1, np_arr, 'GB_video_data_numbers', 10000, 'b')
    np_arr2 = load_csv_to_array('US_video_data_numbers.csv')
    comment_hist(ax2, np_arr2, 'US_video_data_numbers', 10000, 'b')
    plt.show()
作业2
绘制图形,分析英国的Youtube中视频的评论数与喜欢数的关系。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator as mul
# Text rendering setup for the scatter plot below, whose axis labels are
# Chinese: disable 'axes.unicode_minus' and make SimHei the sans-serif font.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.sans-serif'] = ['SimHei']
def load_csv_to_array(filename: str) -> np.ndarray:
    """Load a comma-separated numeric file into a 2-D float array.

    Args:
        filename: Path of the CSV file; every field must parse as a float
            and no header row is skipped.

    Returns:
        A two-dimensional ``float`` array with one row per line of the file.
    """
    # ndmin=2 keeps a one-line file two-dimensional, so callers that index
    # columns with ``arr[:, k]`` do not fail on a 1-D result.
    return np.loadtxt(filename, delimiter=',', skiprows=0, dtype=float, ndmin=2)
def links_comments_to_likes(np_arr, tick_interval=20000):
    """Show a scatter plot of comment counts (x) against like counts (y).

    Args:
        np_arr: 2-D array; column index 3 is plotted on the x axis (labelled
            as comment count) and column index 1 on the y axis (labelled as
            like count).
        tick_interval: Spacing of the major x ticks. Defaults to 20000, the
            value that was previously hard-coded.
    """
    comment_list = np_arr[:, 3]
    likes_list = np_arr[:, 1]

    plt.figure(figsize=(14, 8))
    ax = plt.gca()
    # Everything below goes through this one axes object so the locator,
    # labels, limits and points cannot end up on different axes.
    ax.xaxis.set_major_locator(mul(tick_interval))
    ax.set_xlabel('评论数')
    ax.set_ylabel('喜欢数')
    ax.set_xlim(np.min(comment_list), np.max(comment_list))
    ax.tick_params(axis='x', labelrotation=60)
    ax.scatter(comment_list, likes_list, marker='x', color='red', s=20)
    plt.show()
# Assignment 2 script body: load the GB dataset and show the scatter plot of
# comment counts against like counts. Runs at module level (no __main__ guard).
np_arr = load_csv_to_array('GB_video_data_numbers.csv')
links_comments_to_likes(np_arr)
作业3
希望将两个国家的数据拼接在一起来研究分析。
• 拼接全为0的数组标识为英国
• 拼接全为1的数组标识为美国
• 将两个国家的数据拼接
import numpy as np
def load_csv_to_array(filename: str) -> np.ndarray:
    """Load a comma-separated numeric file into a 2-D float array.

    Args:
        filename: Path of the CSV file; every field must parse as a float
            and no header row is skipped.

    Returns:
        A two-dimensional ``float`` array with one row per line of the file.
    """
    # ndmin=2 keeps a one-line file two-dimensional, so a label column of
    # shape (rows, 1) can always be stacked next to the result.
    return np.loadtxt(filename, delimiter=',', skiprows=0, dtype=float, ndmin=2)
def hstack_col(np_arr, data_col):
    """Prepend a constant label column (all 0s or all 1s) to ``np_arr``.

    Args:
        np_arr: Row data as a 2-D array; a non-empty 1-D array is treated as
            a single row.
        data_col: Label value for the new first column; must be 0 or 1.

    Returns:
        A new array whose first column holds ``data_col`` and whose remaining
        columns are ``np_arr``. If ``data_col`` is neither 0 nor 1, a message
        is printed and an empty array is returned.
    """
    # Validate first so an invalid label never touches the data.
    if data_col not in (0, 1):
        print('data_col 的值必须为0或1')
        return np.array([])

    rows = np_arr
    # A single record loaded as a 1-D array would make hstack fail on a
    # dimension mismatch; promote it to one row.
    if rows.ndim == 1 and rows.size:
        rows = rows.reshape(1, -1)

    make_column = np.zeros if data_col == 0 else np.ones
    label_col = make_column((rows.shape[0], 1), dtype=int)
    return np.hstack((label_col, rows))
if __name__ == '__main__':
    # Assignment 3: label the GB rows with 0 and the US rows with 1, then
    # stack both labelled arrays into one and print it.
    # Both labelled arrays start out empty so the combined size check below
    # is always safe, even when a dataset turns out to be empty.
    new_np_eng_arr = np.array([])
    new_np_us_arr = np.array([])

    np_eng_arr = load_csv_to_array('GB_video_data_numbers.csv')
    if np_eng_arr.size != 0:
        new_np_eng_arr = hstack_col(np_eng_arr, 0)

    np_us_arr = load_csv_to_array('US_video_data_numbers.csv')
    if np_us_arr.size != 0:
        new_np_us_arr = hstack_col(np_us_arr, 1)

    # The original check read the misspelled name `new_np_en_arr`, which
    # raised NameError before anything could be stacked.
    if new_np_eng_arr.size != 0 and new_np_us_arr.size != 0:
        np_eng_and_us_arr = np.vstack((new_np_eng_arr, new_np_us_arr))
        print(np_eng_and_us_arr)