数据爬取:
def get_url(name, timeout=10):
    """Fetch the zhujia.zhuwang.cc home page and look up the link titled *name*.

    The page is scanned for anchors of the form
    ``<a href="..." target="_blank" title="...">text</a>``; every match is
    collected into a mapping from the anchor text to its ``href``.

    Args:
        name: Anchor text to look up (the caller in this file passes region
            names such as '四川省').
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A 2-tuple ``(href, links)`` where ``href`` is the link whose text
        equals *name* and ``links`` is the full ``{text: href}`` mapping.

    Raises:
        KeyError: If no anchor on the page has *name* as its text.
    """
    url = 'https://zhujia.zhuwang.cc/'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.43'
    }
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.get(url=url, headers=header, timeout=timeout)
    ex_2 = '<a href="(.*?)" target="_blank" title=".*?">(.*?)</a>'
    local_data = re.findall(ex_2, res.text, re.M)
    # Named `links` rather than `dict` so the builtin is not shadowed.
    # When two anchors share the same text, the later one wins.
    links = {text: href for href, text in local_data}
    return links[name], links
def get_data(citys=None, timeout=10):
    """Scrape the price table of each region and save it to ``<region>.txt``.

    For every region the page linked from the home page is downloaded, the
    ``<ul class="zhujia-hd clear">`` section(s) are extracted, and each
    ``<span>`` text is paired with the ``<b class=...>`` text at the same
    position. Every pair is written as one ``name:value`` line to a UTF-8
    file named after the region (existing files are overwritten).

    Args:
        citys: Iterable of region names as they appear in the home-page
            links. Defaults to the seven regions originally hard-coded here.
        timeout: Seconds to wait for each region page before giving up.

    Raises:
        KeyError: If a region name has no matching link on the home page.
    """
    if citys is None:
        citys = ['四川省', '内蒙古', '上海市', '北京市', '广西', '天津市', '黑龙江省']
    citys = list(citys)
    if not citys:
        return

    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.43'
    }
    section_ex = '<ul class="zhujia-hd clear">(.*?)</ul>'
    name_ex = '<span>(.*?)</span>'
    value_ex = '<b class=".*?">(.*?)</b>'

    # get_url() returns the whole {link text: href} table alongside the single
    # href, so the home page is fetched once instead of once per region.
    _, links = get_url(citys[0])

    for city in citys:
        url = links[city]
        # The original compared `city` (a region name) with the home-page URL,
        # which can never match; the link target is what identifies the
        # nationwide page.
        if url == "https://zhujia.zhuwang.cc/":
            city = '全国'
        page = requests.get(url=url, headers=header, timeout=timeout).text
        sections = re.findall(section_ex, page, re.S)
        # Search the section text itself. Converting the list with str() would
        # run the patterns over its repr, where newlines and quotes are escaped.
        section_text = '\n'.join(sections)
        names = re.findall(name_ex, section_text, re.S)
        values = re.findall(value_ex, section_text, re.S)
        with open(city + '.txt', 'w', encoding='utf-8') as fp:
            # zip() stops at the shorter list, so unpaired entries are dropped.
            for label, value in zip(names, values):
                # Strip surrounding whitespace so each record stays on one line.
                fp.write(label.strip() + ':' + value.strip() + '\n')
数据储存:
# Storage step, shown for reference only. get_data() already performs this
# write for every region; `city`, `name` and `data` exist only inside that
# function, so the excerpt cannot run at module level.
#
#     with open(city+'.txt', 'w', encoding='utf-8') as fp:
#         for i,j in zip(name,data):
#             fp.write(i+':'+j+'\n')

# Run the scrape: writes one text file per region.
get_data()
数据分析:
def _read_price_record(path, line_index):
    """Return ``(label, price)`` parsed from one ``label:price`` line of *path*.

    Args:
        path: UTF-8 text file holding one ``label:price`` record per line.
        line_index: Zero-based index of the line to read.

    Raises:
        ValueError: If the file has no such line, the line has no ``:``
            separator, or the price is not a valid float.
    """
    with open(path, 'r', encoding='utf-8') as txt:
        rows = txt.read().split("\n")
    if line_index >= len(rows):
        raise ValueError(
            '{}: expected at least {} line(s), found {}'.format(
                path, line_index + 1, len(rows)))
    fields = rows[line_index].split(":")
    if len(fields) < 2:
        raise ValueError(
            "{}: line {} is not in 'label:price' form: {!r}".format(
                path, line_index + 1, rows[line_index]))
    return fields[0], float(fields[1])


def _paint_price_line(name, line_index):
    """Plot line *line_index* of every ``<region>.txt`` file as one line chart.

    The x axis holds the region names, the y axis the parsed prices. The
    label of the last file read becomes the chart title and the stem of the
    saved ``.jpg`` file.
    """
    x = []
    y = []
    funts = ""
    for gets_name in name:
        funts, price = _read_price_record(gets_name + '.txt', line_index)
        # Append both values only after parsing succeeded so x and y can
        # never end up with different lengths.
        x.append(gets_name)
        y.append(price)

    # Select a font with Chinese glyphs before any text is created.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(10, 5))
    plt.title(funts)  # chart title
    plt.ylabel('价格 (元)')  # y-axis label
    plt.plot(x, y, marker='o', markersize=5)  # line with a marker on each data point
    for a, b in zip(x, y):
        plt.text(a, b, b, ha='center', va='bottom', fontsize=10)  # value label above each point
    plt.legend(['方案'])  # legend entry for the line
    plt.savefig(funts + '.jpg')
    plt.show()


def paint_ay(name):
    """Chart the first record of each region file.

    The original comment describes this as the daily 外三元 pork price line
    chart.

    Args:
        name: Iterable of region names; ``<region>.txt`` is read for each.
    """
    _paint_price_line(name, 0)


def paint_nei(name):
    """Chart the second record of each region file.

    NOTE(review): presumably the 内三元 price, judging by the function name;
    confirm against the scraped files.

    Args:
        name: Iterable of region names; ``<region>.txt`` is read for each.
    """
    _paint_price_line(name, 1)


def paint_pin(name):
    """Chart the third record of each region file.

    NOTE(review): which price category the third record holds is not visible
    here; confirm against the scraped files.

    Args:
        name: Iterable of region names; ``<region>.txt`` is read for each.
    """
    _paint_price_line(name, 2)
数据展示: