# Example based on the requirement below: fetch the average price for the given query conditions. Comments are welcome.
import re
import time

import requests
# One session is shared by every request so cookies set during login persist.
session = requests.Session()
url = "http://passport.f139.com/doLogin.do"

# First request: fetch the login page and read the hidden "token" form field.
# A timeout keeps the script from hanging forever on an unresponsive server.
get_token = session.get(url=url, allow_redirects=False, timeout=10)
token_matches = re.findall(r'name="token" value="(.*?)"', get_token.text)
# re.findall returns a list; submit exactly one token value (the first match)
# instead of relying on requests expanding a list into repeated form fields.
get_token = token_matches[0] if token_matches else ""

# Login form payload.
data = {
    # NOTE(review): this value is already percent-encoded and requests will
    # encode form values again when sending — confirm the server expects that.
    "url": "http%3A%2F%2Fdata.f139.com%2Ftrend.do%3Fpid%3D3052%26type%3D6%26parentId%3D",
    "token": get_token,
    "userName": "xxxxx",
    "passWord": "xxxxx",
}
headers = {
    'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/93.0.4577.82Safari/537.36',
}
# Log in. Redirects are not followed (allow_redirects=False); success is
# detected by the presence of the "_qquc" cookie in the session's cookie jar.
res = session.post(url=url, data=data, headers=headers, allow_redirects=False, timeout=10)
if session.cookies.get('_qquc'):
    print("富宝资讯:登录成功")
    print("开始查询数据。。。")
    # Pause briefly before issuing the data query.
    time.sleep(3)
else:
    print("富宝资讯:登录失败,请重新登录")
    # Stop with a non-zero status so callers can tell the login failed;
    # raising SystemExit does not depend on the site-provided exit() helper.
    raise SystemExit(1)
# Query window; edit these two dates by hand (same YYYY-MM-DD form as below).
start_time = '2021-08-16'
end_time = '2021-08-23'
url = f'http://data.f139.com/related.do?pid=3052&start={start_time}&end={end_time}&type=6'

# Forward the login cookies explicitly in the Cookie header. A cookie that is
# missing from the jar is skipped rather than sent as the literal text "None".
cookie_pairs = [(name, session.cookies.get(name)) for name in ("_qquc", "JSESSIONID")]
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    "Cookie": "".join(f"{name}={value};" for name, value in cookie_pairs if value is not None),
}
# A timeout keeps the script from hanging forever on an unresponsive server.
get_info = session.get(url=url, headers=headers, allow_redirects=False, timeout=10)
# Decode the response body as UTF-8 regardless of what requests guessed.
get_info.encoding = "utf-8"
from lxml import etree
def _strip_whitespace(text):
    """Return *text* as a plain str with every CR, LF, tab and space removed."""
    return str(text).translate(_WHITESPACE_TABLE)


# Translation table that deletes the four whitespace characters in one pass.
_WHITESPACE_TABLE = str.maketrans("", "", "\r\n\t ")

# Parse the query response into an element tree.
wb_data = get_info.text
html = etree.HTML(wb_data)
# Serialized copy of the parsed document; nothing in this section reads it.
result = etree.tostring(html)

# Average prices: text nodes of the 4th "textpaddingleft" cell in table "tab_1".
html_data = [
    _strip_whitespace(cell)
    for cell in html.xpath('//table[@id="tab_1"]//td[@class="textpaddingleft"][4]/text()')
]

# Dates: text nodes of the 6th "textpaddingleft" cell in table "tab_1".
# Every entry is cleaned (including the last one), then all blank entries are
# dropped, however many there are — no ValueError when fewer than two exist.
html_time = [
    _strip_whitespace(cell)
    for cell in html.xpath('//table[@id="tab_1"]//td[@class="textpaddingleft"][6]/text()')
]
html_time = [stamp for stamp in html_time if stamp]

# Pair each date with the price at the same position; zip stops at the shorter
# column, so a length mismatch cannot raise IndexError.
for stamp, price in zip(html_time, html_data):
    print(f"{stamp}的平均单价为:{price}")
# Period summary: the 4th text node (in document order) among all <td>
# elements that carry align="center".
html_data1 = html.xpath('(//td[@align="center"]/text())[4]')
# Remove every carriage return, newline, tab and space from that text node,
# storing the cleaned value back into the result list.
html_data1[0] = "".join(ch for ch in str(html_data1[0]) if ch not in "\r\n\t ")
print(f"时段总结{html_data1[0]}")