【本博客仅供学习参考】python获取富宝资讯数据

网络山顶洞人

已于 2022-11-07 09:58:48 修改

阅读量171

点赞数

文章标签： python 爬虫 html

于 2021-09-23 10:09:46 首次发布

本文链接：https://blog.csdn.net/copa_ax99/article/details/120428996

版权

以下面的需求为例，获取符合该条件的平均价。欢迎留言交流。
在这里插入图片描述

在这里插入图片描述

import re
import sys
import time

import requests
from lxml import etree

# Translation table that deletes the characters the scraped table cells are
# padded with: carriage return, newline, tab and space.
_STRIP_WHITESPACE = str.maketrans("", "", "\r\n\t ")


def _clean(text):
    """Return *text* as a plain str with every CR, LF, tab and space removed."""
    return str(text).translate(_STRIP_WHITESPACE)


session = requests.session()
url = "http://passport.f139.com/doLogin.do"

# First request: fetch the login page and pull the token value out of its HTML.
login_page = session.get(url=url, allow_redirects=False)
token_match = re.search(r'name="token" value="(.*?)"', login_page.text)
# Send the token as a single string (the first match) rather than the list
# that re.findall would return.
token = token_match.group(1) if token_match else ""

data = {
    # NOTE(review): this value is already percent-encoded and requests will
    # form-encode it again; kept verbatim -- confirm what the server expects.
    "url": "http%3A%2F%2Fdata.f139.com%2Ftrend.do%3Fpid%3D3052%26type%3D6%26parentId%3D",
    "token": token,
    "userName": "xxxxx",
    "passWord": "xxxxx",
}
headers = {
    'User-Agent': 'Mozilla/5.0(WindowsNT10.0;Win64;x64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/93.0.4577.82Safari/537.36'
}

# Log in. The script treats the presence of the "_qquc" cookie in the session
# as the sign that the login succeeded.
res = session.post(url=url, data=data, headers=headers, allow_redirects=False)
if not session.cookies.get('_qquc'):
    print("富宝资讯：登录失败，请重新登录")
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)
print("富宝资讯：登录成功")
print("开始查询数据。。。")
time.sleep(3)

# Query period; edit by hand.
start_time = '2021-08-16'
end_time = '2021-08-23'
url = f'http://data.f139.com/related.do?pid=3052&start={start_time}&end={end_time}&type=6'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    # The login cookies are copied into an explicit Cookie header for the data
    # host. NOTE(review): presumably because they are scoped to the passport
    # host and would not be sent automatically -- confirm before removing.
    "Cookie":
        f'_qquc={session.cookies.get("_qquc")};'
        f'JSESSIONID={session.cookies.get("JSESSIONID")};'
}
get_info = session.get(url=url, headers=headers, allow_redirects=False)
get_info.encoding = "utf-8"

html = etree.HTML(get_info.text)

# Average prices: text of the 4th "textpaddingleft" cell in each row of tab_1.
# NOTE(review): empty entries are kept here, as before, so the pairing with the
# time column below is unchanged -- verify against the live page.
html_data = [
    _clean(text)
    for text in html.xpath('//table[@id="tab_1"]//td[@class="textpaddingleft"][4]/text()')
]

# Times: text of the 6th "textpaddingleft" cell. Every entry is cleaned
# (including the last one) and all empty entries are dropped, instead of
# removing exactly two and failing when fewer are present.
html_time = [
    cleaned
    for cleaned in map(
        _clean,
        html.xpath('//table[@id="tab_1"]//td[@class="textpaddingleft"][6]/text()'),
    )
    if cleaned
]

# zip stops at the shorter list, so a missing price can no longer raise
# IndexError.
for day, price in zip(html_time, html_data):
    print(f"{day}的平均单价为：{price}")

# Period summary: the 4th text node found under centre-aligned cells.
html_data1 = [_clean(text) for text in html.xpath('(//td[@align="center"]/text())[4]')]
if html_data1:
    print(f"时段总结{html_data1[0]}")

网络山顶洞人

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【本博客仅供学习参考】python获取富宝资讯数据

以下面的需求为例，获取该条件的平均价import requestsimport resession = requests.session()url = "http://passport.f139.com/doLogin.do"# 第一次请求，获取token值get_token = session.get(url=url, allow_redirects=False)# print(get_token.text)get_token = re.findall('name="token" v
复制链接

扫一扫