# 声明:代码仅作学习交流用途,代码分享者与创作者不承担任何由他人恶意运行而导致的责任,勿擅自修改限制频率的参数,勿恶意攻击网页,请学习浏览者遵守社会公德与法律秩序,爬虫导致的网页崩溃等损失由计算机操作者负全部责任,造成严重后果的需要承担刑事责任
# 爬虫代写:邮箱 leon_leon@yeah.net
import requests
from lxml import etree
from time import sleep
from fake_useragent import UserAgent
import pandas as pd
# Scrape the price table from every listing page of xinfadi.com.cn and save
# the collected rows to a GBK-encoded CSV file.

PAGE_COUNT = 13902  # listing pages 1..PAGE_COUNT are requested
URL_TEMPLATE = 'http://www.xinfadi.com.cn/marketanalysis/0/list/{}.shtml'
OUTPUT_PATH = 'C:\\Users\\Admin\\PycharmProjects\\untitled\\新发地市场价格.csv'
ROW_XPATH = "//table[@class='hq_table']/tr[position()>1]"  # skips the header row
REQUEST_DELAY_SECONDS = 3  # pause after every request; do not lower
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled server blocks forever
CHECKPOINT_INTERVAL = 300  # write a snapshot every this many pages

# Column accumulators: index k of every list belongs to the same table row.
name_all = []
min_price_all = []
mean_price_all = []
max_price_all = []
guige_all = []
unit_all = []
data_all = []

# Accumulators in table-column order (td[1] .. td[7]).
_COLUMNS = (
    name_all,
    min_price_all,
    mean_price_all,
    max_price_all,
    guige_all,
    unit_all,
    data_all,
)

# 1-based numbers of the pages that could not be fetched or parsed.
failed_pages = []


def _cell_text(row, column):
    """Return the joined text nodes of the ``column``-th <td> of ``row``.

    An empty or missing cell yields ``''`` so that every row contributes
    exactly one value to every column and the columns stay aligned.
    """
    return ''.join(row.xpath('td[{}]/text()'.format(column)))


def _save_snapshot():
    """Write everything collected so far to OUTPUT_PATH, replacing the file."""
    all_info = {
        '名称': name_all,
        '最低价格': min_price_all,
        '平均价格': mean_price_all,
        '最高价格': max_price_all,
        '规格': guige_all,
        '单位': unit_all,
        '日期': data_all,
    }
    outdata = pd.DataFrame(all_info)
    outdata.to_csv(OUTPUT_PATH, encoding='GBK')


# Build the User-Agent source once; a fresh Edge string is still read from it
# for every request.
user_agents = UserAgent()

for i in range(PAGE_COUNT):
    print(i)
    page = i + 1
    url = URL_TEMPLATE.format(page)
    headers = {
        'User-Agent': user_agents.edge,
    }

    try:
        response = requests.get(
            url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()
    except requests.RequestException as error:
        # One bad page must not abort a run that takes many hours; remember
        # it so it can be re-fetched later.
        print('page {} failed: {}'.format(page, error))
        failed_pages.append(page)
        response = None
    finally:
        # Throttle after every request, successful or not.
        sleep(REQUEST_DELAY_SECONDS)

    html = response.text if response is not None else ''
    e = etree.HTML(html) if html.strip() else None
    if e is None:
        if response is not None:
            # The request succeeded but returned nothing parseable.
            print('page {} returned no parseable HTML'.format(page))
            failed_pages.append(page)
    else:
        # Extract row by row: per-column XPath queries over the whole table
        # silently drop empty cells, which shifts the columns against each
        # other and makes the lists differ in length.
        for row in e.xpath(ROW_XPATH):
            for column, values in enumerate(_COLUMNS, start=1):
                values.append(_cell_text(row, column))

    if i % CHECKPOINT_INTERVAL == 0:
        _save_snapshot()

# Final write: the pages fetched after the last checkpoint would otherwise
# never reach the file.
_save_snapshot()

if failed_pages:
    print('pages not collected: {}'.format(failed_pages))