import requests
import csv
from multiprocessing.dummy import Pool as ThreadPool
import random
from lxml import etree
def spider(page):
    # Send the keyword through `params` only; the original also appended it
    # to the URL string, so it was sent twice.
    url = 'http://waimai.meituan.com/search/wx4g19983su8/rt'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # Session-specific; if this cookie has expired, the site may return a
        # page that the XPath below cannot match.
        'Cookie': 'w_uuid=Lk80hpGbK2WdGpo3knW8qZw2M6dDGUODsDrcGIPj8TdyhDChRFTV-fjxkGWwEQ9w; _lxsdk=15d9af78c28c8-0ee719fb4f1d34-57e1b3c-144000-15d9af78c29c8; Hm_lvt_f66b37722f586a240d4621318a5a6ebe=1501555925,1501555942,1501629715; __utma=211559370.58204263.1500387690.1501555925.1501629715.2; __utmz=211559370.1501629715.2.2.utmcsr=baidu|utmccn=baidu|utmcmd=organic|utmcct=zt_search; __mta=45569435.1501629712300.1501629720518.1501629728116.3; uuid=9dd0ef628497b76a1925.1501543494.0.0.0; _lxsdk_cuid=15e4aa6effc9e-0b23fb07f9c1bf-57e1b3c-144000-15e4aa6effdc8; _ga=GA1.2.58204263.1500387690; _gid=GA1.2.1025663946.1504513189; w_cid=110101; w_cpy_cn="%E4%B8%9C%E5%9F%8E%E5%8C%BA"; w_cpy=dongchengqu; waddrname="%E6%9C%AA%E7%9F%A5"; w_geoid=wx4g19983su8; w_ah="39.91065189242363,116.43523581326008,%E6%9C%AA%E7%9F%A5|39.8989349976182,116.50381989777088,%E7%99%BE%E5%AD%90%E6%B9%BE|39.96550491079688,116.30504373461008,%E6%B5%B7%E6%B7%80%E5%8C%BA|40.004531890153885,116.47530399262905,%E6%9C%9B%E4%BA%AC%2C%E6%9D%A5%E5%B9%BF%E8%90%A5%2C%E8%8A%B1%E5%AE%B6%E5%9C%B0"; JSESSIONID=s1gvzkp9v2th1ed70m8qr5yyz; _ga=GA1.3.58204263.1500387690; _gid=GA1.3.1025663946.1504513189; _gat=1; w_utmz="utm_campaign=(direct)&utm_source=(direct)&utm_medium=(none)&utm_content=(none)&utm_term=(none)"; w_visitid=039dc5a0-4720-4cef-afc1-e43eeec3671a; __mta=45569435.1501629712300.1501629728116.1504518924133.4',
        'Host': 'waimai.meituan.com',
        'Upgrade-Insecure-Requests': '1'
    }
    params = {'keyword': str(page)}
    timeout = random.choice(range(10, 20))
    resp = requests.get(url, headers=headers, timeout=timeout, params=params)
    selector = etree.HTML(resp.text)
    content_field = selector.xpath('//*[@class="result-content"]/ul/li')
    fieldnames = ['name', 'time', 'order']
    with open('mtjd3.csv', 'a', newline='', errors='ignore') as f:
        f_csv = csv.DictWriter(f, fieldnames=fieldnames)
        # Write one row per <li> inside the loop. The original filled a single
        # `item` dict and wrote it once after the loop, so at most the last
        # shop was saved, and writeheader() ran on every call, repeating the
        # header row; the header is now written once in __main__.
        for each in content_field:
            name = each.xpath('a/div[1]/p[1]/text()')
            delivery_time = each.xpath('a/div[1]/p[4]/text()')  # renamed from `time`, which shadowed the module name
            order = each.xpath('a/div[1]/p[3]/span[2]/text()')
            f_csv.writerow({
                'name': ''.join(name),
                'time': ''.join(delivery_time),
                'order': ''.join(order),
            })
if __name__ == '__main__':
    # `list` shadowed the built-in, and copying it item by item into `page`
    # was redundant; the keywords can be mapped directly.
    keywords = ["大虾来了", "夹克的虾", "簋街仔仔", "辣私房", "烧虾师"]
    print(keywords)
    # Truncate the output file and write the header exactly once, before the
    # worker threads start appending rows.
    with open('mtjd3.csv', 'w', newline='', errors='ignore') as f:
        csv.DictWriter(f, fieldnames=['name', 'time', 'order']).writeheader()
    pool = ThreadPool(4)
    pool.map(spider, keywords)
    pool.close()
    pool.join()
When I run this, the output file is empty, and I can't tell whether the problem is in how the iteration is set up or in the function itself. Any help would be appreciated.
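
A quick way to narrow it down is to check whether the XPath matches anything at all before suspecting the CSV-writing loop. Below is a minimal diagnostic sketch reusing the url, headers, and XPath from the script above; the headers here are trimmed to the User-Agent for brevity, and in practice you would reuse the full dict including the Cookie:

import requests
from lxml import etree

url = 'http://waimai.meituan.com/search/wx4g19983su8/rt'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
resp = requests.get(url, headers=headers, params={'keyword': '大虾来了'}, timeout=15)
print(resp.status_code)  # anything other than 200 suggests the request is being rejected
shops = etree.HTML(resp.text).xpath('//*[@class="result-content"]/ul/li')
print(len(shops))        # 0 means the page parsed but no shops matched the XPath,
                         # e.g. an anti-crawler page or an expired Cookie

If the count is 0, the empty CSV is a fetch/parse problem rather than an iteration problem. Separately, four pool threads appending to the same file can interleave rows; serializing writes with a threading.Lock, or having spider return its rows and writing them once in the main process, is the safer pattern.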