#encoding=utf-8
import urllib
import re
import threading
import requests
import time
# Storefront URL whose presence in a results page marks the company as listed there.
COMPANY_URL = "https://gzxibolai.en.alibaba.com"

# NOTE(review): the regex literal that was passed to re.compile() at this point
# is cut off in the source (only r' remains), so it could not be carried over.
# Restore the original pattern here. Judging by how its matches are reported
# below, each match should be the company's position on the page -- confirm.
COMPANY_RANK_PATTERN = None

# Result pages to inspect, in order: (page number, URL suffix after the keyword).
SEARCH_PAGES = ((1, ".html"), (2, "/2.html"), (3, "/3.html"))

REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever
RESULT_DELAY = 2      # seconds slept after a keyword has been reported

# Serialises console/CSV output when get_ranK runs in several threads at once.
_WRITE_LOCK = threading.Lock()


def _as_text(value):
    """Return ``value`` as text, decoding UTF-8 byte strings (Python 2 ``str``)."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value


def _emit(keyword, message):
    """Print one result line and append it to the shared ``op_paiming_txt`` handle."""
    console_line = u"%s %s" % (keyword, message)
    csv_line = u"%s,%s" % (keyword, message)
    if str is bytes:
        # Python 2: a handle from the builtin open() is a byte stream, so encode first.
        csv_line = csv_line.encode("utf-8")
    with _WRITE_LOCK:
        print(console_line)
        op_paiming_txt.write(csv_line)


def get_ranK(b):
    """Report where the company ranks for keyword ``b`` in Alibaba product search.

    Result pages are fetched one at a time, in the order given by
    ``SEARCH_PAGES``. The first page whose HTML contains ``COMPANY_URL`` is
    scanned with ``COMPANY_RANK_PATTERN``; every match is printed and written
    to the module-level ``op_paiming_txt`` file as one line. Later pages are
    not fetched. If no page contains the URL, a single "no ranking" line is
    reported instead. The function then sleeps ``RESULT_DELAY`` seconds.

    Args:
        b: Search keyword, inserted verbatim into the results URL.

    Raises:
        RuntimeError: If ``COMPANY_RANK_PATTERN`` has not been restored.
        Exception: Anything raised by ``requests.get`` (including timeouts)
            propagates to the caller.
    """
    if COMPANY_RANK_PATTERN is None:
        raise RuntimeError(
            "COMPANY_RANK_PATTERN is not set: restore the ranking regex "
            "before calling get_ranK()"
        )

    keyword = _as_text(b)
    rank_re = re.compile(COMPANY_RANK_PATTERN)

    for page_no, suffix in SEARCH_PAGES:
        url = u"https://www.alibaba.com/products/" + keyword + suffix
        html = requests.get(url=url, timeout=REQUEST_TIMEOUT).text
        if COMPANY_URL not in html:
            continue
        for position in rank_re.findall(html):
            _emit(keyword, u"约第%d页%s位\n" % (page_no, position))
        break
    else:
        # No inspected page mentioned the company's storefront.
        _emit(keyword, u"前%d页无排名\n" % len(SEARCH_PAGES))

    time.sleep(RESULT_DELAY)
# if __name__ == '__main__':
# with open('kfc.txt','r') as c:
# for i in c.readlines():
# f=i.strip()
# get_ranK(f)
if __name__ == '__main__':
    # Open the results file before any worker starts: get_ranK writes to the
    # module-level name op_paiming_txt, so it has to exist first.
    op_paiming_txt = open('paiming.csv', 'a')
    try:
        # kfc.txt holds one search keyword per line.
        with open('kfc.txt', 'r') as c:
            keywords = [line.strip() for line in c]

        workers = []
        for f in keywords:
            if not f:
                continue  # skip blank lines instead of querying an empty keyword
            p = threading.Thread(target=get_ranK, args=(f,))
            p.start()
            workers.append(p)

        # Wait for every lookup so the file is only closed once all results
        # have been written.
        for p in workers:
            p.join()
    finally:
        op_paiming_txt.close()