# Python: fetch company listings from 企库 (qeecoo.com)
from datetime import time
import requests
from bs4 import BeautifulSoup
import re
from selenium import webdriver
# Default city name; the script entry point overwrites it with user input.
city = ''

# Request headers sent with every page fetch.
# 'br' (Brotli) is deliberately not advertised: requests only decodes Brotli
# when an optional Brotli package is installed, and the fetchers parse the raw
# response body, so a Brotli-encoded reply would otherwise be unreadable.
header = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'zh-CN,zh;q=0.9',
    'origin': 'http://www.qeecoo.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
}
def findone(num, city):
    """Visit every listing page of *city* and return the page URLs.

    Page 1 is the bare listing URL; page ``k`` (k >= 2) is the same URL with
    ``_k`` appended. Each page URL is passed to ``findproduct`` and then
    recorded.

    Args:
        num: Number of listing pages to visit.
        city: City name as used in the listing URL.

    Returns:
        The list of page URLs visited, in page order (empty when ``num``
        is 0 or negative).
    """
    cityurl = 'http://www.qeecoo.com/' + city + '企业黄页-' + city + '企业名录'
    citynumlist = []
    for page in range(1, num + 1):
        # The first page carries no "_<n>" suffix.
        pageurl = cityurl if page == 1 else cityurl + '_' + str(page)
        findproduct(pageurl)
        citynumlist.append(pageurl)
    return citynumlist
def findallpage(city):
    """Return the number of listing pages for *city*.

    Fetches the first listing page, takes the first element matching the
    CSS selector ``.last`` and reads the page number from the part of its
    link's ``href`` that follows the first underscore.

    Args:
        city: City name as used in the listing URL.

    Returns:
        The page count as an int. Returns 1 when the page has no ``.last``
        element, so the caller still visits the base listing URL, and 0 when
        the ``.last`` element contains no link.

    Raises:
        requests.RequestException: On network failure or timeout.
        IndexError: If a link's ``href`` contains no underscore.
        ValueError: If the text after the underscore is not an integer.
    """
    url = 'http://www.qeecoo.com/' + city + '企业黄页-' + city + '企业名录'
    # Without a timeout requests may block forever on an unresponsive server.
    resp = requests.get(url, headers=header, timeout=10)
    # The raw bytes are parsed, so BeautifulSoup detects the encoding itself.
    soup = BeautifulSoup(resp.content, 'html.parser')
    last_elements = soup.select('.last')
    if not last_elements:
        return 1
    # Re-parsing the element's markup makes the element itself searchable, so
    # the link is found even when `.last` is the <a> tag rather than a wrapper.
    last_soup = BeautifulSoup(str(last_elements[0]), 'html.parser')
    page_count = 0
    for anchor in last_soup.find_all('a'):
        # If several links are present, the last one wins.
        page_count = anchor['href'].split('_', 1)[1]
    return int(page_count)
def productone(urlp):
    """Print a single company page URL to standard output.

    Args:
        urlp: The URL to print.
    """
    print(urlp)
def findproduct(citynumone):
    """Fetch one listing page and pass every other link on it to ``productone``.

    All ``<a>`` tags inside elements matching the CSS selector ``.list`` are
    collected in document order; the 1st, 3rd, 5th, ... of them are turned
    into absolute URLs and handed to ``productone``.

    Args:
        citynumone: URL of the listing page to fetch.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError: If a selected ``<a>`` tag has no ``href`` attribute.
    """
    # The response must not be bound to the name `productone`: that would
    # shadow the module-level function and make the call below fail with
    # "'Response' object is not callable".
    # Without a timeout requests may block forever on an unresponsive server.
    resp = requests.get(citynumone, headers=header, timeout=10)
    # The raw bytes are parsed, so BeautifulSoup detects the encoding itself.
    soup = BeautifulSoup(resp.content, 'html.parser')
    listing = soup.select('.list')
    # Re-parse the stringified matches so all their anchors, including a
    # matched element that is itself an <a>, come back from one find_all.
    listing_soup = BeautifulSoup(str(listing), 'html.parser')
    anchors = listing_soup.find_all('a')
    # Only every other anchor is used, starting with the first.
    for anchor in anchors[::2]:
        urlp = 'http://www.qeecoo.com/' + anchor['href']
        productone(urlp)
if __name__ == '__main__':
    # `city` is still the empty module-level default here, so the prompt shows
    # only the label. Surrounding whitespace is stripped because the value is
    # spliced directly into request URLs.
    city = input('请输入:' + city).strip()
    # Look up how many listing pages the city has, then visit each of them.
    findone(findallpage(city), city)