Python 获取企库(qeecoo.com)企业名录信息

from datetime import time
import requests
from bs4 import BeautifulSoup
import re
from selenium import webdriver


# Region to query; filled in by the user at startup (see __main__).
city=''
# HTTP request headers sent with every crawl request; the User-Agent
# mimics a desktop Chrome browser so the site serves the normal page.
header={
    'accept':'*/*',
    'accept-encoding':'gzip, deflate, br',
    'accept-language':'zh-CN,zh;q=0.9',
    'origin':'http://www.qeecoo.com',
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# Crawl every listing page for a region and return the list of page URLs.
def findone(num, city):
    """Visit pages 1..num of the city's company directory.

    Page 1 uses the bare listing URL; page N (N > 1) appends ``_N``.
    Each page URL is passed to findproduct() and collected into the
    returned list, in page order.
    """
    base = 'http://www.qeecoo.com/' + city + '企业黄页-' + city + '企业名录'
    visited = []
    for page in range(1, num + 1):
        # First page has no suffix; later pages are "<base>_<page>".
        url = base if page == 1 else base + '_' + str(page)
        findproduct(url)
        visited.append(url)
    return visited

# Determine how many listing pages exist for a region.
def findallpage(city):
    """Fetch the first directory page for *city* and return the total
    page count as an int.

    The count is read from the pagination element with class ``.last``,
    whose <a> href ends in ``_<last page number>``.
    Raises IndexError if the page has no ``.last`` element, and
    ValueError if the href suffix is not numeric (same failure modes as
    before).
    """
    url = 'http://www.qeecoo.com/' + city + '企业黄页-' + city + '企业名录'
    resp = requests.get(url, headers=header)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.content, 'html.parser')
    last = soup.select('.last')
    # Fixed: the original stored the count in a variable named ``sum``,
    # shadowing the builtin; also no need to re-parse the tag through a
    # second BeautifulSoup pass — a Tag supports find_all() directly.
    page_count = 0
    for link in last[0].find_all('a'):
        # href looks like "...黄页-...名录_37"; keep the trailing number.
        page_count = link['href'].split('_', 1)[1]
    return int(page_count)

# Process a single company's detail-page URL.
def productone(urlp):
    # Intentionally a stub: the author found the detail page did not
    # contain the desired data, so the URL is only printed, not scraped.
    print(urlp)

# Extract every company detail-page URL from one listing page.
def findproduct(citynumone):
    """Fetch the listing page at *citynumone*, pull the company links out
    of the ``.list`` element, and pass each detail URL to productone().

    On the listing page, every second <a> is an address link, so only the
    odd-positioned links (the company names) are followed.
    """
    # BUG FIX: the original bound the HTTP response to a local named
    # ``productone``, shadowing the module-level productone() function —
    # the call at the bottom then invoked a Response object and raised
    # TypeError. The local is renamed to ``resp``.
    resp = requests.get(citynumone, headers=header)
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.content, 'html.parser')
    title = soup.select('.list')
    soup1 = BeautifulSoup(str(title), 'html.parser')
    # enumerate from 1 replaces the hand-rolled ``pp`` counter; skip the
    # even-positioned (address) links.
    for pos, link in enumerate(soup1.find_all('a'), start=1):
        if pos % 2 == 0:
            continue
        productone('http://www.qeecoo.com/' + link['href'])

# Script entry point.
if __name__ == '__main__':
    # Ask the user which region to crawl (prompt text unchanged).
    city = input('请输入:' + city)
    # Resolve the total page count, then crawl every listing page.
    findone(findallpage(city), city)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值