from urllib import request
from bs4 import BeautifulSoup
import xlwt
import time
# Module-level accumulators shared by the functions in this script.
# Listing-page URLs: website() appends the URL typed by the user and
# liulan() appends the pager hrefs; getwebs() iterates over this list.
links = []
# Detail-page hrefs appended by getwebs() and iterated by info().
webs = []
# <h1> strings of the 'company' element, appended by info();
# excel() writes them to column 0.
ydm = []
# Second whitespace-separated token of the 'company' element's first <p>,
# appended by info(); excel() writes them to column 1.
yddh = []
# Concatenation of two <li> texts (each minus its first five characters),
# appended by info(); excel() writes them to column 2.
yddz = []
def website():
    """Prompt for the start URL, record it in ``links``, and return it.

    Returns:
        The text exactly as it was entered at the prompt.
    """
    start_url = input('输入网址:')
    links.append(start_url)
    return start_url
def liulan():
    """Collect the listing-page URLs to crawl into the module-level ``links``.

    Asks for the start URL through ``website()`` (which appends it to
    ``links``), downloads that page, and appends the ``href`` of every
    ``<a>`` that is a direct child of the element found for class ``page``.
    The entry at index 1 of ``links`` is then removed.

    A page without a ``page`` element, or one that yields no pager links,
    leaves ``links`` as ``website()`` left it instead of raising.
    """
    url = website()
    # Adjacent literals keep the header value free of indentation whitespace.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 '
            'Safari/537.36 Edge/18.17763'
        ),
    }
    req = request.Request(url, headers=headers)
    # Close the HTTP response as soon as the body has been read.
    with request.urlopen(req) as rsp:
        html = rsp.read()
    soup = BeautifulSoup(html, 'html.parser')

    pager = soup.find(attrs={'class': 'page'})
    if pager is None:
        # No pagination block: only the start page will be crawled.
        return

    for child in pager:
        if child.name == 'a':
            links.append(child['href'])

    # Drop the first pager link -- presumably it repeats the start page
    # (TODO confirm against the target site). Guarded so a pager without
    # any <a> children does not raise IndexError.
    if len(links) > 1:
        del links[1]
def getwebs():
    """Fill ``webs`` with the detail-page links found on every listing page.

    Each URL in ``links`` is downloaded and the first ``<ul>`` of the page
    is scanned: for every direct ``<li>`` child, the ``href`` reached via
    ``li.strong.a`` is appended to ``webs``.

    Pages without a ``<ul>`` and list items without a ``strong > a[href]``
    are skipped rather than aborting the whole crawl.
    """
    # The headers never change, so build them once outside the loop.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 '
            'Safari/537.36 Edge/18.17763'
        ),
    }
    for page_url in links:
        req = request.Request(page_url, headers=headers)
        # Close the HTTP response as soon as the body has been read.
        with request.urlopen(req) as rsp:
            html = rsp.read()

        listing = BeautifulSoup(html, 'html.parser').ul
        if listing is None:
            continue

        # A separate loop variable avoids clobbering the page URL.
        for item in listing.contents:
            if item.name != 'li':
                continue
            strong = item.strong
            anchor = strong.a if strong is not None else None
            if anchor is None or 'href' not in anchor.attrs:
                continue
            webs.append(anchor.attrs['href'])
def info():
    """Scrape three text fields from every URL in ``webs``.

    For each page the following are extracted:

    * the ``h1`` string of the element found for class ``company``;
    * the second whitespace-separated token of that element's first ``<p>``;
    * the texts of the first two ``<li>`` found among the first five
      children of the page's first ``<ul>``, each with its first five
      characters removed, concatenated.

    The three values are printed and appended to ``ydm``, ``yddh`` and
    ``yddz`` only after all of them were extracted, so the three lists
    always grow together and stay index-aligned for ``excel()``. A page that
    cannot be fetched or parsed is reported with its actual error and
    skipped; ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 '
            'Safari/537.36 Edge/18.17763'
        ),
    }
    for page_url in webs:
        try:
            req = request.Request(page_url, headers=headers)
            # Close the HTTP response as soon as the body has been read.
            with request.urlopen(req) as rsp:
                html = rsp.read()
            soup = BeautifulSoup(html, 'html.parser')

            company = soup.find(attrs={'class': 'company'})
            name = str(company.h1.string)
            phone = str(company.p.text).split()[1]

            # Strip the first five characters of each <li> text -- presumably
            # a fixed-width label (TODO confirm against the target site).
            parts = [
                str(node.text)[5:]
                for node in soup.ul.contents[0:5]
                if node.name == 'li'
            ]
            address = parts[0] + parts[1]
        except Exception as exc:
            # Report what really went wrong; an HTTPError still prints as
            # "HTTP Error 404: Not Found" through its own str().
            print(f'{page_url}: {exc}')
            continue

        # Commit all three fields together so the columns never drift apart.
        ydm.append(name)
        yddh.append(phone)
        yddz.append(address)
        print(name)
        print(phone)
        print(address)
def excel():
    """Write the scraped columns to an ``.xls`` workbook.

    The sheet name, which is also the output file stem, is ``links[0]``
    without its first 26 and last 5 characters. Column 0 receives ``ydm``,
    column 1 ``yddh`` and column 2 ``yddz``, each written from row 0 down.
    The workbook is saved as ``<sheetname>.xls``.
    """
    # The fixed slice assumes a particular URL layout (prefix of 26
    # characters, suffix of 5) -- TODO confirm for other start URLs.
    sheetname = links[0][26:-5]

    book = xlwt.Workbook()
    sheet = book.add_sheet(sheetname)

    for col, values in enumerate((ydm, yddh, yddz)):
        for row, value in enumerate(values):
            sheet.write(row, col, value)

    book.save(f'{sheetname}.xls')
# Run the whole pipeline when the module is executed: collect listing-page
# URLs, gather detail-page links, scrape each detail page, then export the
# collected columns to an .xls file. Each step reads the module-level lists
# filled by the previous one, so the order matters.
liulan()
getwebs()
info()
excel()