import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape targets and accumulated results (filled in by get_links / get_info;
# the four result lists are parallel columns for the final spreadsheet).
links = ['https://91.ndd']                   # page URLs to visit; root page first
views, names, webs, pages = [], [], [], []   # view count, title, href, page number
def get_links(start=2, stop=130, base='https://91.ndd/page/', dest=None):
    """Append paginated listing URLs (``base + page + '/'``) to *dest*.

    The defaults reproduce the original behavior: pages 2..129 are appended
    to the module-level ``links`` list (page 1 is the site root already in
    ``links``).

    Args:
        start: First page number (inclusive).
        stop: One past the last page number (exclusive, as in ``range``).
        base: URL prefix the page number is appended to.
        dest: List to extend; defaults to the module-level ``links``.

    Returns:
        The list that was extended (for convenience; callers that relied on
        the old ``None`` return simply ignore it).
    """
    dest = links if dest is None else dest
    # f-string replaces the original three-part manual concatenation.
    dest.extend(f'{base}{page}/' for page in range(start, stop))
    return dest
def get_info():
    """Fetch every URL in ``links`` and collect per-article statistics.

    For each ``<article>`` that is a direct child of ``<main>`` on each page,
    appends: the numeric view count (commas stripped) to ``views``, the title
    link href to ``webs``, the title text to ``names``, and the 1-based page
    number to ``pages`` — keeping the four lists aligned row-for-row.
    """
    # Implicit string concatenation keeps the UA on one logical value without
    # the stray whitespace the original's in-string line continuations embedded.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/71.0.3578.80 Safari/537.36'
    }
    for page_number, url in enumerate(links, start=1):
        # NOTE(security): verify=False disables TLS certificate checking; kept
        # for compatibility with the original site but should be revisited.
        # timeout=500 is 500 *seconds* — presumably milliseconds were intended;
        # left as-is to preserve behavior. TODO confirm.
        response = requests.get(url, headers=headers, verify=False, timeout=500)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Direct <article> children of <main> — the same elements the original
        # matched by scanning soup.main.contents for name == 'article'.
        for article in soup.main.find_all('article', recursive=False):
            views_tag = article.find(attrs={'class': 'views'})
            if views_tag is None:
                # Malformed article: skip entirely so the result lists stay aligned.
                continue
            # First whitespace-separated token, thousands separators removed.
            view = views_tag.text.split()[0].replace(',', '')
            views.append(view)
            print(view)
            webs.append(article.h2.a['href'])
            names.append(article.h2.text)
            pages.append(page_number)
def get_excel(path='wasai.xlsx'):
    """Write the collected columns to an Excel workbook.

    Args:
        path: Output file path; defaults to the original hard-coded
            ``'wasai.xlsx'`` so existing callers are unaffected.
    """
    # Column headers are user-facing (Chinese): title / site link / view count / page.
    data = {'影片名称': names, '网站链接': webs, '总浏览人数': views, '页码': pages}
    df = pd.DataFrame(data)
    df.to_excel(path, index=False)
    print('done')
def main():
    """Run the full scrape: build the URL list, fetch stats, write the workbook."""
    get_links()
    get_info()
    get_excel()


if __name__ == '__main__':
    # Guarding the entry point stops the scrape from re-running on import and
    # is required for correct behavior in pyinstaller-frozen executables.
    main()
# NOTE: 用pyinstaller打包时失败，原因不明 — packaging with pyinstaller fails for
# an unknown reason; pandas makes the bundle very large. (These notes were
# previously bare text, which is a syntax error in a Python file.)