当你在众多大佬那里都解决不了问题的时候,来这里看看吧
‘NoneType’ object has no attribute ‘find’
这是我的代码
是爬取的抖音网红信息
爬着爬着就出现了 ‘NoneType’ object has no attribute ‘find’
# -*- coding: utf-8 -*-
"""
Created on Tue May 26 09:15:33 2020
@author: MR li
"""
""""请求网页"""
import re
import os
import requests
from bs4 import BeautifulSoup
import bs4
import time
class DOUDOU():
    """Scraper for the Douyin user ranking pages on kolranking.com.

    Every method is best-effort: a page that cannot be fetched, or that does
    not have the expected layout, is skipped instead of raising.
    """

    def getHTMLtext(self,url):
        """Fetch *url* and return the response body as text.

        Returns an empty string when the request fails for any reason
        (timeout, connection error, non-2xx status). Callers must treat ""
        as "no page" before handing the result to a parser.
        """
        try:
            response = requests.get(url,timeout=5)
            response.raise_for_status()
            return response.text
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # that Ctrl-C (KeyboardInterrupt) still stops a long crawl.
            return ""

    def fillUnitvList(self,ulist,html):
        """Append one row per user linked from a ranking page to *ulist*.

        *html* is the text of a ranking (listing) page. Each link found in
        its ``<tbody>`` is fetched as a profile page and, when that page has
        the expected layout, a nine-item row (see ``_parseUser``) is appended
        to *ulist* in place. Nothing is returned.

        An empty *html*, a listing without ``<tbody>``, links without
        ``href`` and unusable profile pages are skipped silently.
        """
        # getHTMLtext() returns "" on failure; parsing "" yields a soup with
        # no <tbody>/<body>, which is what used to trigger
        # "'NoneType' object has no attribute 'find'".
        if not html:
            return
        # NOTE(review): this pattern is a substring match on the tag name
        # (it also matches e.g. <span>); it is kept as in the original code.
        # An exact 'a' is probably what was meant -- confirm on the live page.
        linktag=re.compile('a')
        soup=BeautifulSoup(html,"html.parser")
        if soup.tbody is None:
            return
        i=1
        for child in soup.tbody.findAll(linktag):
            href=child.get('href')
            if href is None:
                # Matched tag carries no link target; nothing to fetch.
                continue
            htmluser=self.getHTMLtext('https://kolranking.com'+href)
            # One-second pause after every profile request.
            time.sleep(1)
            if not htmluser:
                continue
            row=self._parseUser(BeautifulSoup(htmluser,"html.parser"))
            if row is None:
                continue
            ulist.append(row)
            print("正在爬取",i,row[1])
            i=i+1

    def _parseUser(self,soupuser):
        """Build one result row from a parsed profile page.

        Returns ``[image src, name, five strings from the first detail
        column, two strings from the second detail column]``, or ``None``
        when the page does not contain the expected elements.
        """
        body=soupuser.body
        if body is None:
            return None
        douuser=body.find('div','col-md-3 col-sm-12 align-center')
        if douuser is None:
            return None
        # NOTE(review): substring patterns kept from the original code.
        ptag=re.compile('p')
        image=douuser.find(re.compile('img'))
        name=douuser.find(ptag)
        if image is None or name is None:
            return None
        douuser2=body.findAll('div',attrs={'class':'col-md-6 col-sm-12'})
        if len(douuser2)<2:
            return None
        child1=douuser2[0].findAll(ptag)
        child2=douuser2[1].findAll(ptag)
        # Indices 0-4 of the first column and 1-2 of the second are read.
        if len(child1)<5 or len(child2)<3:
            return None
        row=[image.get('src'),name.string]
        row.extend(p.string for p in child1[:5])
        row.extend(p.string for p in child2[1:3])
        return row

    def printUnivList(self,ulist,num):
        """Print the collected rows. *num* is accepted but not used."""
        print(ulist)
"""
图片下载
dyuserimg="D://douyin//"
path=dyuserimg+url.split('/')[-1]
try:
if not os.path.exists(dyuserimg):
os.mkdir(dyuserimg)
if not os.path.exists(path):
r=requests.get(url)
with open(path,'wb') as f:
f.write(r.content)
f.close()
print("图片保存成功")
else:
print("图片下载失败")
eccept:
print(爬取是失败)
"""
if __name__ == '__main__':
    # Ranking listing sorted by follower count; the page number is appended
    # to this prefix for each request.
    url='https://kolranking.com/douyin/users?s=&category=&ot=DESC&order=follower_count&page='
    # One scraper instance is enough; the class keeps no per-call state.
    scraper=DOUDOU()
    unifo=[]
    # Crawl listing pages 3 through 6.
    for i in range(3,7):
        html=scraper.getHTMLtext(url+str(i))
        scraper.fillUnitvList(unifo, html)
    # Print everything collected once all pages have been processed.
    scraper.printUnivList(unifo,6)
我是怎么解决的呢
这个报错(ERR)的意思是:你调用方法的那个对象是 NoneType 类型,而 NoneType 没有 find 这个方法
- 首先 我确定我爬的是静态页面
- pip install 也成功导入了库
- 确定你获取的不是空值
- 最终,那就是你调用 .find 或 .findAll 的对象有问题(这个对象是 None)
我的程序是
for child in soup.tbody.findAll(re.compile('a')):
    url='https://kolranking.com'+child.get('href')
    htmluser=self.getHTMLtext(url)
    soupuser=BeautifulSoup(htmluser,"html.parser")
    #问题所在位置
    time.sleep(1)
    douuser=soupuser.body.find('div','col-md-3 col-sm-12 align-center')
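因为我是多页面爬取,会频繁地请求页面并用 BeautifulSoup 解析,所以会出现页面还没有成功拿到就去调用方法的情况。于是我在解析后暂停一秒 time.sleep(1),再去找标签,就解决了问题。补充说明:BeautifulSoup 的解析本身是同步完成的,更可能的原因是请求过于频繁,导致 getHTMLtext 请求失败并返回空字符串 "",这时 soupuser.body 就是 None,再调用 .find 就会报这个错;暂停一秒实际上是降低了请求频率。更稳妥的做法是在调用 .find 之前先判断 htmluser 是否为空、soupuser.body 是否为 None。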