上一篇文章中写了一个小爬虫,用来判断 omg 战队吧是不是每分钟都有人回复。这一次用类来封装一下上一篇的小爬虫。
from bs4 import BeautifulSoup
import urllib.request
import time
class tieba(object):
    """Crawler that checks whether a Tieba forum page gets a new reply every minute.

    The page is sampled several times; each sample records the minute value of
    the first timestamp found, and consecutive samples are then compared.
    """

    def gethtml(self, url):
        """Download *url* and return its body decoded as UTF-8.

        The decoded text is also stored on ``self.html``.
        """
        # The context manager closes the HTTP response even if reading or
        # decoding fails (the original left it open).
        with urllib.request.urlopen(url) as response:
            self.html = response.read().decode('utf-8')
        return self.html

    def judge(self, L1):
        """Report whether every sample is exactly one minute after the previous one.

        Args:
            L1: Sequence of integer minute values, in sampling order.

        Returns:
            ``True`` if each value is one more than its predecessor (with a
            59 -> 0 wrap counted as one minute), ``False`` otherwise. An empty
            or single-element sequence yields ``True``. The verdict is also
            printed, as before.
        """
        for earlier, later in zip(L1, L1[1:]):
            # A step of +1 is the normal case; -59 is the hour wrap (59 -> 0).
            # The original computed this via ``L[i+1]``, an undefined name,
            # which raised NameError on the non-wrap branch.
            if later - earlier not in (1, -59):
                print('并不是每分钟都有人回复')
                return False
        print('每分钟都有人回复')
        return True

    def gettime(self, sleeptime, count, url):
        """Sample *url* ``count`` times, ``sleeptime`` seconds apart, then judge.

        Each round downloads the page, looks at the second ``<span>`` inside
        every ``<div class="pull_right">``, and keeps the part after the first
        ``:`` of the first such text that contains one (presumably the minute
        of an ``HH:MM`` last-reply time -- confirm against the live page).
        The collected values are printed after every round and finally passed
        to :meth:`judge`.

        Raises:
            IndexError: If a downloaded page contains no usable timestamp.
            ValueError: If the extracted text is not an integer.
        """
        L1 = []
        for round_no in range(count):
            soup = BeautifulSoup(self.gethtml(url), 'html.parser')
            minute_text = None
            for div in soup.find_all("div", class_="pull_right"):
                spans = div.find_all("span")
                if len(spans) < 2:
                    # No second span to read; skip this div.
                    continue
                parts = spans[1].get_text().strip().split(':')
                if len(parts) > 1:
                    # Only the first timestamp on the page is used.
                    minute_text = parts[1]
                    break
            if minute_text is None:
                raise IndexError('no timestamp containing ":" found on the page')
            L1.append(int(minute_text))
            print(L1)
            # Wait between samples only; sleeping after the last one is wasted time.
            if round_no < count - 1:
                time.sleep(sleeptime)
        self.judge(L1)
# Forum page to sample; the kw value is the URL-encoded forum name.
url = 'https://tieba.baidu.com/f?kw=omg%E6%88%98%E9%98%9F&fr=fenter&prequery=omg'
tieba_by_object = tieba()
'''
用来运行爬虫
'''
# Run the crawler: take 5 samples, waiting 60 seconds between them.
tieba_by_object.gettime(sleeptime=60, count=5, url=url)
上面的代码把用来存放获取到的时间的两个数组(L 和 L1)放在了函数内部。如果要把它们当作函数的参数来写,就要写成 tieba_by_object.gettime(60, 5, L, L1, url),这样写不够优雅,可以用可变参数 *args 来稍微改进一下。
# Two empty lists, passed to gettime as extra positional arguments via *args.
args = [[], []]
tieba_by_object = tieba()
# Requires the gettime variant whose signature ends in "url, *args".
tieba_by_object.gettime(60, 5, url, *args)
在方法内部,通过 args 取出这两个数组的写法如下:
def gettime(self, sleeptime, count, url, *args):
L = args[0]
L1 = args[1]