from bs4 import BeautifulSoup
import requests
import pymysql
from datetime import datetime
import time
class Baidu:
    """Scrape a Baidu "top buzz" listing page and store its rows in MySQL.

    Each table row of the page at ``url`` that carries a title link, a rank
    badge and a trend badge is written to the ``baidu_hot`` table through the
    connection opened in ``__init__``.
    """

    # Row count at which saveData() switches from INSERT to UPDATE.
    # NOTE(review): once the table holds this many rows, a rank that was never
    # inserted is not stored at all (the UPDATE matches no row) -- confirm that
    # 29 is the intended cap.
    ROW_LIMIT = 29

    # Seconds to wait for the HTTP response before giving up, so a stalled
    # connection cannot hang the polling loop forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Remember the page URL and open the MySQL connection.

        Args:
            url: Address of the listing page to scrape.
        """
        self.url = url
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration.
        self.db = pymysql.connect(user='root', password='root', host='localhost', database='laravel')

    @staticmethod
    def _text(tag):
        """Return the text of *tag* as a plain ``str``.

        ``tag.string`` is ``None`` when the tag has more than one child; fall
        back to the concatenated text so callers never receive ``None``.
        """
        text = tag.string
        return str(text) if text is not None else tag.get_text()

    def getinfo(self):
        """Fetch the page once and save every complete row via saveData().

        Raises:
            requests.RequestException: if the HTTP request fails or times out.
        """
        user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
        headers = {"user-agent": user_agent}
        response = requests.get(self.url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        # The page is decoded as GBK rather than whatever requests guesses.
        response.encoding = 'gbk'
        soup = BeautifulSoup(response.text, 'lxml')
        for row in soup.select("table.list-table tr"):
            title = row.find('a', class_='list-title')
            rank = row.find('span', {'class': ['num-top', 'num-normal']})
            rise = row.find('span', {'class': ['icon-rise', 'icon-normal', 'icon-fair', 'icon-fall']})
            # Rows missing any of the three elements are skipped.
            if not (rise and rank and title):
                continue
            self.saveData(self._text(rank), self._text(title), self._text(rise), title.get('href'))
        print("--------save end-----------------")

    def saveData(self, rank, title, rise, url):
        """Insert or update one entry in ``baidu_hot``.

        While the table holds fewer than ``ROW_LIMIT`` rows the entry is
        inserted; afterwards the row whose ``rank`` matches is updated.
        Database errors are reported on stdout and rolled back, not raised.

        Args:
            rank: Rank label of the entry; used as the UPDATE key.
            title: Entry title.
            rise: Text of the trend badge.
            url: Link target of the entry.
        """
        cursor = self.db.cursor()
        try:
            # COUNT(*) avoids transferring every row just to count them.
            cursor.execute("select count(*) from baidu_hot where id>0")
            stored = cursor.fetchone()[0]
            # Values are passed as parameters so quotes in scraped text cannot
            # break or inject into the statement. `rank` is back-quoted
            # because it is a reserved word in MySQL 8.
            if stored >= self.ROW_LIMIT:
                action = 'update'
                sql = "update baidu_hot set title=%s, rise=%s, url=%s where `rank`=%s"
                params = (title, rise, url, rank)
            else:
                action = 'saved'
                sql = "insert into baidu_hot (title,`rank`,rise,url) values (%s,%s,%s,%s)"
                params = (title, rank, rise, url)
            print('--------------{}--------------{}'.format(title, action))
            cursor.execute(sql, params)
            self.db.commit()
        except (pymysql.MySQLError, OSError) as err:
            # PyMySQL errors do not derive from OSError, so both are caught.
            self.db.rollback()
            print("DB error: {0}".format(err))
        finally:
            cursor.close()

    def time(self, timscrip):
        """Call getinfo() forever, sleeping ``timscrip`` seconds between runs.

        A failed HTTP request is reported and retried on the next cycle
        instead of terminating the loop. This method never returns.

        Args:
            timscrip: Pause between two scrapes, in seconds.
        """
        while True:
            try:
                self.getinfo()
            except requests.RequestException as err:
                print("fetch failed: {0}".format(err))
            time.sleep(timscrip)
if __name__ == "__main__":
    # Guarded so that importing this module does not open a database
    # connection and enter the endless polling loop.
    hotnew = Baidu("http://top.baidu.com/buzz?b=1")
    # Re-scrape every 120 seconds; time() loops forever.
    hotnew.time(120)
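# python 百度热点新闻 (Python: Baidu hot news)
# 最新推荐文章于 2025-03-14 20:26:59 发布 (page footer: latest recommended article published 2025-03-14 20:26:59)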