from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree
class Spiderqinghua():
    """Scrape "qinghua" (love-quote) texts from an index page and store them in MySQL.

    Workflow (see ``main``): download an index page, extract the links it
    lists, download every linked page, and insert each quote found there
    into table ``Q`` (column ``text``).

    The object owns a database connection.  Release it with ``close()`` or
    use the instance as a context manager::

        with Spiderqinghua() as spider:
            spider.main('http://www.ainicr.cn/qh/t6.html')
    """

    # Seconds to wait for the server.  Without a timeout requests.get() can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Prepare the HTTP headers and open the MySQL connection and cursor."""
        # NOTE(review): the Cookie below carries a captured browser session
        # (PHPSESSID); it will presumably expire -- confirm it is still needed.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.50',
            'Cookie': 'UM_distinctid=17c96621588508-0006f186e395d8-513c1f42-154ac4-17c96621589136; PHPSESSID=lr7ongb4hq463lesgr5aj3qb41; BAIDU_SSP_lcr=https://www.baidu.com/link?url=uKihKodBCJW5w1BABPlnATnPqDbc46lex6pypXn0_GC&wd=&eqid=ea8a741e0000017e00000003616e2d83; Hm_lvt_eaa57ca47dacb4ad4f5a257001a3457c=1634651723,1634652050,1634652160,1634652257; CNZZDATA1272896529=1159959131-1634602421-https%253A%252F%252Fwww.baidu.com%252F%7C1634656990; Hm_lpvt_eaa57ca47dacb4ad4f5a257001a3457c=1634658647',
        }
        # Connect to the database (user name, password, database name).
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        self.db = pymysql.connect(user='root', password='kobe123456', database='kobe', charset='utf8')
        self.cursor = self.db.cursor()  # cursor used for every insert

    def __enter__(self):
        """Return the spider itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database resources when the ``with`` block ends."""
        self.close()

    def close(self):
        """Close the cursor and then the database connection."""
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            self.db.close()

    def get_data(self, url):
        """Download ``url`` with the spider's headers and return the body text.

        Raises whatever ``requests.get`` raises, including a timeout error
        after ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        return response.text

    def kobe_data(self, data):
        """Parse index-page HTML and return the list of linked page hrefs.

        Returns an empty list when the markup yields no document tree.
        """
        tree = etree.HTML(data)
        if tree is None:
            # etree.HTML may return None when the markup has no parsable
            # content; treat that as "no links" instead of crashing.
            return []
        return tree.xpath('//div[@class="item"]//div/a/@href')

    def parse_data(self, url):
        """Download one quote page, print every quote and save it to the database."""
        page = self.get_data(url)
        tree = etree.HTML(page)
        if tree is None:
            # Nothing parsable on this page, so there is nothing to store.
            return
        quotes = tree.xpath('//div[@class="stbody "]//a/p/text()|//div[@class="stbody first"]//a/p/text()')
        for content in quotes:
            print(content)
            print("=" * 100)
            self.save_sql(content)

    def save_sql(self, qinghua):
        """Insert one quote into table ``Q`` and commit immediately."""
        sql = 'insert into Q(text) value(%s)'
        # execute() takes its parameters as a list or tuple; passing them
        # separately lets the driver escape the value.
        self.cursor.execute(sql, (qinghua,))
        self.db.commit()

    def main(self, url=None):
        """Crawl one index page: fetch it, then parse and store every linked page.

        Args:
            url: Index page to crawl.  When omitted, the module-level name
                ``url`` is used, which keeps older callers that set that
                global before calling ``main()`` working.

        Raises:
            NameError: if no ``url`` is passed and no module-level ``url``
                exists.
        """
        if url is None:
            url = globals().get('url')
            if url is None:
                raise NameError("no url passed to main() and no module-level 'url' is defined")
        data = self.get_data(url)
        for href in self.kobe_data(data):
            # Resolve relative links against the index page; absolute links
            # come back unchanged.
            link = urljoin(url, href)
            print(link)
            self.parse_data(link)
if __name__ == '__main__':
    # Index pages to crawl, one per run of the spider's main().
    url_list = [
        'http://www.ainicr.cn/qh/t6.html',
        'http://www.ainicr.cn/qh/t57.html',
        'http://www.ainicr.cn/qh/t4.html',
        'http://www.ainicr.cn/qh/t8.html',
    ]
    # One spider, and therefore one database connection, serves every page.
    # Creating a new instance per URL opened a fresh connection each time and
    # never closed any of them.
    abc = Spiderqinghua()
    try:
        # main() is called without arguments and picks up the module-level
        # name `url`, so the loop variable must keep exactly this name.
        for url in url_list:
            abc.main()
    finally:
        # Always release the database resources, even if a page fails.
        abc.cursor.close()
        abc.db.close()
# xpath使用方法 (How to use XPath)
# 最新推荐文章于 2024-10-12 12:26:23 发布 (Latest recommended article, published 2024-10-12 12:26:23)