xpath使用方法

最新推荐文章于 2024-10-12 12:26:23 发布

paul jeorgh

最新推荐文章于 2024-10-12 12:26:23 发布

阅读量215

点赞数 1

文章标签： python

本文链接：https://blog.csdn.net/2302_79777012/article/details/142033272

版权

from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree




class Spiderqinghua():
    """Crawl quote ("qinghua") pages and store every quote in MySQL.

    Workflow (see ``main``): download a listing page, collect the
    detail-page links found on it, download each detail page, extract the
    quote texts and insert them one by one into table ``Q``.

    A MySQL connection is opened in ``__init__``. Call ``close()`` when
    finished, or use the instance in a ``with`` statement.
    """

    def __init__(self):
        # HTTP headers sent with every request.
        # NOTE(review): the Cookie looks like it was copied from a browser
        # session and is probably stale -- confirm it is still required.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.50',
            'Cookie': 'UM_distinctid=17c96621588508-0006f186e395d8-513c1f42-154ac4-17c96621589136; PHPSESSID=lr7ongb4hq463lesgr5aj3qb41; BAIDU_SSP_lcr=https://www.baidu.com/link?url=uKihKodBCJW5w1BABPlnATnPqDbc46lex6pypXn0_GC&wd=&eqid=ea8a741e0000017e00000003616e2d83; Hm_lvt_eaa57ca47dacb4ad4f5a257001a3457c=1634651723,1634652050,1634652160,1634652257; CNZZDATA1272896529=1159959131-1634602421-https%253A%252F%252Fwww.baidu.com%252F%7C1634656990; Hm_lpvt_eaa57ca47dacb4ad4f5a257001a3457c=1634658647',
        }
        # Connect to the database (user name, password, database name).
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration or environment variables.
        self.db = pymysql.connect(user='root', password='kobe123456', database='kobe', charset='utf8')
        self.cursor = self.db.cursor()  # cursor used for every INSERT

    def __enter__(self):
        """Allow ``with Spiderqinghua() as spider:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the database resources; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the cursor and the database connection opened in ``__init__``."""
        self.cursor.close()
        self.db.close()

    def get_data(self, url, timeout=10):
        """Download ``url`` with the spider's headers and return the body text.

        :param url: URL to request.
        :param timeout: seconds passed to ``requests.get`` as its timeout, so
            an unresponsive server cannot block the crawl indefinitely.
        :return: ``response.text`` of the HTTP response.
        """
        response = requests.get(url, headers=self.headers, timeout=timeout)
        return response.text

    def kobe_data(self, data):
        """Extract the detail-page links from the HTML of a listing page.

        :param data: HTML source of the listing page.
        :return: list of ``href`` values matched by the XPath expression;
            an empty list when ``data`` is empty or no tree can be built.
        """
        # An empty document cannot yield a usable tree, so bail out early
        # instead of failing inside the parser or on ``None.xpath``.
        if not data or not data.strip():
            return []
        tree = etree.HTML(data)
        if tree is None:
            return []
        return tree.xpath('//div[@class="item"]//div/a/@href')

    def parse_data(self, url):
        """Download one detail page, then print and store every quote on it.

        :param url: URL of the detail page.
        """
        page = self.get_data(url)
        if not page or not page.strip():
            return
        tree = etree.HTML(page)
        if tree is None:
            return
        quotes = tree.xpath('//div[@class="stbody "]//a/p/text()|//div[@class="stbody first"]//a/p/text()')
        for content in quotes:
            print(content)
            print("=" * 100)
            self.save_sql(content)

    def save_sql(self, qinghua):
        """Insert one quote into table ``Q`` and commit immediately.

        :param qinghua: quote text to store.
        """
        sql = 'insert into Q(text) value(%s)'
        # execute() expects the parameters as a list or tuple.
        self.cursor.execute(sql, [qinghua])
        self.db.commit()

    def main(self, start_url=None):
        """Crawl one listing page and every detail page linked from it.

        :param start_url: URL of the listing page. When omitted, the
            module-level name ``url`` is used, which is how the script
            entry point has always driven this method.
        """
        if start_url is None:
            # Legacy behaviour: the caller sets a module-level ``url``.
            start_url = url
        listing = self.get_data(start_url)
        for href in self.kobe_data(listing):
            # Resolve relative links against the listing page; absolute
            # links come back unchanged.
            link = urljoin(start_url, href)
            print(link)
            self.parse_data(link)



if __name__ == '__main__':
    # Listing pages to crawl.
    url_list = [
        'http://www.ainicr.cn/qh/t6.html',
        'http://www.ainicr.cn/qh/t57.html',
        'http://www.ainicr.cn/qh/t4.html',
        'http://www.ainicr.cn/qh/t8.html',
    ]

    # Create the spider once: every instance opens its own MySQL connection,
    # so building a new one per URL left several connections open.
    abc = Spiderqinghua()
    try:
        # The loop variable must be named `url`: main(), called without
        # arguments, reads the module-level name `url`.
        for url in url_list:
            abc.main()
    finally:
        # Always release the cursor and the connection, even if a page fails.
        abc.cursor.close()
        abc.db.close()