xpath爬取网页评论，网址的调用方法，数据库特殊字符的替换

最新推荐文章于 2023-02-04 18:59:23 发布

baodao9225

最新推荐文章于 2023-02-04 18:59:23 发布

阅读量180

点赞数

文章标签：数据库 json

原文链接：http://www.cnblogs.com/ZHANG576433951/p/6151290.html

版权

# -*- coding:utf-8-*-
from lxml import etree
import urllib
import json
import requests

import MySQLdb
# Running row number; SQL.insert increments it and writes it as the first
# column of each inserted row. NOTE(review): this name shadows the builtin id().
id=0
class SQL(object):
    """Thin wrapper around one shared MySQL connection for storing comments.

    The connection is created once, when the class body executes, and is
    shared by every instance through the ``conn`` class attribute.
    """

    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment instead.
    conn=MySQLdb.connect(host="localhost",
                             port=3306,
                             user="root",
                             passwd="123456",
                             db="test",
                             charset="utf8",)

    def insert(self,name,time,content):
        """Insert one comment row into the ``pinglun`` table and commit.

        The module-level ``id`` counter is incremented first and stored as
        the row's first column.

        Args:
            name: Author name of the comment.
            time: Timestamp text of the comment.
            content: Comment body text.
        """
        global id
        id+=1
        # Table is expected to exist already, e.g.:
        #   create table pinglun (id int, name text, time text, content text)
        cur=self.conn.cursor()
        try:
            # Pass the values separately so the driver escapes them; building
            # the statement with string interpolation allows SQL injection
            # and breaks on any value that contains a quote.
            cur.execute("insert into pinglun VALUES (%s, %s, %s, %s)",
                        (id,name,time,content))
        finally:
            # Release the cursor even when execute() raises.
            cur.close()
        self.conn.commit()

mysql=SQL()  # instantiate the class; spider() stores rows through this object
def spider(url):
    """Scrape the replies on one thread page and store them via ``mysql.insert``.

    For every post container found on the page, the author name and date are
    read from the JSON held in the ``data-field`` attribute, and the first
    text node of the post body is used as the comment text. Posts that lack
    the ``data-field`` attribute or have no direct text node are skipped.

    Args:
        url: Address of the page to download.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            exceeds the timeout.
        KeyError: If the ``data-field`` JSON lacks the expected keys.
    """
    # A timeout keeps one stalled request from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    selector = etree.HTML(response.text)
    # The trailing spaces inside the class values are part of the page's
    # markup and must not be trimmed, or the exact-match predicate fails.
    posts = selector.xpath('//div[@class="l_post j_l_post l_post_bright  "]')
    for post in posts:
        data_field = post.xpath('@data-field')
        text_nodes = post.xpath('div[@class="d_post_content_main"]/div/cc/div[@class="d_post_content j_d_post_content  clearfix"]/text()')
        if not data_field or not text_nodes:
            # Nothing to store; indexing [0] here would raise IndexError
            # and abort the rest of the page.
            continue
        reply_info = json.loads(data_field[0])
        author = reply_info['author']['user_name']
        post_time = reply_info['content']['date']
        # Strip single quotes so the text stays safe even if the storage
        # layer interpolates it into a quoted SQL literal.
        contents = text_nodes[0].replace("'", "")
        mysql.insert(author, post_time, contents)
if __name__ == '__main__':
    # Crawl pages 1 through 99 of the thread; the page number is substituted
    # into the "pn" query parameter of the URL.
    page_url = "http://tieba.baidu.com/p/3522395718?pn=%s"
    for page in range(1, 100):
        spider(page_url % page)
# Close the shared database connection once the crawl has finished.
mysql.conn.close()

转载于:https://www.cnblogs.com/ZHANG576433951/p/6151290.html

baodao9225

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
xpath爬取网页评论，网址的调用方法，数据库特殊字符的替换

# -*- coding:utf-8-*-from lxml import etreeimport urllibimport jsonimport requestsimport MySQLdbid=0class SQL(object): conn=MySQLdb.connect(host="localhost", port=3...
复制链接

扫一扫