# Python 爬取当当网商品价格并写入数据库

import requests
import pymysql
import re
from fake_useragent import UserAgent
from lxml import etree

# Shared fake_useragent generator; spider() reads ua.random to build the
# User-Agent header of each request.
ua = UserAgent()
# Module-level accumulator: spider() appends (title, price, link) tuples here
# and insertData() iterates over it when writing rows.
values = []
def spider(db, cursor, key, index):
    """Fetch one Dangdang search-result page and collect its products.

    Every product found on the page is appended to the module-level
    ``values`` list as a ``(title, price, link)`` tuple. Nothing is returned.

    Args:
        db: Database connection. Not used here; kept so existing callers
            keep working.
        cursor: Database cursor. Not used here; kept so existing callers
            keep working.
        key: Search keyword substituted into the ``key`` query parameter.
        index: Page number substituted into the ``page_index`` query
            parameter.
    """
    headers = {'User-Agent': ua.random}
    url = "http://search.dangdang.com/?key={key}&act=input&page_index={index}".format(key=key, index=index)
    try:
        # Bounded wait: without a timeout a single stalled page would hang
        # the whole crawl indefinitely.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Skip this page but let the caller continue with the next one.
        print("请求失败 {}: {}".format(url, exc))
        return

    content = etree.HTML(response.text)
    ul_list = content.xpath('//div[@id="search_nature_rg"]/ul[@id="component_59"]/li')

    for li in ul_list:
        titles = li.xpath('./a/@title')  # product name
        links = li.xpath('./a/@href')  # product link
        if not titles or not links:
            # An <li> without a direct <a title=... href=...> child would
            # otherwise raise IndexError and abort the whole page.
            continue

        # Price: join the text nodes and strip the currency sign.
        price = li.xpath('.//p[@class="price"]/span[@class="price_n"]/text()')
        price = ''.join(price).replace("¥", "")

        values.append((titles[0], price, links[0]))

def table_exists(cursor, table_name):
    """Create ``table_name`` in the current database unless it already exists.

    The existing tables are listed with ``show tables;``. If ``table_name``
    is among them a message is printed and nothing else happens; otherwise
    the table is created with ``title``, ``price`` and ``link`` columns.

    Args:
        cursor: DB-API cursor providing ``execute`` and ``fetchall``.
        table_name: Name of the table to look for / create.
    """
    cursor.execute("show tables;")
    existing = set()
    for row in cursor.fetchall():
        # Rows are read positionally for sequence-style cursors and by value
        # for dict-style cursors; either way each row carries one table name.
        if isinstance(row, dict):
            existing.update(row.values())
        else:
            existing.add(row[0])

    if table_name in existing:
        print('表已存在不需要创建')
        return

    # Identifiers cannot be sent as query parameters, so quote the name with
    # backticks and double any backtick it contains.
    quoted = "`{}`".format(table_name.replace("`", "``"))
    sql = """CREATE TABLE IF NOT EXISTS {} (
        title VARCHAR(2000),
        price VARCHAR (10),
        link VARCHAR(2000)
    )""".format(quoted)

    # No DROP TABLE beforehand: CREATE TABLE IF NOT EXISTS is already safe to
    # run, and dropping could destroy data if the existence check missed the
    # table for any reason.
    cursor.execute(sql)  # create the table
    print("创建成功")

def insertData(db, cursor, keyword="手机", pages=range(1, 30), table=None):
    """Crawl the search pages, then insert every collected product row.

    ``spider`` is called once per page and fills the module-level ``values``
    list; each ``(title, price, link)`` tuple in it is then inserted and
    committed individually, so one failing row does not affect the others.

    Args:
        db: Open database connection, used for ``commit`` / ``rollback``.
        cursor: Cursor on ``db`` used to execute the INSERT statements.
        keyword: Search keyword to crawl. Defaults to the original
            hard-coded value.
        pages: Iterable of page numbers to crawl. Defaults to pages 1-29.
        table: Target table name. When omitted, the module-level
            ``table_name`` set by the script entry point is used.
    """
    if table is None:
        table = table_name  # module global assigned in the __main__ section

    for page in pages:
        spider(db, cursor, keyword, page)  # collect the products of this page

    # Only the identifier is formatted into the statement (backtick-quoted);
    # the scraped values are passed as parameters so quotes in a title can
    # neither break the statement nor inject SQL.
    sql = "INSERT INTO `{}` (title, price, link) VALUES (%s, %s, %s)".format(
        table.replace("`", "``")
    )

    for title, price, link in values:
        # lxml yields str subclasses; hand plain strings to the driver.
        row = (str(title), str(price), str(link))
        try:
            cursor.execute(sql, row)  # run the INSERT
            db.commit()  # persist this row
        except pymysql.MySQLError as exc:
            db.rollback()  # undo the failed statement
            print("写入失败" + sql + " " + repr(row) + " " + str(exc))
        else:
            print("成功写入数据" + row[0])

if __name__ == '__main__':
    # Script entry point: connect, make sure the table exists, crawl and
    # insert, then always release the cursor and the connection.
    table_name = 'dangdangwang'  # target table; also read by insertData() as a module global

    # NOTE(review): credentials are hard-coded; consider loading them from
    # the environment or a config file before sharing this script.
    db = pymysql.connect(host='localhost', port=3306, user='root', password='123456', database='students', charset='utf8')
    try:
        cursor = db.cursor()
        try:
            table_exists(cursor, table_name)
            insertData(db, cursor)
        finally:
            cursor.close()
    finally:
        # Close the connection even if crawling or inserting raised.
        db.close()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值