获取淘宝搜索结果信息(商品标题、价格等)并存入 CSV 文件

参考博客:https://blog.csdn.net/qq_26373925/article/details/101185979

import csv
import os
import re
import time

import requests

class shopping_list():
    """Scrape Taobao search results for one keyword into a CSV file.

    Each result page is fetched over HTTP, the product fields listed in
    ``KEYS`` are pulled out of the embedded JSON text with regular
    expressions, and one CSV row is written per product.

    Args:
        searchkey: Keyword to search for.
        pageTotal: Number of result pages to scrape (pages 1..pageTotal).
        saveDir: Directory the CSV file is written to. Defaults to ``'E:/'``.
        cookie: Cookie header value of a logged-in Taobao session. When
            omitted, the session cookie captured in ``DEFAULT_COOKIE`` is used.
    """

    SEARCH_URL = 'https://s.taobao.com/search'
    # Offset step used for the ``s`` query parameter between pages.
    PAGE_SIZE = 44
    # Seconds to wait before each request.
    REQUEST_DELAY = 2
    # Seconds before a request is abandoned instead of hanging forever.
    REQUEST_TIMEOUT = 10
    # Serves both as the CSV header and as the JSON keys to extract.
    # Sample of the page source these keys come from:
    # "raw_title":"...","view_price":"9.90","view_fee":"0.00",
    # "item_loc":"...","view_sales":"...","comment_count":"",
    # "user_id":"...","nick":"..."
    KEYS = ('raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick')
    # NOTE(review): this is a captured login session; it is presumably expired
    # by now and should be replaced through the ``cookie`` argument.
    DEFAULT_COOKIE = 'thw=cn; t=8615e43948e4f325a452fcfc75658295; enc=5aX3Fxp7gnjdnzYhyZgW5C8bsEB9iy%2Fm9FINNSxeLkSyzHw%2BW%2FQ75wyPT1B2d%2FuBnBL1M3%2Fesyz5gCF5olGU%2Fw%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; cna=pZzuFYcYfncCATo5S54w4cVz; miid=1475301187983023342; lgc=%5Cu8D60%5Cu6211%5Cu4E00%5Cu573A%5Cu7E41%5Cu534E%5Cu68A6; tracknick=%5Cu8D60%5Cu6211%5Cu4E00%5Cu573A%5Cu7E41%5Cu534E%5Cu68A6; _cc_=UtASsssmfA%3D%3D; tg=0; mt=ci=104_1; uc3=nk2=ty9RApAMboOj14GWEeg%3D&id2=UUGk2KGOvdSPAg%3D%3D&lg2=UtASsssmOIJ0bQ%3D%3D&vt3=F8dByuK6XCEdAazEr0o%3D; uc4=nk4=0%40tVx%2FqaVOleOvXlf%2BkJkZlRjcY1N55lu2bw%3D%3D&id4=0%40U2OT6E4rYowWmY6LDe8ez%2F%2FRBzTs; _m_h5_tk=abaf9db87e4b021b982ac24aa4a8008f_1569383153151; _m_h5_tk_enc=afefc5a7ad9c15ba310176c664e01f64; v=0; cookie2=10078b0643deba39a84cfd017ed07cdf; _tb_token_=5373b37ee10de; uc1=cookie14=UoTaEcfKm1vnRA%3D%3D; l=cBg9SfccqHIxBPbLBOCanurza77OSIRYYuPzaNbMi_5QV6T_-u_Ok6j8tF96VjWdOcTB4tm2-gv9-etkZwMy6uBKpdx1.; isg=BMPDNpg7mqWqPlYynWUrL58qUodtOFd6f31HOvWgHyKZtOPWfQjnyqEmLgRfD69y'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'

    def __init__(self, searchkey, pageTotal, saveDir='E:/', cookie=None):
        self.searchkey = searchkey
        self.startPage = 1  # first page to scrape
        self.pageTotal = pageTotal  # how many pages to scrape
        self.pagecount = 0  # number of product rows written so far
        self.nowTime = time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime())
        self.startTime = time.time()
        self.savefile = os.path.join(
            saveDir, '{}购物清单_{}.csv'.format(self.searchkey, self.nowTime))
        self.headers = {
            'cookie': self.DEFAULT_COOKIE if cookie is None else cookie,
            'User-Agent': self.USER_AGENT,
        }

    @classmethod
    def extract_rows(cls, html, keys=None):
        """Return the products found in *html* as a list of rows.

        Every row holds one value per entry of *keys* (``KEYS`` by default),
        in the same order. Empty or missing values become ``'null'``.

        The pattern accepts empty values (``"comment_count":""``) on purpose:
        skipping them would make the per-key match lists differ in length and
        shift later values onto the wrong product. A key that is absent from
        an item altogether can still cause such a shift.
        """
        keys = cls.KEYS if keys is None else keys
        columns = [
            re.findall(r'"{}":"([^"]*)"'.format(re.escape(key)), html)
            for key in keys
        ]
        if not columns:
            return []
        # The first key decides how many products there are.
        return [
            [(col[i] or 'null') if i < len(col) else 'null' for col in columns]
            for i in range(len(columns[0]))
        ]

    def run(self):
        """Fetch every page and write the extracted products to ``savefile``.

        The file is (re)created with a header row. Pages answered with a
        non-success HTTP status are reported and skipped. Network errors
        raised by ``requests`` propagate to the caller.
        """
        params = {'q': self.searchkey, 'ie': 'utf8'}  # q: search keyword
        # utf-8-sig keeps non-GBK characters writable while still letting
        # Excel detect the encoding; newline='' is what the csv module expects.
        with open(self.savefile, 'w', newline='', encoding='utf-8-sig') as f:
            # csv.writer quotes fields, so commas inside titles stay in one column.
            writer = csv.writer(f)
            writer.writerow(self.KEYS)
            for page in range(self.startPage, self.pageTotal + 1):
                params['s'] = str((page - 1) * self.PAGE_SIZE)
                time.sleep(self.REQUEST_DELAY)
                response = requests.get(
                    self.SEARCH_URL, params=params, headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT)
                if not response.ok:
                    print('page{}: 请求失败 (HTTP {}), 已跳过'.format(page, response.status_code))
                    continue
                response.encoding = 'utf8'
                rows = self.extract_rows(response.text)
                if not rows:
                    print('page{}: 未解析到商品数据 (cookie 可能已失效)'.format(page))
                print('page{}: 正在写入数据...'.format(page))
                for number, row in enumerate(rows, 1):
                    print('\r正在写入第{}条..'.format(number))
                    writer.writerow(row)
                    self.pagecount += 1
                print('page{}: 写入完成'.format(page))

        print('\n任务完成!! 页面总数: {} | 写入数据: {}条 | 用时: {:.2f}s'.format(self.pageTotal, self.pagecount, time.time()-self.startTime))

if __name__ == "__main__":
    # Scrape each (keyword, page count) pair in turn; every keyword gets
    # its own scraper instance and therefore its own CSV file.
    for keyword, pages in (('零食', 3), ('女外套', 2)):
        shopping_list(keyword, pages).run()

运行结果:
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

  • 3
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值