使用 Python 的 pyquery 简单爬取数据的 demo

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pyquery import PyQuery as pq
import time
import random


def get_appinfo_from_yyb(app_pack,storenum):
	"""Load one app page and return its details as a tab-separated line.

	Args:
		app_pack: Package name of the app; emitted as the first field.
		storenum: Selects which page layout to parse.
			1 reads the name from ``.det-name-int``, the download count
			from ``.det-ins-num`` and the description text from
			``.det-app-data-info``.
			2 reads the name from ``<title>`` and the description text
			from ``.app-text .pslide``; the download count is left empty.

	Returns:
		``app_pack``, name, download count and description text joined by
		tab characters, or ``""`` when the description text is empty.

	Raises:
		ValueError: If ``storenum`` is neither 1 nor 2.
	"""
	# Reject an unknown layout before loading anything. Previously this case
	# fell through to ``return text`` with ``text`` never assigned.
	if storenum not in (1, 2):
		raise ValueError('unsupported storenum: %r' % (storenum,))

	# NOTE(review): the URL expression is redacted in this source; presumably
	# it is built from app_pack (and the store) -- restore it before running.
	url=xxxxxxxxxxxxxx
	data = pq(url)

	if storenum==1:
		app_name = data('.det-name-int').text()
		# Remove the literal label so only the count part of the text remains.
		app_down_cnt = data('.det-ins-num').text().replace(u'下载' ,'')
		app_desc = data('.det-app-data-info').text()
	else:
		# Drops the last 7 characters of the title -- presumably a fixed
		# site-name suffix; TODO confirm against a live page.
		app_name = data('title').text()[:-7]
		app_down_cnt = ""
		# Newlines are removed so the record stays on a single output line.
		app_desc = data('.app-text .pslide').text().replace('\n','')

	# An empty description is treated as "no usable data" for this app.
	if not app_desc:
		return ""
	return '\t'.join([app_pack, app_name, app_down_cnt, app_desc])


if __name__ == '__main__':
	# Script entry point: read one package name per line from the input file,
	# look each one up, and append every non-empty result line to the output.
	storenum = 2
	file_num = "11999.csv"
	input_path= "1filename"+file_num
	output_path = "1result"+file_num
	#time.sleep(3600*6)
	# Use an explicit encoding on both files instead of the locale default;
	# 'utf-8-sig' additionally drops a leading BOM so it cannot end up inside
	# the first package name.
	with open(input_path, encoding='utf-8-sig') as fr, open(output_path, "w", encoding='utf-8') as fw:
		print(input_path,output_path)
		# num is the 1-based line number of the input file.
		for num, line in enumerate(fr, 1):
			app_pack_name = line.strip()
			# Skip blank lines rather than issuing a lookup for an empty name.
			if not app_pack_name:
				continue
			app_info = get_appinfo_from_yyb(app_pack_name,storenum)
			print(num, app_pack_name, app_info)
			if app_info:
				fw.write('%s\n' % app_info)
			# Pause a random 0.1-3.0 seconds between lookups.
			sleep_num = random.randint(100, 3000)/1000
			time.sleep(sleep_num)
""""""

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值