python抓京东手机数据

最新推荐文章于 2023-12-11 14:24:55 发布

poetliu

最新推荐文章于 2023-12-11 14:24:55 发布

阅读量1k

点赞数 1

分类专栏：Python、python小demo分享　文章标签：python、phone

本文链接：https://blog.csdn.net/u013018721/article/details/46610663

版权

Python 同时被 2 个专栏收录

20 篇文章 1 订阅

订阅专栏

python小demo分享

20 篇文章 13 订阅

订阅专栏

python抓京东手机数据

作者:vpoet

日期:大约在夏季

#coding=utf-8

import urllib2

from lxml import etree

import re


# Imported here (rather than at the top of the file) so this script section
# carries everything it adds on its own.
from contextlib import closing

# Search URL for the keyword "%E6%89%8B%E6%9C%BA" (the UTF-8 percent-encoding
# of the Chinese word for "mobile phone").  The percent signs are doubled
# because the template is filled in with the %-operator.
#
# Two problems of the original template are fixed here:
#   * it contained raw tabs/spaces in the middle of "click=3-655", which are
#     not valid URL characters;
#   * "page=%s" (and the other filter parameters) came after a '#', i.e. in
#     the URL fragment, which is not part of the HTTP request target, so the
#     page number could not select a page.
# NOTE(review): the parameters were moved from the fragment into the query
# string unchanged; confirm that the site honours them there.
SEARCH_URL_TEMPLATE = (
    "http://search.jd.com/Search"
    "?keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&suggest=0"
    "&qrst=1&ps=addr&rt=1&stop=1&sttr=1&cid3=655&click=3-655&psort=3"
    "&page=%s"
)

# Selects the <strong> element inside each product's price <div>.
PRICE_XPATH = ("//*[@id='plist']/ul[@class='list-h clearfix']"
               "/li/div/div[@class='p-price']/strong")

# Unfinished extraction steps carried over from the original draft (none of
# them is wired in yet):
#   product names:  //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
#   product images: //div[@class='lh-wrap']/div[@class='p-img']/a/img
#                   (the draft printed .values()[3] of each element)
#   review counts:  //div[@class='extra']/a/text()
#                   (draft regex: r'.{2}(\d+).{3}')
#   positive rate:  //div[@class='extra']/span[@class='reputation']/text()
#                   (draft regex: r'\((\d{2})%.{2}\)')


def build_page_url(page):
    """Return the search URL for result page *page*.

    *page* is substituted verbatim into the ``page`` query parameter.
    """
    return SEARCH_URL_TEMPLATE % page


def fetch_page(url):
    """Download *url* and return the raw response body.

    The response object is closed even if reading fails; the original code
    never closed it.
    """
    with closing(urllib2.urlopen(url)) as response:
        return response.read()


def print_prices(tree):
    """Print one line per price element found in the parsed page *tree*.

    Each line is the second attribute value of the matched ``<strong>``
    element, a tab, and a 1-based running index, followed by an extra
    newline (same output format as the original loop).
    """
    for index, price_node in enumerate(tree.xpath(PRICE_XPATH), 1):
        # presumably the second attribute holds the price -- verify against
        # the live markup; an element with fewer than two attributes raises
        # IndexError here, exactly as in the original.
        print(price_node.values()[1] + '\t' + str(index) + '\n')


if __name__ == '__main__':
    # Number of result pages to fetch.
    page_num = 1

    # NOTE(review): pages are requested starting at 0, as in the original
    # loop; confirm whether the site expects 1-based page numbers.
    for page in range(page_num):
        html = fetch_page(build_page_url(page))
        print_prices(etree.HTML(html))

    # Printed once when all pages are done (the original printed it at the
    # end of every page iteration, which is the same output for page_num = 1).
    print("over")

这段代码还没写完：目前只实现了手机价格的抓取，商品名称、图片、评价数和好评率部分尚未完成，先贴出来供大家参考。