爬取商品标题和价格，主要用到遍历的方法

最新推荐文章于 2020-05-27 23:24:08 发布

baodao9225

最新推荐文章于 2020-05-27 23:24:08 发布

阅读量102

点赞数

原文链接：http://www.cnblogs.com/ZHANG576433951/p/6090038.html

版权


import re
import urllib2

def get_html(html=None):
    """Return the price strings found in the Taobao search page.

    Args:
        html: Page source to parse. When omitted (the default), the page
            is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list holding, in document order, the text captured between
        ``<strong>`` and ``</strong>`` for every
        ``target="_blank">&yen; <strong>`` occurrence.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
    # Non-greedy capture: a greedy ``.*`` runs on to the LAST ``</strong>``
    # of the line and fuses several prices into a single match.
    price_re = re.compile(r'target="_blank">&yen; <strong>(.*?)</strong>')
    return price_re.findall(html)

def get_html1(html=None):
    """Return the product links found in the Taobao search page.

    Args:
        html: Page source to parse. When omitted (the default), the page
            is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of ``(title attribute, link text)`` tuples, one per
        ``class="title"`` anchor, in document order.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
    # Non-greedy groups: greedy ``.*`` would stretch to the last ``">`` and
    # ``</a>`` on the line and swallow neighbouring links.
    title_re = re.compile(
        r' target="_blank" class="title" title="(.*?)">(.*?)</a>')
    return title_re.findall(html)

# Fetch both columns; prices and titles are paired purely by position.
price = get_html()
title = get_html1()
# Each title match is a tuple of regex groups; the last group is the link text.
names = [match[-1] for match in title]

# Pair by position with zip() rather than keying a dict on the price: a dict
# keeps only one item per distinct price and, on Python 2, does not preserve
# page order. zip() stops at the shorter list, exactly like the former
# "i == j" index comparison did. The builtins ``dict`` and ``list`` are no
# longer shadowed.
with open('aaa.txt', 'a') as fd:  # append mode: earlier runs are kept
    for item_price, item_name in zip(price, names):
        fd.write(item_price + '\t' + item_name + '\n\n')

# -*- coding: utf-8 -*-
import urllib2
import re

def geturl(html=None):
    """Return the ``title`` attribute of every product link in the page.

    Args:
        html: Page source to parse. When omitted (the default), the page
            is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of the captured ``title="..."`` values of the
        ``class="title"`` anchors, in document order.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
    link_re = re.compile(
        r' target="_blank" class="title" title="(.*?)">.*?</a>')
    return link_re.findall(html)
def geturl1(html=None):
    """Return the text of every ``<strong>`` element preceded by a space.

    The calling script pairs these values with the product titles, so they
    are presumably the prices shown on the page.

    Args:
        html: Page source to parse. When omitted (the default), the page
            is downloaded from http://uland.taobao.com/sem/tbsearch.

    Returns:
        A list of the captured strings, in document order.
    """
    if html is None:
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
    strong_re = re.compile(r' <strong>(.*?)</strong>')
    return strong_re.findall(html)
# Download and parse BEFORE opening the output: 'wb' truncates the file, so
# opening first would leave an empty 'baobiao.txt' whenever a request fails.
rows = zip(geturl(), geturl1())
# Binary mode so the explicit '\r\n' line endings are written verbatim.
with open('baobiao.txt', 'wb') as fd:
    for i, j in rows:
        fd.write(i + '\t' + j + '\r\n')