第一个Python3.0 爬虫程序，爬取百度贴吧图片

最新推荐文章于 2023-12-10 20:23:53 发布

wangxiaoming

最新推荐文章于 2023-12-10 20:23:53 发布

阅读量1.9k

点赞数 1

本文链接：https://blog.csdn.net/wangming520liwei/article/details/53786652

版权

第一个Python3.0 爬虫程序，爬取百度贴吧图片

基于 Python 3.0

#print("hello222")
# coding= utf-8
import urllib.request
import  re

def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Any URL that ``urllib.request.urlopen`` accepts.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for a URL it cannot
        open (for example ``urllib.error.URLError``).
    """
    # The context manager closes the response (and its socket) even if
    # read() raises; the original left it open.
    with urllib.request.urlopen(url) as page:
        return page.read()

# Prompt on stdin for the page URL, then download that page.
# `html` holds whatever getHtml returned and is passed to getImg further down.
a = input('inpt url:')
html = getHtml(a)


def getImg(html, local='d://google'):
    """Download every ``.jpg`` named by a ``src="..."`` attribute in *html*.

    Each matched URL is fetched with ``urllib.request.urlretrieve`` and
    saved as ``<local><index>.jpg``, where the index counts matches from 0
    in document order.  The number of matches is printed before any
    download starts.

    Args:
        html: Page markup, either ``bytes`` (decoded as UTF-8) or ``str``.
        local: Prefix prepended verbatim to ``<index>.jpg`` to build each
            output filename.  It is not treated as a directory: no path
            separator is inserted.

    Raises:
        Whatever ``urlretrieve`` raises for a URL it cannot fetch or a
        file it cannot write; the first failure stops the loop.
    """
    # [^"] keeps a match inside a single attribute value.  With "." a
    # non-jpg src="..." earlier on the same line was glued to the next
    # .jpg URL, producing one bogus match.
    imgre = re.compile(r'src="([^"]+?\.jpg)"')
    if isinstance(html, bytes):
        # Bytes that are not valid UTF-8 elsewhere on the page (e.g. a
        # GBK-encoded body) should not abort the run; they are replaced
        # rather than raising UnicodeDecodeError.
        html = html.decode('utf-8', 'replace')
    imglist = imgre.findall(html)
    print(len(imglist))
    for x, imgurl in enumerate(imglist):
        urllib.request.urlretrieve(imgurl, local + '%s.jpg' % x)

# Script entry: download the images found in the page fetched earlier.
# The commented-out getHtml line is an alternative that fetches a fixed
# sample thread instead of using the prompted URL; the prints are debug aids.
#html = getHtml("http://tieba.baidu.com/p/2460150866")
getImg(html)
#print(img)
#print(html)

运行结果如下：