实现抓图的工具

实现抓图的工具

#encoding:UTF-8

import urllib
import urllib2
import re
import os
from BeautifulSoup import BeautifulSoup

def GetUrlContent(url, path):
    """Crawl an index page and download the images of every page it links to.

    Fetches ``url``, parses it with BeautifulSoup, selects every ``<a>`` tag
    that has ``target="_blank"`` and no ``title`` attribute, and passes each
    link's ``href`` to ``GetUrl`` together with ``path``.

    Args:
        url: Address of the index page to fetch.
        path: Local directory handed on unchanged to ``GetUrl``.

    Errors raised while fetching or parsing the index page propagate to the
    caller.
    """
    # Example: url = "http://www.2cto.com/meinv/sexmv/"
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(content)
    # Find the links that open in a new window and carry no title attribute.
    anchors = soup.findAll('a', attrs={"target": "_blank"}, title=None)
    for anchor in anchors:
        pageUrl = anchor.get('href')
        if not pageUrl:
            # An anchor without href has nothing to follow; skip it instead of
            # raising KeyError and aborting the whole crawl.
            continue
        print(pageUrl)
        GetUrl(pageUrl, path)
        # Blank separator lines between pages in the console output.
        print(" ")
        print(" ")

def createFileWithFileName(localPathParam, fileName):
    """Return the path of ``fileName`` inside ``localPathParam``, creating it if absent.

    The path is built with ``os.path.join`` so the separator matches the
    platform (the previous hard-coded backslash only worked on Windows).
    If nothing exists at that path yet, an empty file is created; an existing
    file is left untouched.

    Args:
        localPathParam: Directory that should contain the file.
        fileName: Name of the file inside that directory.

    Returns:
        The joined path as a string.

    Raises:
        IOError/OSError: If the file cannot be created, e.g. because the
            directory does not exist.
    """
    totalPath = os.path.join(localPathParam, fileName)
    if not os.path.exists(totalPath):
        # Opened in append mode purely to create the file; nothing is written.
        with open(totalPath, 'a+'):
            pass
    return totalPath

def GetFileName(url):
    """Return the file-name component of ``url``.

    The fragment (``#...``) and query string (``?...``) are removed before
    taking the basename, so ``http://host/dir/pic.jpg?v=2`` yields
    ``pic.jpg`` rather than ``pic.jpg?v=2``. A URL ending in ``/`` yields an
    empty string.

    Args:
        url: The URL (or path) to extract the file name from.

    Returns:
        The last path segment as a string.
    """
    # Strip the fragment first, then the query; either may contain '/' which
    # would otherwise be mistaken for a path separator.
    urlPath = url.split('#', 1)[0].split('?', 1)[0]
    return os.path.basename(urlPath)

def GetUrl(myUrl, localPath):
    """Download every matching image on the page at ``myUrl`` into ``localPath``.

    Fetches the page (passing 5 as the timeout argument of ``urlopen``),
    parses it with BeautifulSoup and selects every ``<img>`` whose ``src``
    matches the pattern ``(.*)uploads/allimg(.*)``. Each image is saved under
    the name returned by ``GetFileName`` at the path returned by
    ``createFileWithFileName``.

    The function is best-effort: it never raises. A failure to load the page
    is reported and ends the call; a failure on one image is reported and the
    remaining images are still attempted.

    Args:
        myUrl: Address of the page to scan for images.
        localPath: Local directory the images are written to.
    """
    # Example: myUrl = "http://www.2cto.com/meinv/sexmv/1819.html"
    try:
        response = urllib2.urlopen(myUrl, None, 5)
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        soup = BeautifulSoup(content)
        images = soup.findAll("img", attrs={"src": re.compile("(.*)uploads/allimg(.*)")})
    except Exception as e:
        # Report what failed instead of a bare "Error"; %r keeps the message
        # printable whatever the exception text contains.
        print("Error: failed to load %s: %r" % (myUrl, e))
        return

    for image in images:
        imgUrl = image["src"]
        print(imgUrl)
        fileName = GetFileName(imgUrl)
        print(fileName)
        try:
            urllib.urlretrieve(imgUrl, createFileWithFileName(localPath, fileName))
        except Exception as e:
            # Keep going: one broken image must not stop the other downloads.
            print("Error: failed to download %s: %r" % (imgUrl, e))

if __name__ == '__main__':
    # Smoke test: print the file name derived from a sample image URL.
    sampleUrl = "http://www.2cto.com/meinv/sexmv/1810.jpg"
    print(GetFileName(sampleUrl))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值