自己用 Python + BeautifulSoup 做的一个小工具:抓取网站上的图片并存入指定文件夹,同时把页面中的重要信息写入 txt 文件。代码如下:
__author__ = 'wangzg'
# -*- coding:utf-8 -*-
import urllib
import urllib2
import re
import cookielib
import bs4
import sys
import os
# Python 2-only workaround: sys.setdefaultencoding is removed from the sys
# module during interpreter start-up, so sys is reloaded first to make the
# function callable again. The two statements must stay in this order.
reload(sys)
# Make UTF-8 the process-wide default codec for implicit str <-> unicode
# conversions (the Python 2 default is ASCII).
# NOTE(review): presumably needed for the non-ASCII page text written to the
# output files later in the script -- confirm before removing.
sys.setdefaultencoding( "utf-8" )
# Output targets, created relative to the current working directory.
# Both files are opened for writing (truncating any previous content) and are
# intentionally left open here.
# NOTE(review): presumably the rest of the script writes to them and is
# responsible for closing them -- verify.
f = open('chunguowangInfo.txt', 'w')
imageDir = 'chunguowangImgs'
imagesf = open('chunguowangImgsInfo.txt', 'w')

# Make sure the image directory exists before anything is saved into it.
if os.path.exists(imageDir):
    # The '%s' placeholder is deliberately left unformatted, exactly as in the
    # original; `message` is only bound on this branch.
    message = 'OK, the "%s" file exists.'
else:
    os.makedirs(imageDir)

# Working directory at start-up, captured once.
pwd = os.getcwd()
# Browser-like User-Agent string. The original literal wrapped the value in
# an extra pair of double quotes, so the quotes themselves became part of the
# header value; they are removed here so the header is a well-formed
# product/comment list.
user_agent = (
    'Mozilla/5.0 (Windows NT 6.2; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/38.0.2125.122 Safari/537.36'
)
# Header dictionary built from the User-Agent above.
headers = {'User-Agent': user_agent}
'''cj=cookielib.CookieJar()
opener=urllib2.build_opener(urllib2.HTTPCookie