# This example shares the Python code for scraping Juhuasuan product pages, extracting the product information, and saving it, for reference.
#!/usr/bin/python
# -*- coding: gbk -*-
#Spider.py
import urllib2
import httplib
import StringIO
import gzip
import re
import chardet
import sys
import os
import datetime
from xml.dom.minidom import Document
from BeautifulSoup import BeautifulSoup
## Works around errors raised when printing Chinese characters to the console.
## Python 2 only: reload(sys) makes sys.setdefaultencoding available again
## so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")
#####################################################
## Debug-mode switch: when enabled, HTTP request header information and
## debug logging are shown.
DEBUG = 1
NO_DEBUG = 0
## Applies the debug level to every httplib.HTTPConnection (class attribute).
httplib.HTTPConnection.debuglevel = DEBUG
## Switch controlling whether the source of the crawled page is displayed.
showSrcCode = False
## Compression method (also used as the Accept-encoding header value below).
ZIP_TYPE = "gzip"
## NOTE(review): presumably the base name of the output file -- usage is not
## visible in this part of the file; confirm against the code that writes it.
fileName = "auctions"
## NOTE(review): presumably the output directory for the saved data -- confirm.
location = "d://spiderData/"
## HTTP header fields: a custom User-Agent plus the accepted content encoding.
## NOTE(review): presumably attached to each outgoing request -- confirm.
headerConfig = {"User-Agent":"taobao-yanyuan.qzs", "Accept-encoding":ZIP_TYPE}
#####################################################
#############class SpiderConfig #####################
class SpiderConfig(object):
    """Configuration for a spider: its name and its url.

    Attributes:
        name: Name given to the spider configuration.
        url: URL given to the spider configuration.
    """

    def __init__(self, name, url):
        """Store ``name`` and ``url`` on the instance unchanged."""
        self.name = name
        self.url = url

    def __repr__(self):
        """Return a debugging representation showing both attributes."""
        return "%s(name=%r, url=%r)" % (
            self.__class__.__name__,
            self.name,
            self.url,
        )
#####################################################
##############class SpiderAuctionDomain##############
class SpiderAuctionDomain:
"""
Store information with auctions spidered by python
"""
title = ""
url = ""
img = ""
price = ""
def __ini