虽然有一些 C++ 编程的基础,但对 Python 之类的脚本语言,寡人用得并不多。今天准备花三个小时的时间来写一个爬虫,顺带写一篇博客。对于 websocket,请自行在 GitHub 上搜索,有教程案例。时间不够,废话不多说,直接上代码:
`
import os
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter startup, so the module is reloaded to get it back before the
# default string codec is switched to UTF-8.
# Python 3 has neither the reload() builtin nor setdefaultencoding (its
# default encoding is already UTF-8), so both calls are skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')
import re #正则表达式
import urlparse #urlparse模块主要是把url拆分为6部分,并返回元组。并且可以把拆分后的部分再组成一个url。主要有函数有urljoin、urlsplit、urlunsplit、urlparse等
import urllib
import urllib2
import socket
import cookielib
import websocket
import grnumber
import threading
# The low-level threading module is named `thread` on Python 2 and `_thread`
# on Python 3; bind whichever exists to the single name `thread`.
try:
    import thread
except ImportError:  # TODO use Threading instead of _thread in python3
    import _thread as thread
import time
”’
import logging
import logging.handlers
import logging.config
cur_path_ = os.path.dirname(file)
LOG_FILE = os.path.join(cur_path_, ‘logs/main.log’)
handler = logging.handlers.RotatingFileHandler(LOG_FILE, \
maxBytes = 500*1024*1024, backupCount = 3)
fmt = “%(name)s %(levelname)s %(filename)s:%(lineno)s %(asctime)s %(process)d:%(thread)d %(message)s”
formatter = logging.Formatter(fmt)
handler.setFormatter(formatter)
logger = logging.getLogger(‘main’)