一、实现效果
二、源码
#coding:utf-8
import re
import time
import codecs
import StringIO, gzip
import os
import urllib
import urllib2
from bs4 import BeautifulSoup
import chardet
def my_cmp(a, b):
    """Compare two records by their first field as integers, largest first.

    This is an old-style comparison function: pass it as ``cmp=my_cmp`` to
    ``sorted``/``list.sort`` on Python 2, or wrap it with
    ``functools.cmp_to_key`` on Python 3.

    Args:
        a: Indexable whose element 0 is accepted by ``int()``.
        b: Indexable whose element 0 is accepted by ``int()``.

    Returns:
        -1 if ``int(a[0])`` is greater than ``int(b[0])``, 1 if it is
        smaller, and 0 if both are equal, so that sorting with this
        function yields descending order.

    Raises:
        ValueError, TypeError: If either first element cannot be converted
            by ``int()``.
    """
    left = int(a[0])
    right = int(b[0])
    # Equivalent to -cmp(left, right); spelled with comparisons because the
    # builtin cmp() does not exist on Python 3.
    return (right > left) - (right < left)
class SortAuthor(object):
def __init__(self):
self.author_url = u'http://www.zhihu.com/question/26956671'
self.filename = u'26956671'
self.headers = { u'Host':u'www.zhihu.com',
u'User-Agent':u'Mozilla/5.0