#-*-coding:utf-8-*-
import sys
# Python 2 only: the interpreter's site module removes
# sys.setdefaultencoding during startup, so sys is reloaded to get the
# function back before switching the implicit str/unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding( "utf-8" )
import ConfigParser
from crawl.crawl import *
from crawl.filter import *
import MySQLdb
import time
import random
import MySQLdb.cursors
import re
# Fetch the link URLs found on a list page
# Presumably a debug switch; nothing in the visible part of this file
# reads it -- TODO confirm where (or whether) it is used.
debug = 1
def get_list_url( seed_url ):
    """Print every link on the page at ``seed_url`` that mentions sou.zhaopin.com.

    The page is downloaded with ``get_html`` and parsed with
    ``BeautifulSoup`` (both presumably provided by the ``crawl``
    star-imports at the top of the file -- TODO confirm).  Every ``<a>``
    tag that carries an ``href`` attribute is inspected, and each href
    containing the literal text ``sou.zhaopin.com`` is written to stdout,
    one per line.

    :param seed_url: address of the page to scan; passed unchanged to
        ``get_html``.
    :returns: None -- matching links are only printed.
    """
    html = get_html(seed_url)
    soup = BeautifulSoup(html)
    for anchor in soup.findAll('a', href=True):
        href = anchor['href']
        # Literal substring test instead of a regex: in a regex the dots of
        # 'sou.zhaopin.com' match any character, so look-alike hosts such
        # as "souXzhaopin.com" would be accepted as well.
        if 'sou.zhaopin.com' in href:
            # Parenthesised single argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(href)
import sys
# Python 2 only: the interpreter's site module removes
# sys.setdefaultencoding during startup, so sys is reloaded to get the
# function back before switching the implicit str/unicode codec to UTF-8.
reload(sys)
sys.setdefaultencoding( "utf-8" )
import ConfigParser
from crawl.crawl import *
from crawl.filter import *
import MySQLdb
import time
import random
import MySQLdb.cursors
import re
# Fetch the link URLs found on a list page
# Presumably a debug switch; nothing in the visible part of this file
# reads it -- TODO confirm where (or whether) it is used.
debug = 1
def get_list_url( seed_url ):
    """Print every link on the page at ``seed_url`` that mentions sou.zhaopin.com.

    The page is downloaded with ``get_html`` and parsed with
    ``BeautifulSoup`` (both presumably provided by the ``crawl``
    star-imports at the top of the file -- TODO confirm).  Every ``<a>``
    tag that carries an ``href`` attribute is inspected, and each href
    containing the literal text ``sou.zhaopin.com`` is written to stdout,
    one per line.

    :param seed_url: address of the page to scan; passed unchanged to
        ``get_html``.
    :returns: None -- matching links are only printed.
    """
    html = get_html(seed_url)
    soup = BeautifulSoup(html)
    for anchor in soup.findAll('a', href=True):
        href = anchor['href']
        # Literal substring test instead of a regex: in a regex the dots of
        # 'sou.zhaopin.com' match any character, so look-alike hosts such
        # as "souXzhaopin.com" would be accepted as well.
        if 'sou.zhaopin.com' in href:
            # Parenthesised single argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(href)