import http.cookiejar
import urllib
import urllib.request

import DataBaseHelper
import ThreadPool
# Cookie-based login: one shared cookie jar installed on a single opener so
# every request in the process reuses the same session cookies.
cj = http.cookiejar.LWPCookieJar()
cookie_support = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)
# Project-local helpers shared by all workers below.
# NOTE(review): DbHelper(1, 50000) / ThreadPool(20, 500) argument meanings are
# not visible here — presumably (connections?, batch/queue size) and
# (worker count, queue capacity); confirm against the helper modules.
db=DataBaseHelper.DbHelper(1,50000)
pool=ThreadPool.ThreadPool(20,500)
def crab(i):
    """Fetch one listing page and store its raw HTML in the `craw` table.

    Parameters:
        i: task index supplied by the thread pool (currently unused — the
           URL is hard-coded; TODO confirm whether `i` should pick a page).
    """
    url1 = "http://fangjia.fang.com/pghouse-c0suzhou/10-kw%cb%d5%d6%dd/"
    temp = None
    try:
        temp = opener.open(url1, timeout=30)
        data = temp.read()
        print(data)
        # Page is served as GBK; decode before the parameterized insert.
        db.add("insert into craw(information) values(%s)", [data.decode("GBK")])
    finally:
        # Bug fixes: `temp` was previously referenced unconditionally here
        # (NameError masking the real exception if open() raised), and the
        # module-wide shared `opener` was closed after the first task,
        # breaking every subsequent pool worker.
        if temp is not None:
            temp.close()
try:
    # Queue one crawl task per index (currently just a single page).
    for task_id in range(1):
        pool.add(crab, [task_id])
finally:
    # Drain and shut down the pool, then release the DB, even if queueing failed.
    print("runOutAndJoin")
    pool.runOutAndJoin()
    print("pool quit")
    pool.syncQuit()
    db.quit()
#! /usr/bin/env python
# coding=utf-8
# --- Second script (originally a separate file, collapsed onto one line):
# --- parse the pages stored by the crawler and write Name/Address rows.
import re

import DataBaseHelper

# Pull every raw page previously stored by the crawler.
data = DataBaseHelper.fetchAll("select information from craw")
# Flatten the result rows into one searchable string.
data = str(data)

# NOTE(review): these patterns look as if their HTML tag delimiters were
# stripped when the file was mangled — pattern2 matches the empty string at
# every position. TODO: restore the original tag-delimited patterns.
pattern1 = r'\\r\\n\s*(.*?)\s*'
pattern2 = r'(.*?)'
match1 = re.findall(pattern1, data)
match2 = re.findall(pattern2, data)

# Bug fix: construct the DB helper before the try block so the finally
# clause never calls quit() on an unbound (or stale) `db` when the
# constructor itself fails.
db = DataBaseHelper.DbHelper(1, 10000)
try:
    # Bug fix: the original indexed match2[i] under range(len(match1)),
    # raising IndexError whenever the lists differ in length; zip() pairs
    # them and stops at the shorter one.
    for name, address in zip(match1, match2):
        db.add("insert into Data(Name) values(%s)", [name])
        db.add("insert into Data(Address) values(%s)", [address])
finally:
    db.quit()