Python Web Crawler and Database

import http.cookiejar
import urllib.request

import DataBaseHelper
import ThreadPool

# cookie login: keep cookies in a jar and route every request through
# an opener that carries them
cj = http.cookiejar.LWPCookieJar()
cookie_support = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)

db = DataBaseHelper.DbHelper(1, 50000)
pool = ThreadPool.ThreadPool(20, 500)

def crab(i):
    url1 = "http://fangjia.fang.com/pghouse-c0suzhou/10-kw%cb%d5%d6%dd/"
    temp = None
    try:
        temp = opener.open(url1, timeout=30)
        data = temp.read()
        print(data)
        # the listing pages are GBK-encoded; decode before storing
        db.add("insert into craw(information) values(%s)", [data.decode("GBK")])
    finally:
        if temp is not None:
            temp.close()

try:
    for i in range(0, 1):
        pool.add(crab, [i])
finally:
    print("runOutAndJoin")
    pool.runOutAndJoin()
    print("pool quit")
    pool.syncQuit()
    db.quit()
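The script above depends on two helper modules, DataBaseHelper and ThreadPool, that the post does not include. The following is a minimal sketch of what ThreadPool could look like; only the names used above (the ThreadPool(workers, size) constructor, add, runOutAndJoin, syncQuit) come from the source, and the assumption that the two constructor arguments mean worker count and queue capacity is mine.

import queue
import threading

class ThreadPool:
    """Hypothetical reconstruction: worker threads pull (func, args) jobs
    off a bounded queue. The two constructor arguments are assumed to be
    the number of workers and the queue capacity."""

    _STOP = object()  # sentinel telling a worker thread to exit

    def __init__(self, workers, queue_size):
        self._jobs = queue.Queue(maxsize=queue_size)
        self._threads = [threading.Thread(target=self._work, daemon=True)
                         for _ in range(workers)]
        for t in self._threads:
            t.start()

    def _work(self):
        while True:
            job = self._jobs.get()
            try:
                if job is self._STOP:
                    return
                func, args = job
                func(*args)
            finally:
                self._jobs.task_done()

    def add(self, func, args):
        # enqueue one job; blocks when the queue is full
        self._jobs.put((func, args))

    def runOutAndJoin(self):
        # block until every queued job has been executed
        self._jobs.join()

    def syncQuit(self):
        # push one stop sentinel per worker, then wait for the threads
        for _ in self._threads:
            self._jobs.put(self._STOP)
        for t in self._threads:
            t.join()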

#! /usr/bin/env python
# coding=utf-8
import re

import DataBaseHelper

# get data from the database
data = DataBaseHelper.fetchAll("select information from craw")
# convert into a string
data = str(data)
#print(data)

# patterns for the name and address fields
pattern1 = r'\\r\\n\s*(.*?)\s*'
pattern2 = r'(.*?)'
match1 = re.findall(pattern1, data)
match2 = re.findall(pattern2, data)
#print(match1)
#print(match2)

# connect to the database
db = DataBaseHelper.DbHelper(1, 10000)
try:
    # write into the database
    for i in range(0, len(match1)):
        db.add("insert into Data(Name) values(%s)", [match1[i]])
        db.add("insert into Data(Address) values(%s)", [match2[i]])
finally:
    # close the database
    db.quit()
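Likewise, here is a minimal sketch of what DataBaseHelper might look like, assuming a MySQL backend reached through pymysql. Only the call signatures (DbHelper(…, …), add(sql, params), quit(), and the module-level fetchAll(sql)) appear in the source; the connection settings, the meaning of the two DbHelper constructor arguments, and the table definitions in the comments are assumptions inferred from the INSERT statements.

import pymysql

# placeholder connection settings; adjust to your environment
_CONN_ARGS = dict(host="localhost", user="root", password="",
                  database="test", charset="utf8mb4")

# Assumed schema, inferred from the INSERT statements in the scripts:
#   create table craw (information longtext);
#   create table Data (Name varchar(255), Address varchar(255));

class DbHelper:
    def __init__(self, pool_size, max_ops):
        # the two constructor arguments are undocumented in the post;
        # they are kept only so the call sites compile, and unused here
        self._conn = pymysql.connect(**_CONN_ARGS)

    def add(self, sql, params):
        # execute a parameterized INSERT and commit immediately
        with self._conn.cursor() as cur:
            cur.execute(sql, params)
        self._conn.commit()

    def quit(self):
        self._conn.close()

def fetchAll(sql):
    # module-level helper used by the parsing script:
    # run a SELECT on a fresh connection and return every row
    conn = pymysql.connect(**_CONN_ARGS)
    try:
        with conn.cursor() as cur:
            cur.execute(sql)
            return cur.fetchall()
    finally:
        conn.close()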
