python

该代码示例展示了一个 Python 类 MySpider:它从指定网页抓取货币汇率信息,并将数据存储到 SQLite 数据库中。该类包含打开、关闭、插入和显示数据等数据库操作方法,以及基于正则表达式的标签匹配方法,用于解析 HTML 表格内容。
摘要由CSDN通过智能技术生成

import urllib.request

import re

import sqlite3

class MySpider:
    """Scrape a currency-rate HTML table and store its rows in SQLite.

    The page is expected to contain ``<table class="pj_table">`` whose first
    ``<tr>`` is a header and whose remaining rows hold six ``<td>`` cells:
    a currency name, four numeric values (stored as TSP, CSP, TBP, CBP) and
    a time string.  Errors are reported with ``print`` rather than raised,
    so a failed scrape leaves an empty table instead of crashing the script.
    """

    def openDB(self, db_path="rates.db"):
        """Open the database at *db_path* and (re)create an empty ``rates`` table.

        Args:
            db_path: SQLite database file; ``":memory:"`` is accepted too.
        """
        self.con = sqlite3.connect(db_path)
        self.cursor = self.con.cursor()
        try:
            # Every run starts from an empty table; "if exists" avoids an
            # error on the first run, when there is nothing to drop yet.
            self.cursor.execute("drop table if exists rates")
            self.cursor.execute(
                "create table rates (Currency varchar(256) primary key,"
                "TSP float,CSP float, TBP float, CBP float,Time varchar(256))"
            )
        except sqlite3.Error as err:
            print(err)

    def closeDB(self):
        """Commit pending inserts and close the database connection."""
        self.con.commit()
        self.con.close()

    def insertDB(self, Currency, TSP, CSP, TBP, CBP, Time):
        """Insert one record into ``rates``; database errors are printed.

        A duplicate ``Currency`` violates the primary key and is reported
        the same way instead of being raised.
        """
        sql = "insert into rates (Currency,TSP,CSP,TBP,CBP,Time) values (?,?,?,?,?,?)"
        try:
            self.cursor.execute(sql, [Currency, TSP, CSP, TBP, CBP, Time])
        except sqlite3.Error as err:
            print(err)

    def show(self):
        """Print every stored record as a fixed-width table."""
        self.cursor.execute("select Currency,TSP,CSP,TBP,CBP,Time from rates")
        rows = self.cursor.fetchall()
        print("%-18s%-12s%-12s%-12s%-12s%-12s" % ("Currency", "TSP", "CSP", "TBP", "CBP", "Time"))
        for row in rows:
            print("%-18s%-12.2f%-12.2f%-12.2f%-12.2f%-12s" % (row[0], row[1], row[2], row[3], row[4], row[5]))

    def match(self, t, s):
        """Locate the first tag in *s* that starts with ``<`` + *t*.

        Args:
            t: Tag prefix such as ``"td"`` or ``"/td"``.  It is inserted
                into a regular expression without escaping.
            s: Text to search.

        Returns:
            ``{"start": a, "end": b}`` where ``s[a:b]`` runs from the ``<``
            through the next ``>``, or ``None`` when no complete tag exists.
        """
        opening = re.search(r"<" + t, s)
        if opening:
            a = opening.start()
            closing = re.search(r">", s[a:])
            if closing:
                return {"start": a, "end": a + closing.end()}
        return None

    def _row_cells(self, tr_html, limit=6):
        """Return the stripped text of the first *limit* ``<td>`` cells."""
        cells = []
        while True:
            td_open = self.match("td", tr_html)
            td_close = self.match("/td", tr_html)
            if not (td_open and td_close):
                # No further <td>...</td> pair in this row.
                break
            if len(cells) < limit:
                cells.append(tr_html[td_open["end"]:td_close["start"]].strip())
            tr_html = tr_html[td_close["end"]:]
        return cells

    def spider(self, url):
        """Download *url*, parse the rates table and insert each data row.

        Any failure (network, decoding, parsing) is printed, not raised.
        A row whose numeric cells cannot be converted is reported and
        skipped so that the remaining rows are still stored.
        """
        try:
            # The context manager closes the response even if read() fails.
            with urllib.request.urlopen(url) as resp:
                data = resp.read()
            html = data.decode()

            # Keep only the inside of <table class="pj_table">...</table>.
            table_open = re.search(r'<table class="pj_table">', html)
            if table_open is None:
                print("rates table not found in page")
                return
            html = html[table_open.end():]
            table_close = re.search(r'</table>', html)
            if table_close is None:
                print("rates table not found in page")
                return
            html = html[:table_close.start()]

            row_number = 0
            while True:
                tr_open = self.match("tr", html)
                tr_close = self.match("/tr", html)
                if not (tr_open and tr_close):
                    # No further <tr>...</tr> pair in the table.
                    break
                row_number += 1
                row = self._row_cells(html[tr_open["end"]:tr_close["start"]])
                html = html[tr_close["end"]:]

                # Row 1 is the header; data rows must have all six cells.
                if row_number < 2 or len(row) != 6:
                    continue
                try:
                    tsp, csp, tbp, cbp = (float(cell) for cell in row[1:5])
                except ValueError as err:
                    print(err)
                    continue
                # The time string is the sixth cell, i.e. index 5.
                self.insertDB(row[0], tsp, csp, tbp, cbp, row[5])
        except Exception as err:
            print(err)

    def process(self, url="https://www.psbc.com/cn/common/bjfw/whpjcx/", db_path="rates.db"):
        """Run the whole job: open the database, scrape, display, close.

        Args:
            url: Page to scrape.
            db_path: SQLite database file to (re)create.
        """
        self.openDB(db_path)
        try:
            self.spider(url)
            self.show()
        finally:
            # Commit and release the connection even if a step above fails.
            self.closeDB()

# Main program: run the scrape only when this file is executed as a script,
# so that importing the module does not trigger network and database access.
if __name__ == "__main__":
    spider = MySpider()
    spider.process()

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值