这是一个爬虫作业:我只想爬取 link 里面的网址,其他内容都不要。正则表达式规则已经写好了,
但是不知道如何把这条规则应用到需要筛选的数据上。
#正则表达式规则
relink = re.compile(r'<td class="bz"><a href="(.*)">')
#需要正则表达式筛选的数据
link = item.find('td',class_="bz")
试过在 link = item.find('td',class_="bz") 后面直接加上 .relink,但是最终输出的结果是 None(无)。
完整代码
import re
import sqlite3

import requests
from bs4 import BeautifulSoup
# Captures the href of the anchor that sits directly inside a <td class="bz"> cell.
# The capture group uses [^"]* rather than a greedy .* so it stops at the closing
# quote of the href; a greedy match would run on to the last '">' on the same
# line and swallow the markup that follows the link.
relink = re.compile(r'<td class="bz"><a href="([^"]*)">')
def main():
    """Script entry point: run the scraper against the fixed start page."""
    # TODO: persist the scraped rows to a database (saveDataDB) - not implemented.
    city('https://www.usd-cny.com/')
def city(url):
    """Download the page at *url* and print one line per data row of its table.

    For every <tr> after the first one, the following values are printed:
    the href of the link inside the row's <td class="bz"> cell, the text of
    the row's first <a>, the text of table cells 1-4 (fbuy, cbuy, fsell,
    csell) and the text of the page's first <p> element.

    Rows that lack the "bz" link or have fewer than five cells are skipped.

    Args:
        url: Address of the page to scrape.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36 Edg/81.0.416.58' }
    # A timeout keeps the script from hanging forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    response.encoding = 'gb2312'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Text of the first <p>; None when the page has no <p> at all.
    first_p = soup.p
    updated = first_p.string if first_p is not None else None

    # The first <tr> is skipped (presumably the table header - confirm).
    for item in soup.find_all('tr')[1:]:
        cells = item.find_all('td')
        bz_cell = item.find('td', class_="bz")
        bz_anchor = bz_cell.find('a') if bz_cell is not None else None
        name_anchor = item.find('a')
        if bz_anchor is None or name_anchor is None or len(cells) < 5:
            # Not a complete data row; skip it instead of raising.
            continue

        # Read the href attribute directly from the parsed tag. This yields
        # only the URL, so no regular expression has to be run over the HTML.
        link = bz_anchor.get('href')
        curr = name_anchor.string
        fbuy, cbuy, fsell, csell = (cell.string for cell in cells[1:5])
        print(link, curr, fbuy, cbuy, fsell, csell, updated)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()