每天进步一点点，五年之后大不同。
from bs4 import BeautifulSoup
# Sample markup used by the examples in this script: a single
# <table class="tablelist"> whose first <tr> is a header row with five column
# captions, followed by ten data rows. Every data row has five <td> cells:
# the job title wrapped in an <a> link, then category, headcount, location
# and publish date. Only the <a> in the last data row carries
# id="test" class="test".
html = """
<table class="tablelist" cellpadding="0" cellspacing="0">
<tbody>
<tr class="h">
<td class="l" width="374">职位名称</td>
<td>职位类别</td>
<td>人数</td>
<td>地点</td>
<td>发布时间</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=33824&keywords=python&tid=87&lid=2218">22989-金融云区块链高级研发工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-25</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=29938&keywords=python&tid=87&lid=2218">22989-金融云高级后台开发</a></td>
<td>技术类</td>
<td>2</td>
<td>深圳</td>
<td>2017-11-25</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=31236&keywords=python&tid=87&lid=2218">SNG16-腾讯音乐运营开发工程师(深圳)</a></td>
<td>技术类</td>
<td>2</td>
<td>深圳</td>
<td>2017-11-25</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=31235&keywords=python&tid=87&lid=2218">SNG16-腾讯音乐业务运维工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-25</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=34531&keywords=python&tid=87&lid=2218">TEG03-高级研发工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=34532&keywords=python&tid=87&lid=2218">TEG03-高级图像算法研发工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=31648&keywords=python&tid=87&lid=2218">TEG11-高级AI开发工程师(深圳)</a></td>
<td>技术类</td>
<td>4</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=32218&keywords=python&tid=87&lid=2218">15851-后台开发工程师</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=32217&keywords=python&tid=87&lid=2218">15851-后台开发工程师</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
<tr class="odd">
<td class="l square"><a id="test" class="test" target='_blank' href="position_detail.php?id=34511&keywords=python&tid=87&lid=2218">SNG11-高级业务运维工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2017-11-24</td>
</tr>
</tbody>
</table>
"""
# Parse the sample markup with the html5lib parser; every query below runs
# against this parsed tree.
bs = BeautifulSoup(html,'html5lib')
# The snippets below are disabled reference examples of find_all usage.
#
# Example: get all <tr> tags.
# trs = bs.find_all('tr')
# for tr in trs:
#     print(tr)
#
# Example: get the third <tr> tag (indices start at 0).
# tr = bs.find_all('tr')[2]
# print(tr)
#
# Example: get all <tr> tags whose class is "even" (attrs = attributes).
# trs = bs.find_all('tr',attrs={'class':'even'})
# for tr in trs:
#     print(tr)
#
# Example: get all <a> tags whose id is "test" and whose class is also
# "test" (two equivalent spellings).
# aList = bs.find_all('a',id='test',class_='test')
# aList = bs.find_all('a',attrs={'class':'test',"id":"test"})
# for a in aList:
#     print(a)
#
# Example: get the href attribute of every <a> tag.
# aList = bs.find_all('a')
# for a in aList:
#     # print(a['href'])
#     print(a.attrs['href'])
# Collect every job posting from the table into a list of dicts and print it.
# Dictionary keys, listed in the order the columns appear in each data row.
fields = ('title', 'category', 'nums', 'base', 'pubtime')
trs = bs.find_all('tr')[1:]  # drop the first <tr>: it is the header row
ress = []  # final results, one dict per job posting
for tr in trs:
    tds = tr.find_all('td')
    # Read each cell on its own rather than flattening the whole row with
    # tr.stripped_strings: an empty cell contributes no string at row level,
    # which would shift every later column under the wrong key. Indexing tds
    # still raises IndexError when a row has fewer cells than expected.
    job = {
        field: tds[index].get_text(strip=True)
        for index, field in enumerate(fields)
    }
    ress.append(job)
print(ress)
万水千山总是情，点个关注行不行。
你的一个小小举动，将是我分享更多干货的动力。
我的博客前后连贯，只看一篇可能不太容易看懂，需要把多篇结合起来才能真正理解。我是学习 Python 的小颜，希望大家点个关注，一起交流学习哦。