# -*- coding: utf-8 -*-
"""Print the first rows of a ranking table from a locally saved HTML page.

The page is parsed with BeautifulSoup (lxml parser). For every row of the
first ``<tbody>``, the cells at indexes 0, 1 and 3 are collected and then
printed as aligned columns under the rank / name / total-score headers.
"""
from bs4 import BeautifulSoup
from bs4.element import Tag

# Not referenced by this script; kept so the module-level name stays available.
url = 'http://bj.58.com/pinpaigongyu/pn/{ppp}/?minprice=2000_4000'
# Path of the saved HTML page to parse.
ffile = 'd://best.txt'

# Indexes of the <td> cells copied from each row.
_CELL_INDEXES = (0, 1, 3)
# Maximum number of rows printed by main().
_ROW_LIMIT = 11
# Format spec layout: '{:' + fill character + alignment + width + '}'.
_ROW_FORMAT = '{:0>3}\t {:+<15}\t {: >5}\t'


def file2soup(ffile):
    """Parse the HTML file at *ffile* and return the BeautifulSoup tree.

    The file is handed to BeautifulSoup as bytes, with UTF-8 given as the
    expected encoding.
    """
    # Read-only binary mode is enough here. The encoding hint goes to the
    # parser because a binary file object performs no decoding itself.
    with open(ffile, 'rb') as f:
        return BeautifulSoup(f, 'lxml', from_encoding='utf-8')


def soup2list(soup, ulist):
    """Append the selected cell strings of every table row to *ulist*.

    Args:
        soup: Parsed document; rows are the tag children of its first
            ``<tbody>``.
        ulist: List that receives one three-item list per row. It is
            mutated in place.

    Returns:
        The same *ulist* object. It is returned unchanged when the document
        has no ``<tbody>``.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        return ulist

    needed = max(_CELL_INDEXES) + 1
    for row in tbody.children:
        # The children also include the whitespace text nodes between rows;
        # those are strings, not tags, and cannot be searched for cells.
        if not isinstance(row, Tag):
            continue
        cells = row('td')  # calling a tag is shorthand for find_all('td')
        if len(cells) < needed:
            # Too few cells to hold all selected columns; skip the row.
            continue
        ulist.append([cells[i].string for i in _CELL_INDEXES])
    return ulist


def main():
    """Parse the saved page and print its first rows as a formatted table."""
    ulist = soup2list(file2soup(ffile), [])
    print(ulist)
    print(_ROW_FORMAT.format('排名', '校名', '总分'))
    # Slice rather than index a fixed range so that a table with fewer than
    # _ROW_LIMIT rows does not raise IndexError.
    for row in ulist[:_ROW_LIMIT]:
        # A cell's .string is None when the cell holds nested markup, and
        # None cannot be formatted with these specs; print it as empty.
        print(_ROW_FORMAT.format(*(cell or '' for cell in row)))


if __name__ == '__main__':
    main()
使用 bs4 从网页中提取数据
最新推荐文章于 2020-10-04 15:31:39 发布