用 Python 和 BeautifulSoup4（BS4）收集上港队球员赛季表现

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Team player-data landing page. Not requested by the visible script; kept
# so the module-level name stays available.
url = 'http://zq.win007.com/cn/team/PlayerData/7642.html'
# AJAX endpoint that is actually fetched. The query string selects
# SclassID=60, matchSeason=2018 and teamID=7642.
url2 = 'http://zq.win007.com/cn/team/PlayerDataAjax.aspx?SclassID=60&matchSeason=2018&teamID=7642'

# Browser-like User-Agent sent with the request (presumably because the site
# rejects the default client string -- verify).
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}

# Without a timeout requests may wait indefinitely on an unresponsive server,
# so bound the wait and fail loudly on an HTTP error status instead of
# parsing an error page as if it were player data.
response = requests.get(url2, headers=header, timeout=10)
response.raise_for_status()
html = response.text

# Parse the returned HTML fragment with BeautifulSoup using the lxml parser.
soup = BeautifulSoup(html, 'lxml')


# Per-player columns. Only ``name`` is filled in this section; ``age`` is not
# filled anywhere in the visible script.
name = []
nation = []
age = []
goal = []
yellow = []
red = []

# Collect player names: the text of every left-aligned <div>, with newlines
# and plain spaces removed in a single translate pass.
_name_strip = str.maketrans('', '', '\n ')
for name_div in soup.find_all('div', align="left"):
    player = name_div.get_text().translate(_name_strip)
    print(player)
    name.append(player)

# Every centre-aligned <div> in the parsed response, looked up once and
# reused for the diagnostics and the collection below.
center_divs = soup.find_all('div', align="center")
print(center_divs)
print(len(center_divs))

# Only cells from index 9 onward are used as player attributes (the first
# nine are presumably header cells -- verify against the page).
stat_divs = center_divs[9:]
print(stat_divs)

# Newlines and plain spaces are removed from every cell's text.
_cell_strip = str.maketrans('', '', '\n ')

# Preview pass: echo each cleaned attribute cell.
for stat_div in stat_divs:
    print(stat_div.get_text().translate(_cell_strip))

# Collect player attributes as one flat list, echoing each value again as it
# is stored.
la = []
for stat_div in stat_divs:
    value = stat_div.get_text().translate(_cell_strip)
    print(value)
    la.append(value)


# ``la`` is a flat list in which each player occupies ROW_WIDTH consecutive
# cells, so column k is every ROW_WIDTH-th element starting at offset k.
# Strided slicing never indexes past the end of the list, so a stray trailing
# cell (length not a multiple of ROW_WIDTH) no longer raises IndexError; on
# well-formed input the result is identical to indexing over
# range(k, len(la) + k, ROW_WIDTH). Columns that still end up with unequal
# lengths are rejected by the DataFrame constructor below.
ROW_WIDTH = 8

print(len(la))

print('#############')
# Offset 0 is only echoed, never stored.
print(la[0::ROW_WIDTH])

nation = la[1::ROW_WIDTH]
print(nation)

goal = la[2::ROW_WIDTH]
print(goal)

# Offset 4 is only echoed, never stored.
print(la[4::ROW_WIDTH])

# NOTE(review): 'red' is read from offset 5 and 'yellow' from offset 6 --
# confirm this matches the column order of the source table.
red = la[5::ROW_WIDTH]
print(red)
yellow = la[6::ROW_WIDTH]
print(yellow)

print('#############')

# One row per player, indexed by player name.
df = pd.DataFrame(
    {'nation': nation, 'goal': goal, 'yellow': yellow, 'red': red},
    index=name,
)
print(df)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值