python BS4 收集上港队球员赛季表现

最新推荐文章于 2023-05-26 14:44:35 发布

伊玛目的门徒

最新推荐文章于 2023-05-26 14:44:35 发布

阅读量338

点赞数

分类专栏：python、爬虫；文章标签：BeautifulSoup4

本文链接：https://blog.csdn.net/qq_37195257/article/details/86665367

版权

python 同时被 2 个专栏收录

85 篇文章 6 订阅

订阅专栏

爬虫

29 篇文章 0 订阅

订阅专栏

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Team page for the player statistics. It is not requested anywhere in the
# visible part of this script; only `url2` is fetched.
url='http://zq.win007.com/cn/team/PlayerData/7642.html'
# Ajax endpoint that is actually fetched. Its query string carries
# SclassID=60, matchSeason=2018 and teamID=7642.
url2='http://zq.win007.com/cn/team/PlayerDataAjax.aspx?SclassID=60&matchSeason=2018&teamID=7642'

# Browser-like User-Agent header sent with the request.
header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}

# requests waits indefinitely unless a timeout is given, so bound the wait
# instead of letting the script hang on an unresponsive server.
response = requests.get(url2, headers=header, timeout=10)
# Fail fast on a 4xx/5xx reply rather than silently parsing an error page.
response.raise_for_status()
html = response.text

# Parse the fetched sub-page HTML with BeautifulSoup (lxml parser).
soup = BeautifulSoup(html, 'lxml')


# One list per output column. Only `name` is filled in this section; the
# others start out empty here.
name, nation, age, goal, yellow, red = [], [], [], [], [], []

# Collect the player names: the text of every left-aligned <div>, with
# newlines and spaces stripped out.
for name_div in soup.find_all('div', align="left"):
    player = name_div.get_text().replace('\n', '').replace(' ', '')
    print (player)
    name.append(player)

# Query the parse tree once. The original re-ran the same find_all for every
# print and every loop, repeating an identical traversal five times.
center_divs = soup.find_all('div', align="center")
print (center_divs)
print (len(center_divs))

# The first 9 centre-aligned <div>s are skipped.
# NOTE(review): presumably these are table header cells; confirm against the page.
stat_divs = center_divs[9:]
print(stat_divs)

# Flat list of the remaining cell texts with newlines and spaces removed.
# This is the player attribute data that later code splits into columns.
la = [div.get_text().replace('\n', '').replace(' ', '') for div in stat_divs]

# Debug echo: every cell text is printed in two full passes, which reproduces
# the output of the two identical loops this section used to contain.
for _ in range(2):
    for cell_text in la:
        print (cell_text)


# Number of consecutive entries of `la` that the strided reads below treat
# as one player's row.
ROW_WIDTH = 8


def _column(cells, offset, width=ROW_WIDTH):
    """Return the item at position `offset` of every complete row in `cells`.

    `cells` is a flat list read as consecutive rows of `width` items. A
    trailing partial row is ignored, so every offset yields the same number
    of items and no index can run past the end of the list. The previous
    ``range(offset, len(cells) + offset, width)`` form raised IndexError
    whenever ``len(cells)`` was not a multiple of `width`.
    """
    complete = len(cells) - len(cells) % width
    return cells[offset:complete:width]


print (len(la))

print ('#############')
print(_column(la, 0))

nation = _column(la, 1)
print(nation)

goal = _column(la, 2)
print(goal)

# Column 4 is only printed for inspection; it is not stored anywhere.
print(_column(la, 4))

# NOTE(review): offsets 5 and 6 are assigned to red and yellow exactly as in
# the original script; confirm this order against the page's column layout.
red = _column(la, 5)
print(red)
yellow = _column(la, 6)
print(yellow)




print ('#############')

# Assemble the per-player table: one column per collected attribute list,
# with the player names as the row index.
columns = {
    'nation': nation,
    'goal': goal,
    'yellow': yellow,
    'red': red,
}
df = pd.DataFrame(columns, index=name)
print (df)