from bs4 import BeautifulSoup
import requests
import pandas as pd
# Target pages on win007 (球探网): the team's player-data page and the AJAX
# endpoint that returns the 2018-season player table as an HTML fragment.
url='http://zq.win007.com/cn/team/PlayerData/7642.html'
url2='http://zq.win007.com/cn/team/PlayerDataAjax.aspx?SclassID=60&matchSeason=2018&teamID=7642'
# Desktop-browser User-Agent so the server serves the normal page.
header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'}
# timeout keeps the script from hanging forever on a dead connection;
# raise_for_status() fails fast on HTTP errors instead of parsing an error page.
response = requests.get(url2, headers=header, timeout=10)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'lxml')  # parse the returned sub-page HTML fragment
def _clean(text):
    """Normalize a scraped table-cell string: drop newlines and all spaces."""
    return text.replace('\n', '').replace(' ', '')

# 获取球员名 — player names live in the left-aligned <div> cells of the
# table fragment. (The old nation/age/goal/yellow/red initializers were
# dead: each of those names is reassigned before first use, and `age` was
# never used at all. The duplicate debug loop over the center-aligned
# cells is removed — the extraction loop below prints the same values.)
name = []
for cell in soup.find_all('div', align="left"):
    player = _clean(cell.get_text())
    print(player)
    name.append(player)
la = []
# 获取球员属性 — the first 9 center-aligned <div>s are table headers, so
# skip them; the remaining cells repeat in groups of 8, one group per player.
for cell in soup.find_all('div', align="center")[9:]:
    value = cell.get_text().replace('\n', '').replace(' ', '')
    print(value)
    la.append(value)
print(len(la))
print('#############')
# la[offset::8] selects one column per player. Unlike the original
# range(offset, len(la)+offset, 8) indexing, a slice can never run past
# the end of la, so a truncated table no longer raises IndexError.
# Offsets match the original assignments; NOTE(review): the original
# printed column 4 just before assigning red from column 5 — the column
# meanings should be confirmed against the live page.
nation = la[1::8]
goal = la[2::8]
red = la[5::8]
yellow = la[6::8]
print(nation)
print(goal)
print(red)
print(yellow)
print('#############')
# One row per player name scraped above; columns are the per-player stats.
df = pd.DataFrame({'nation': nation, 'goal': goal, 'yellow': yellow, 'red': red}, index=name)
print(df)