爬取中国基金网数据时，中文部分出现乱码。
原代码如下：
# Fetch the page at `url` and collect every <div id="content"> element.
# `urlopen` and `BeautifulSoup` are not imported in this excerpt; presumably
# they come from urllib and bs4 -- TODO confirm.
url=r'http://data.chinafund.cn/'
# The response object itself (not decoded text) is handed to the parser below.
urlString= urlopen(url)
# NOTE(review): no encoding is given here, so the parser has to guess the
# page's charset. If the site serves a GBK-family encoding, a wrong guess is
# the likely source of the garbled Chinese text reported for this script;
# passing `from_encoding=...` to BeautifulSoup may fix it -- confirm against
# the charset the page actually declares.
soup= BeautifulSoup(urlString, 'html.parser')
# All <div> elements whose id attribute is "content".
nameList= soup.findAll('div',{'id':'content'}) #print(nameList)
for name in nameList:
nameString= name.getText(',') #get raw data
nameString= nameString.replace('--','0')
#'--' means NA on this website. replaced as '0' to easy the next steps like float()
nameString= nameString.splitlines() # split lines by '\r'(the default method)
#print(nameString)
data=[] #data empty list
for line in nameString:
lines= line.split(',') #split text by ','
data&#