# The code is as follows:
import ssl
import urllib.request

from lxml import etree

# NOTE(review): this replaces the default HTTPS context factory with one that
# performs no certificate verification, for every HTTPS request made through
# urllib in this process. Kept to preserve the original behaviour; confirm it
# is really required before using this outside a throwaway script.
ssl._create_default_https_context = ssl._create_unverified_context

# Fetch the page source.
url = 'https://quote.stockstar.com/global/chinastock.htm'
request = urllib.request.Request(url=url)
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    content = response.read().decode('gb2312')

# Extract the data from the page source.
tree = etree.HTML(content)
# The page writes the attribute as class="align_left " (note the trailing
# space), so the exact comparison @class="align_left" never matches and the
# query returns an empty list. Normalising the attribute and padding it with
# spaces matches "align_left" as a whole class token regardless of surrounding
# whitespace or additional classes.
result = tree.xpath(
    '//tbody[@id="datalist1"]/tr/td'
    '[contains(concat(" ", normalize-space(@class), " "), " align_left ")]'
    '/text()'
)
print(result)
网页源码:
<tbody id="datalist1" class="tbody_center">
<tr>
<td style=" word-break: normal|break-all; width:168px;line-height: 16px" class="align_left ">旭明光电&