from urllib.request import urlopen

# Exercise 1: retrieve the comments page and print the raw HTML bytes.
url = 'http://py4e-data.dr-chuck.net/comments_56193.html'


def fetch_page(page_url):
    """Return the raw response body (bytes) of *page_url*.

    No decoding is done — the original printed the bytes object as-is.
    """
    return urlopen(page_url).read()


if __name__ == '__main__':
    # Read all the comments and show the raw document.
    print(fetch_page(url))
# Exercise 2: parse the tag data — print the href of every anchor.
import urllib.request

from bs4 import BeautifulSoup


def print_anchor_hrefs(page_url):
    """Fetch *page_url*, parse it, and print each <a> tag's href.

    Anchors without an href print None (``tag.get`` default).
    """
    # html is the full document returned by the server
    html = urllib.request.urlopen(page_url).read()
    soup = BeautifulSoup(html, 'html.parser')
    # soup('a') selects all <a></a> elements
    for tag in soup('a'):
        # Read the content of the <a href=""> attribute
        print(tag.get('href', None))


if __name__ == '__main__':
    print_anchor_hrefs('http://py4e-data.dr-chuck.net/comments_56193.html')
import json
import urllib.parse
import urllib.request

# Exercise 3: geocode an address with the py4e geojson proxy service.
# Note that Google is increasingly requiring keys for this API; the
# py4e proxy below does not need one.
serviceurl = 'http://py4e-data.dr-chuck.net/geojson?'


def build_geo_url(address):
    """Return the full service URL that geocodes *address*."""
    return serviceurl + urllib.parse.urlencode({'address': address})


def retrieve_geodata(address):
    """Fetch the geojson response for *address* and return it decoded (str).

    Prints the URL being retrieved, matching the original script's output.
    """
    url = build_geo_url(address)
    print('Retrieving', url)
    uh = urllib.request.urlopen(url)
    return uh.read().decode()


if __name__ == '__main__':
    address = 'Universidade do Minho'
    # The original loop exited only on an empty address; with a hard-coded,
    # non-empty address the mangled `while True` re-fetched the same URL
    # forever. A single lookup is the intended behavior.
    if address:
        data = retrieve_geodata(address)
import json
from urllib.request import urlopen

# Exercise 4: sum the 'count' field of every comment in the JSON feed.
COMMENTS_URL = 'http://py4e-data.dr-chuck.net/comments_56196.json'


def total_counts(parsed):
    """Return the integer sum of item['count'] over parsed['comments'].

    Counts may arrive as strings, so each one is passed through int().
    """
    # Avoid shadowing the builtin `sum` (the original used it as a variable).
    return sum(int(item['count']) for item in parsed['comments'])


if __name__ == '__main__':
    data = json.loads(urlopen(COMMENTS_URL).read())
    print(total_counts(data))