Coursera: Using Python to Access Web Data

XML and JSON:

 

1. 略

 

2.

 

import re

# Sum every integer appearing anywhere in the assignment data file.
# findall('[0-9]+') extracts each maximal run of digits per line.
total = 0  # renamed from `sum`, which shadowed the builtin
with open('regex_sum_17593.txt') as handle:
    for line in handle:
        for digits in re.findall('[0-9]+', line):
            total += int(digits)
print(total)

 

 

 

 

 

 

3. 

 

import socket

# Fetch http://data.pr4e.org/intro-short.txt over a raw TCP socket and
# print the full HTTP response (status line, headers, and body).
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    mysock.connect(('data.pr4e.org', 80))
    # HTTP/1.0 so the server closes the connection when the body is done.
    cmd = 'GET http://data.pr4e.org/intro-short.txt HTTP/1.0\r\n\r\n'.encode()
    # sendall retries until the whole request is written; plain send() may
    # transmit only part of the buffer.
    mysock.sendall(cmd)

    while True:
        data = mysock.recv(512)
        if len(data) < 1:
            break
        # NOTE(review): decoding per 512-byte chunk assumes no multi-byte
        # character straddles a chunk boundary — fine for this ASCII file.
        print(data.decode())
finally:
    # Close the socket even if connect/recv raises.
    mysock.close()

 

 
4.1
 
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl

# Prompt for a URL, fetch the page, and total the integer contents of
# every <span> tag, reporting the tag count and the sum.

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter - ')
html = urlopen(url, context=ctx).read()

# html.parser is the HTML parser included in the standard Python 3 library.
# information on other HTML parsers is here:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
soup = BeautifulSoup(html, "html.parser")

# Retrieve all of the <span> tags (original comment said "anchor tags",
# but the code selects spans).
tags = soup('span')
Sum = 0
count = 0
for tag in tags:
    # tag.contents[0] is the text node inside the span; assumed to be
    # an integer string per the assignment's page format.
    Sum = Sum + int(tag.contents[0])
    count = count + 1
print('Count', count)
print('Sum', Sum)
 
 

4.2

 
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl

# Repeatedly follow the link at a fixed position on each page:
# starting from a user-supplied URL, take the href of the anchor at
# `pos` (1-based), load that page, and repeat `times` iterations,
# printing each URL visited.

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter - ')
times = int(input('Enter count: '))
pos = int(input('Enter position: '))

html = urllib.request.urlopen(url, context=ctx).read()
soup = BeautifulSoup(html, 'html.parser')
print(url)

# Retrieve all of the anchor tags
tags = soup('a')
for i in range(times):
    # pos is 1-based in the assignment; list indexing is 0-based.
    url = tags[pos - 1].get('href', None)
    print(url)
    html = urllib.request.urlopen(url, context=ctx).read()
    soup = BeautifulSoup(html, 'html.parser')
    tags = soup('a')

 

 

5.

 

 

 
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET

# Fetch an XML document of <comments><comment><count> entries and report
# how many comments there are and the sum of their counts.

num, count = 0, 0
url = input('Enter location: ')
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
print('Retrieved', len(data), 'characters')

tree = ET.fromstring(data)
# Each <comment> element carries a <count> child holding an integer string.
results = tree.findall('comments/comment')
for item in results:
    aa = item.find('count').text
    num = num + int(aa)
    count = count + 1
print('Count:', count)
print('Sum:', num)
 
6.1
 
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET

# Duplicate of the previous exercise: fetch an XML document of
# <comments><comment><count> entries and report the comment count and
# the sum of the count values.

num, count = 0, 0
url = input('Enter location: ')
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
print('Retrieved', len(data), 'characters')

tree = ET.fromstring(data)
# Each <comment> element carries a <count> child holding an integer string.
results = tree.findall('comments/comment')
for item in results:
    aa = item.find('count').text
    num = num + int(aa)
    count = count + 1
print('Count:', count)
print('Sum:', num)
 

6.2

 
import urllib.request, urllib.parse, urllib.error
import json

# Note that Google is increasingly requiring keys for this API
serviceurl = 'http://py4e-data.dr-chuck.net/geojson?'

# Prompt for locations in a loop; for each, query the geojson service and
# print the place_id of the first result. An empty input ends the loop.
while True:
    address = input('Enter location: ')
    if len(address) < 1:
        break

    url = serviceurl + urllib.parse.urlencode({'address': address})

    print('Retrieving', url)
    uh = urllib.request.urlopen(url)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    # Narrowed from a bare except: only malformed JSON should be treated
    # as "no result"; any other error should surface.
    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # Renamed from `lat` — the value extracted is the place identifier,
    # not a latitude.
    place_id = js["results"][0]["place_id"]
    print('Place id', place_id)
 
 

 

  • 2
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值