Coursera: Using Python to Access Web Data

XML and JSON:

 

1. 略

 

2.

 

import re

# Sum every integer appearing anywhere in the assignment data file.
# findall('[0-9]+') extracts each maximal run of digits per line.
total = 0  # renamed from `sum`, which shadowed the builtin
with open('regex_sum_17593.txt') as handle:
    for line in handle:
        for digits in re.findall('[0-9]+', line):
            total += int(digits)
print(total)

 

 

 

 

 

 

3. 

 

import socket

# Fetch http://data.pr4e.org/intro-short.txt over a raw TCP socket and
# print the full HTTP response (status line, headers, and body).
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    mysock.connect(('data.pr4e.org', 80))
    # HTTP/1.0 so the server closes the connection when the body is done.
    cmd = 'GET http://data.pr4e.org/intro-short.txt HTTP/1.0\r\n\r\n'.encode()
    # sendall retries until the whole request is written; plain send() may
    # transmit only part of the buffer.
    mysock.sendall(cmd)

    while True:
        data = mysock.recv(512)
        if len(data) < 1:
            break
        # NOTE(review): decoding per 512-byte chunk assumes no multi-byte
        # character straddles a chunk boundary — fine for this ASCII file.
        print(data.decode())
finally:
    # Close the socket even if connect/recv raises.
    mysock.close()

 

 
4.1
 
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl

# Prompt for a URL, fetch the page, and total the integer contents of
# every <span> tag, reporting the tag count and the sum.

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter - ')
html = urlopen(url, context=ctx).read()

# html.parser is the HTML parser included in the standard Python 3 library.
# information on other HTML parsers is here:
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
soup = BeautifulSoup(html, "html.parser")

# Retrieve all of the <span> tags (original comment said "anchor tags",
# but the code selects spans).
tags = soup('span')
Sum = 0
count = 0
for tag in tags:
    # tag.contents[0] is the text node inside the span; assumed to be
    # an integer string per the assignment's page format.
    Sum = Sum + int(tag.contents[0])
    count = count + 1
print('Count', count)
print('Sum', Sum)
 
 

4.2

 
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl

# Repeatedly follow the link at a fixed position on each page:
# starting from a user-supplied URL, take the href of the anchor at
# `pos` (1-based), load that page, and repeat `times` iterations,
# printing each URL visited.

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url = input('Enter - ')
times = int(input('Enter count: '))
pos = int(input('Enter position: '))

html = urllib.request.urlopen(url, context=ctx).read()
soup = BeautifulSoup(html, 'html.parser')
print(url)

# Retrieve all of the anchor tags
tags = soup('a')
for i in range(times):
    # pos is 1-based in the assignment; list indexing is 0-based.
    url = tags[pos - 1].get('href', None)
    print(url)
    html = urllib.request.urlopen(url, context=ctx).read()
    soup = BeautifulSoup(html, 'html.parser')
    tags = soup('a')

 

 

5.

 

 

 
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET

# Fetch an XML document of <comments><comment><count> entries and report
# how many comments there are and the sum of their counts.

num, count = 0, 0
url = input('Enter location: ')
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
print('Retrieved', len(data), 'characters')

tree = ET.fromstring(data)
# Each <comment> element carries a <count> child holding an integer string.
results = tree.findall('comments/comment')
for item in results:
    aa = item.find('count').text
    num = num + int(aa)
    count = count + 1
print('Count:', count)
print('Sum:', num)
 
6.1
 
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET

# Duplicate of the previous exercise: fetch an XML document of
# <comments><comment><count> entries and report the comment count and
# the sum of the count values.

num, count = 0, 0
url = input('Enter location: ')
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
print('Retrieved', len(data), 'characters')

tree = ET.fromstring(data)
# Each <comment> element carries a <count> child holding an integer string.
results = tree.findall('comments/comment')
for item in results:
    aa = item.find('count').text
    num = num + int(aa)
    count = count + 1
print('Count:', count)
print('Sum:', num)
 

6.2

 
import urllib.request, urllib.parse, urllib.error
import json

# Note that Google is increasingly requiring keys for this API
serviceurl = 'http://py4e-data.dr-chuck.net/geojson?'

# Prompt for locations in a loop; for each, query the geojson service and
# print the place_id of the first result. An empty input ends the loop.
while True:
    address = input('Enter location: ')
    if len(address) < 1:
        break

    url = serviceurl + urllib.parse.urlencode({'address': address})

    print('Retrieving', url)
    uh = urllib.request.urlopen(url)
    data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    # Narrowed from a bare except: only malformed JSON should be treated
    # as "no result"; any other error should surface.
    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    if not js or 'status' not in js or js['status'] != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # Renamed from `lat` — the value extracted is the place identifier,
    # not a latitude.
    place_id = js["results"][0]["place_id"]
    print('Place id', place_id)
 
 

 

  • 2
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值