Answers to Core Python Programming, 3rd Edition, Chapter 1

This post also records my repeated attempts at making the patterns more rigorous:
1-1 re.match('[bh][aiu]t', 'bat').group()
1-2 re.match('[A-Z][a-z]+ [A-Z][a-z]+', 'Xhen Fiagu').group()
1-3 re.match('[A-Z][a-z-]+, [A-Z]', 'Fia, X').group()
1-4 re.match('^[a-zA-Z_]\w*', 'sendMessage').group()
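A quick extra check for 1-4 (my own test strings): with \w* instead of \w+, single-character names, which are legal Python identifiers, are accepted too, while names starting with a digit are still rejected:
>>> re.match('[a-zA-Z_]\w*', 'a').group()
'a'
>>> re.match('[a-zA-Z_]\w*', '_cache2').group()
'_cache2'
>>> re.match('[a-zA-Z_]\w*', '2var')   # None: identifiers cannot start with a digit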
1-5
>>> a = '\d+(\s\w+)+'
>>> b = '2254 ddfv Jdwk JNs JKNB'
>>> re.match(a, b)
1-6
>>> b = 'http://www.yahoo.com/'
>>> a = 'https?://www.(\w+.)+com/'
>>> re.match(a, b)
<re.Match object; span=(0, 21), match='http://www.yahoo.com/'>
>>> re.match(a, b).group()
'http://www.yahoo.com/'
Optional:
>>> a = 'https?://(\w+.)+\w+/'
>>> b = 'https://www.ewid.dewo.edu.cn/'
>>> re.match(a, b).group()
'https://www.ewid.dewo.edu.cn/'
>>> b = 'http://127.0.0.1:88/zhandian/index.html'
>>> re.match(a, b).group()
'http://127.0.0.1:88/zhandian/'
>>> a = '\w+://(\w+|\d+[./])*(\w+|\d+)(:\d+/)?(\w+|\d+[./])*(/)?'
>>> re.match(a, b).group()
'http://127.0.0.1:88/zhandian/'
1-7 re.match('[+-]?\d+', '-110')
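The same pattern handles an explicit plus sign and unsigned numbers as well (my quick check):
>>> re.match('[+-]?\d+', '+42').group()
'+42'
>>> re.match('[+-]?\d+', '99').group()
'99'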
1-8, 1-9: see the answers in the back of the book.
1-10
>>> b = 2+4j
>>> a = '(\d*[+-])?(\d+)?j?'
>>> re.match(a, b)
Traceback (most recent call last):
  File "<pyshell#12>", line 1, in <module>
    re.match(a, b)
  File "D:\python\lib\re.py", line 173, in match
    return _compile(pattern, flags).match(string)
TypeError: expected string or bytes-like object
>>> b = '2+4j'
>>> re.match(a, b)
<re.Match object; span=(0, 4), match='2+4j'>
>>> b = '-2j'
>>> re.match(a, b)
<re.Match object; span=(0, 3), match='-2j'>
>>> b = '5'
>>> re.match(a, b)
<re.Match object; span=(0, 1), match='5'>
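One weakness I noticed: every part of the pattern above is optional, so it also matches the empty string. A stricter variant I tried, which requires at least one digit (assuming the imaginary unit is always a trailing j):
>>> re.match(a, '')
<re.Match object; span=(0, 0), match=''>
>>> a = '[+-]?\d+j|[+-]?\d+([+-]\d+j)?'
>>> re.match(a, '2+4j').group()
'2+4j'
>>> re.match(a, '-2j').group()
'-2j'
>>> re.match(a, '')   # None now: the empty string is rejected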
1-11
>>> b = 'liying@ouc.edu.cn'
>>> a = '\w+@(\w+.)+\w+'
>>> re.match(a, b)
<re.Match object; span=(0, 17), match='liying@ouc.edu.cn'>
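re.match only anchors at the start, so trailing junk still slips through; re.fullmatch is stricter (my addition, Python 3.4+):
>>> re.match(a, 'liying@ouc.edu.cn!!!').group()
'liying@ouc.edu.cn'
>>> re.fullmatch(a, 'liying@ouc.edu.cn!!!')   # None
>>> re.fullmatch(a, b).group()
'liying@ouc.edu.cn'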
1-12: same as 1-6.
1-13
>>> a = "'"
>>> re.split(a, "<class 'builtin_function_or_method'>")[1]
'builtin_function_or_method'
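An equivalent one-liner with re.search and a capture group (my variant):
>>> re.search("'(\w+)'", "<class 'builtin_function_or_method'>").group(1)
'builtin_function_or_method'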
1-14
>>> b = '11'
>>> a = '1[0-2]|0?[1-9]'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='11'>
>>> b = '10'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='10'>
>>> b = '1'
>>> re.match(a, b)
<re.Match object; span=(0, 1), match='1'>
>>> b = '05'
>>> re.match(a, b)
<re.Match object; span=(0, 2), match='05'>
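Because re.match is not anchored at the end, an illegal hour like 16 still gets a partial match on the leading 1; re.fullmatch rejects it outright (my addition):
>>> re.match(a, '16')
<re.Match object; span=(0, 1), match='1'>
>>> re.fullmatch(a, '16')   # None
>>> re.fullmatch(a, '12').group()
'12'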
1-15
>>> b = '6259-0637-5561-1733'
>>> a = '\d\d\d\d-\d\d\d\d-\d\d\d\d-\d\d\d\d|\d\d\d\d-\d\d\d\d\d\d-\d\d\d\d\d'
>>> re.match(a, b).group()
'6259-0637-5561-1733'
>>> b = '6259-063722-55612-'
>>> re.match(a, b).group()
'6259-063722-55612'
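The second test above shows the stray trailing dash being silently dropped; re.fullmatch makes the pattern reject it instead (my addition):
>>> re.fullmatch(a, '6259-063722-55612-')   # None
>>> re.fullmatch(a, '6259-063722-55612').group()
'6259-063722-55612'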
1-16 ~ 1-27
See
https://blog.csdn.net/python_dream/article/details/78669906
1-28: once you can do this one, 1-29 comes naturally.
1-29
>>> b = '(800)555-1212'
>>> a = '((\d\d\d-)?\d\d\d-\d\d\d\d)|((\(\d{3}\))?\d\d\d-\d\d\d\d)'
>>> re.match(a, b).group()
'(800)555-1212'
>>> b = '800-555-1212'
>>> re.match(a, b).group()
'800-555-1212'
>>> b = '555-1212'
>>> re.match(a, b).group()
'555-1212'
1-30

import webbrowser as web

# Write a small test page containing a few links, then open it in the browser.
with open('1-30.html', 'w') as f:
    f.write('''<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="UTF-8">
	<title>Document</title>
</head>
<body>
	<a href="http://www.ouc.edu.cn">Ocean University of China</a>
	<a href="http://www.jandan.com/ooxx">Sister's picture</a>
	<a href="http://www.w3school.com.cn">w3school</a>
</body>
</html>''')

web.open('1-30.html')
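As a quick self-check (my own addition, not part of the exercise), the three URLs can be pulled back out of the file we just wrote with re.findall:

import re

with open('1-30.html') as f:
    # One capture group per href; findall returns just the captured URLs.
    print(re.findall('href="(http://[^"]+)"', f.read()))
# ['http://www.ouc.edu.cn', 'http://www.jandan.com/ooxx', 'http://www.w3school.com.cn']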

1-31
Skipped. Reason: I don't use Twitter.
1-32
Adapted from a movie-ranking scraper I wrote earlier. The quality is not great, but the basic functionality works.

import requests
import bs4
import re

def open_url(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'}
    res = requests.get(url, headers=headers)
    return res

def find_books(res):
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    books = []
    targets = soup.find_all("div", class_="a-section a-spacing-small")

    for each in targets:
        # Entries without a cover image carry no title; skip them.
        if each.img is None:
            continue

        # The book title sits in the alt attribute of the cover image.
        code = str(each.img)
        name = re.search('alt="(.+)" height', code).group(1)
        print(name)
        books.append(name)

    authors = []
    targets = soup.find_all("div", class_="a-row a-size-small")

    for each in targets:
        authors.append(' 作者:%s ' % each.text)
    
        
    # Apparently left over from the movie-ranking scraper this was adapted
    # from; the Amazon page has no "hd" divs, so this list stays empty and unused.
    messages = []
    targets = soup.find_all("div", class_="hd")
    for each in targets:
        try:
            messages.append(each.p.text.split('\n')[1].strip() +
                            each.p.text.split('\n')[2].strip())
        except IndexError:
            continue

    result = []
    length = len(books)

    for i in range(length):
        # Each book has two "a-row a-size-small" divs; the first is the author line.
        result.append(str(i+1) + '.' + books[i] + authors[2*i] + '\n')

    return result

def find_depth(res):
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    depth = soup.find('li', class_='a-last').previous_sibling.text

    # Only scrape the first page for now; use int(depth) to crawl them all.
    return 1

def main():
    host = "https://www.amazon.cn/gp/bestsellers/books/"
    res = open_url(host)
    depth = find_depth(res)

    result = []
    for i in range(depth):
        url = host + '?start=' + str(25 * i)
        res = open_url(url)
        result.extend(find_books(res))

    with open("图书排名.txt", 'w', encoding="utf-8") as f:
        for each in result:
            f.write(each)

if __name__ == "__main__":
    main()
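Note: the class names the scraper keys on (a-section a-spacing-small, a-row a-size-small) and the alt="..." height regex match Amazon's markup at the time of writing; if the page layout changes, the selectors will need updating.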