[Python Notes] Web-related

#! python3
# getAddress.py - Opens a Baidu search for the command-line
# arguments or, failing that, the clipboard contents.

import webbrowser, sys, pyperclip

if len(sys.argv) > 1:
    address = ' '.join(sys.argv[1:])  # search term from the command line
else:
    address = pyperclip.paste()       # search term from the clipboard
webbrowser.open(
    'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&ch=&tn=baiduerr&bar=&wd='
    + address)
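One thing this version glosses over: the address is concatenated into the URL as-is, so spaces and Chinese characters are never percent-encoded. A small sketch with the standard library's quote_plus (the wd parameter name is taken from the Baidu URL above):

```python
from urllib.parse import quote_plus

address = 'python 教程'
url = 'https://www.baidu.com/s?wd=' + quote_plus(address)
print(url)  # → https://www.baidu.com/s?wd=python+%E6%95%99%E7%A8%8B
```

quote_plus encodes the text as UTF-8 and turns spaces into +, which is what search-engine query strings expect.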

This follows the book's example; I only changed the URL.
It is launched from a .bat file:

@py.exe C:\Python37\getAddress.py %*

One thing to note: the last time I used a .bat file it called pyw instead of py. What is the difference between the two extensions? As far as I can tell, py.exe runs the script in a console window (so print output stays visible), while pyw.exe runs it with no console at all, which suits GUI or background scripts.

I tried writing a script of my own to batch-download CAPTCHA images, but it failed.

Below is the example from the book, which batch-downloads the comics from a site (XKCD):

#! python3
# downloadXkcd.py - Downloads every single XKCD comic.

import requests, os, bs4

url = 'http://xkcd.com' # starting url
os.makedirs('xkcd', exist_ok=True) # store comics in ./xkcd
while not url.endswith('#'):
    # Download the page.
    print('Downloading page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, 'html.parser')  # name a parser to avoid the warning

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    if comicElem == []:
        print('Could not find comic image.')
    else:
        comicUrl = 'http:' + comicElem[0].get('src')
        # Download the image.
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()

        # Save the image to ./xkcd
        imageFile = open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb')
        for chunk in res.iter_content(100000):
            imageFile.write(chunk)
        imageFile.close()

    # Get the Prev button's url.
    prevLink = soup.select('a[rel="prev"]')[0]
    url = 'http://xkcd.com' + prevLink.get('href')

print('Done.')

This is the line that needs attention; the original was comicUrl = comicElem[0].get('src'), but XKCD's src attribute is protocol-relative (it starts with //), so the scheme has to be prepended:

comicUrl = 'http:' + comicElem[0].get('src')
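Instead of hard-coding the scheme, urllib.parse.urljoin resolves protocol-relative srcs like XKCD's, as well as ordinary relative paths — a sketch:

```python
from urllib.parse import urljoin

page_url = 'http://xkcd.com/'
src = '//imgs.xkcd.com/comics/some_comic.png'  # protocol-relative, as XKCD serves it
print(urljoin(page_url, src))  # → http://imgs.xkcd.com/comics/some_comic.png
```

A network-path reference (//host/path) inherits the scheme of the page it appears on, which is exactly what the manual 'http:' + ... concatenation was doing.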

I adapted it a bit to download a site's CAPTCHA images:

#! python3
# Adapted from downloadXkcd.py - downloads CAPTCHA images from a login page.

import requests, os, bs4
import random
url = 'http://cas.sstir.cn/cas/login?service=http%3A%2F%2Fwww.sstir.cn%2FlogCheck.jsp%3FbackUrl%3Dhttp%3A%2F%2Fwww.sstir.cn%2F' # starting url
os.makedirs('xkcd', exist_ok=True) # store comics in ./xkcd
for _ in range(10):  # url never changes below, so the original while condition would loop forever; cap at 10
    # Download the page.
    print('Downloading page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()

    soup = bs4.BeautifulSoup(res.text, 'html.parser')  # name a parser to avoid the warning

    # Find the CAPTCHA image element.
    comicElem = soup.select('#authcode img')
    if comicElem == []:
        print('Could not find comic image.')
    else:
        rr = random.random()
        rrr = round(rr, 16)
        comicUrl = 'http://cas.sstir.cn/cas/authImage?' + str(rrr)  # random query defeats caching
        # Download the image.
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()

        # Save the image to ./xkcd
        imageName = os.path.basename(comicElem[0].get('src')) \
                    + str(round(random.random(), 3)) + '.bmp'
        imageFile = open(os.path.join('xkcd', imageName), 'wb')
        for chunk in res.iter_content(100000):
            imageFile.write(chunk)
        imageFile.close()

print('Done.')

There is plenty of room for improvement. I don't know much about how web pages work, so the CAPTCHA endpoint plus the random number were typed in by hand; I don't know whether there is a workable way to get them automatically. The saved file's extension is also hard-coded, and for some reason saving as png didn't work.
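On both points, one possible direction (just a sketch: it assumes the img tag carries a usable src, and it takes the extension from the server's Content-Type header instead of hard-coding it; the helper name and sample values are made up for illustration):

```python
import mimetypes
from urllib.parse import urljoin

def captcha_save_name(page_url, img_src, content_type, index):
    """Resolve the image URL against the page, and pick a file
    extension from the HTTP Content-Type header."""
    img_url = urljoin(page_url, img_src)
    mime = content_type.split(';')[0].strip()   # drop any "; charset=..." suffix
    ext = mimetypes.guess_extension(mime) or '.bin'
    return img_url, 'captcha%03d%s' % (index, ext)

# hypothetical values for illustration:
img_url, name = captcha_save_name('http://cas.sstir.cn/cas/login',
                                  '/cas/authImage?0.123', 'image/png', 1)
print(img_url)  # → http://cas.sstir.cn/cas/authImage?0.123
print(name)     # → captcha001.png
```

In the script above, content_type would come from res.headers['Content-Type'] after the requests.get call, so the extension matches whatever the server actually sends.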
