# -*- coding:utf-8 -*-
import urllib
import requests
import re
import os
def getHtml(url):
    """Download the page at *url* and return the JPEG URLs it embeds.

    Each image address is read from an ``"objURL":"..."`` field in the
    page source; only values ending in ``jpg`` or ``JPG`` are collected.

    Args:
        url: Address of the page to download.

    Returns:
        A list of URL strings in the order they occur in the page text.
        The list is empty when nothing matches.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # requests.get(url) downloads the page; .text exposes the body as text.
    # The timeout keeps the script from hanging forever on a stalled server.
    html = requests.get(url, timeout=10).text
    # Raw string so that \S reaches the regex engine instead of being treated
    # as an (invalid) string escape.  The alternation accepts both the
    # lower-case "jpg" and the upper-case "JPG" suffix.  re.S is not passed
    # because it only changes the meaning of ".", which the pattern never uses.
    return re.findall(r'"objURL":"(\S*?jpg|\S*?JPG)"', html)
# Image-search results page for the query "动态壁纸" ("live wallpaper").
SEARCH_URL = (
    'http://image.baidu.com/search/index'
    '?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8'
    '&word=动态壁纸'
)
a = getHtml(SEARCH_URL)
# Print whatever getHtml returned for that page.
print(a)
# Scratch example kept for reference (re.I makes the match case-insensitive):
# pattern = re.compile(r'([a-z]+) ([a-z]+)', re.I)
# m = pattern.match('Hello World Wide Web')
# print(m)
# Python practice code 12
# (Blog page footer: "latest recommended article" published 2020-11-14 02:52:36.)