学习 TensorFlow 也有一段时间了,这里将这段时间的学习内容做一个简单的回顾(review):利用 TensorFlow 实现对图像的二分类。
1.样本的获得
# -*- coding: cp936 -*-
import re
import urllib2
import urllib
import time
def getImage(urls):
    """Download every URL in *urls* to ``./<n>.jpg`` in the current directory.

    Files are numbered from 1 in iteration order, so an existing file with
    the same number is overwritten.  The function sleeps 1.5 seconds after
    each download (presumably to throttle requests to the server).

    Args:
        urls: iterable of image URL strings.  Pass a list even for a single
            URL; a bare string would be iterated character by character.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when a request
            fails.  Nothing is caught here, so the remaining URLs are not
            attempted.
    """
    # The request headers never change, so build them once instead of once
    # per URL.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'referer': 'https://image.baidu.com'
    }
    for ix, url in enumerate(urls, 1):
        print(url)
        req = urllib2.Request(url, headers=header)
        response = urllib2.urlopen(req)
        try:
            res = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        # ``with`` closes the file even if the write raises.
        with open('./%s.jpg' % ix, "wb") as f:
            f.write(res)
        time.sleep(1.5)
def getHtml(urls):
    """Fetch one URL, save the response body as ``0001.jpg`` and return it.

    The output file name is fixed, so every call overwrites the file written
    by the previous one.

    Args:
        urls: a single URL string.  The plural name is kept so existing
            callers keep working; ``downloadPic`` passes one URL per call.

    Returns:
        The raw response body as read from the server.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when the
            request fails.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'referer': 'https://image.baidu.com'
    }
    # The original body read an undefined name ``url``; the argument is
    # ``urls``.
    request1 = urllib2.Request(urls, headers=header)
    # urllib.urlretrieve() takes a URL string, not a Request object, so it
    # cannot send the custom headers.  Fetch through urllib2 and write the
    # file by hand instead.
    response = urllib2.urlopen(request1)
    try:
        html = response.read()
    finally:
        response.close()
    with open('0001.jpg', 'wb') as f:
        f.write(html)
    return html
def matchHtml(url):
    """Fetch the page at *url* and return every ``middleURL`` value in it.

    Args:
        url: address of the page to scan.

    Returns:
        List of the strings captured by the ``middleURL`` pattern, in the
        order they appear in the page; empty when nothing matches.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when the
            request fails.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'referer': 'https://image.baidu.com'
    }
    request = urllib2.Request(url, headers=header)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    # Only the "middleURL" entries are extracted; the unused thumbURL and
    # objURL patterns were dropped.
    # NOTE(review): \S+ is greedy, so if two entries are not separated by
    # whitespace a single match may span both - confirm against a real page.
    middleURL = r'"middleURL":"(\S+)",'
    return re.findall(middleURL, html)
def downloadPic(urls):
    """Echo each URL in *urls* and hand it to ``getHtml``, one at a time.

    Args:
        urls: iterable of URL strings.
    """
    for link in urls:
        print(link)
        getHtml(link)
# Earlier experiments, left disabled: scraping a Tieba thread through
# downloadPic, and fetching a single image URL with getImage.
# url = 'https://tieba.baidu.com/p/741081023?red_tag=3556747195'
# urls = matchHtml(url)
# downloadPic(urls)
# ellaurl = 'http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=ella'
# url = 'http://img1.imgtn.bdimg.com/it/u=116650530,2899061761&fm=26&gp=0.jpg'
# getImage(url)
# Script entry point. There is no __main__ guard, so the statements below
# run whenever this file is executed or imported.
# Baidu image search URL; the query string carries word=ella.
ellaurl = 'http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=ella'
# Collect the "middleURL" links found in the search results page.
urls = matchHtml(ellaurl)
# Save each link as ./1.jpg, ./2.jpg, ... in the current directory.
getImage(urls)
2.图像的处理
识别出图像中的人脸, 并保存下来
3.利用 TensorFlow 实现图像分类