import urllib2
import pandas as pd
import os
导入pandas库读取csv文件
# Load the attribute table from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='train_attrs(2).csv')
# Preview the first rows (shown as cell output when run in a notebook).
data.head()
| | id | img_url | xml_url |
|---|---|---|---|
| 0 | 2 | http://img1.ytgrading.com/sample/attr/2017/12/… | http://img1.ytgrading.com/sample/attr/2017/12/… |
| 1 | 3 | http://img1.ytgrading.com/sample/attr/2017/12/… | http://img1.ytgrading.com/sample/attr/2017/12/… |
| 2 | 4 | http://img1.ytgrading.com/sample/attr/2017/12/… | http://img1.ytgrading.com/sample/attr/2017/12/… |
| 3 | 5 | http://img1.ytgrading.com/sample/attr/2017/12/… | http://img1.ytgrading.com/sample/attr/2017/12/… |
| 4 | 6 | http://img1.ytgrading.com/sample/attr/2017/12/… | http://img1.ytgrading.com/sample/attr/2017/12/… |
# Inspect a single row (integer position 1) to see the id / img_url / xml_url fields.
data.iloc[1]
id 3
img_url http://img1.ytgrading.com/sample/attr/2017/12/...
xml_url http://img1.ytgrading.com/sample/attr/2017/12/...
Name: 1, dtype: object
导入urllib2.urlopen下载jpg文件以及xml文件
def _fetch_to_file(url, dest_path):
    """Download ``url`` and write the raw response bytes to ``dest_path``.

    The HTTP response is always closed, even when reading fails, so that a
    long run over many rows does not accumulate open connections.
    """
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()
    # Binary mode: the payload is written exactly as received.
    with open(dest_path, 'wb') as out_file:
        out_file.write(payload)


# Make sure the output directories exist before the first write; otherwise
# open() fails on the very first row.
for out_dir in ('./pic', './xml'):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Each row is unpacked as (id, image URL, XML URL); the id names both files.
for i, (id1, pic, xml) in enumerate(data.itertuples(index=False)):
    _fetch_to_file(pic, './pic/{0}.jpg'.format(id1))
    _fetch_to_file(xml, './xml/{0}.xml'.format(id1))
    # Progress marker every 100 rows. With a single argument, print(i)
    # produces the same output as the Python 2 statement `print i`.
    if i % 100 == 0:
        print(i)
0
100
200
300