# coding:utf-8
import urllib2
import urllib
import cookielib
import sys
import json
import xlwt
# Set up cookie handling: the opener built here keeps cookies received in
# responses in `cj` and sends them back on later requests.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Request headers attached to every API request made by update().
# NOTE(review): the User-agent value has no closing ")" - it looks truncated; confirm.
headers = {"User-agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}
# URL prefix ending in "page="; the page number is appended directly after it.
base_url = "https://api.data.gov/ed/collegescorecard/v1/schools/?sort=2011.earnings.6_yrs_after_entry.percent_greater_than_25000%3Adesc&page="
# Rest of the query string (filters, requested fields, API key), appended
# after the page number.
# NOTE(review): the API key is embedded in source - consider loading it from
# the environment instead.
rest_url = "&school.operating=1&2013.student.size__range=0..&2013.academics.program_available.assoc_or_bachelors=true&school.degrees_awarded.predominant__range=1..3&school.degrees_awarded.highest__range=2..4&fields=id%2Cschool.name%2Cschool.city%2Cschool.state%2C2013.student.size%2Cschool.ownership%2Cschool.degrees_awarded.predominant%2C2013.cost.avg_net_price.overall%2C2013.completion.rate_suppressed.overall%2C2011.earnings.10_yrs_after_entry.median%2C2011.earnings.6_yrs_after_entry.percent_greater_than_25000%2Cschool.under_investigation&api_key=Xxf2NKtwfcXUd8K2hqawnlur6c0YY93xsNFwq0Dy"
# A full request URL is built as, e.g. for page 3: base_url + str(3) + rest_url
def update(num1, num2):
    """Download API result pages and store their records in .xls workbooks.

    Pages ``num1`` through ``num2`` (inclusive) are requested one at a time
    from ``base_url + <page> + rest_url``.  Every record found under the
    ``results`` key of the JSON response becomes one spreadsheet row, with
    the columns listed in ``fields`` below.

    After each page the workbook is saved to ``school.xls`` as a checkpoint;
    once all pages are processed it is saved to ``school<num1>-<num2>.xls``.

    A page whose records cannot be written (for example a response without
    ``results``, or a record missing a field) is reported on stdout and the
    remainder of that page is skipped; download and JSON-decoding errors are
    not caught and propagate to the caller.

    :param num1: first page number to request.
    :param num2: last page number to request (inclusive).
    """
    # Column order of the sheet.  Each name is the header text and also the
    # key looked up in every record of the response.
    fields = (
        u"school.name",
        u"2013.cost.avg_net_price.overall",
        u"school.ownership",
        u"2013.student.size",
        u"2011.earnings.10_yrs_after_entry.median",
        u"school.under_investigation",
        u"school.degrees_awarded.predominant",
        u"school.state",
        u"2011.earnings.6_yrs_after_entry.percent_greater_than_25000",
        u"2013.completion.rate_suppressed.overall",
        u"school.city",
        u"id",
    )

    wb = xlwt.Workbook(encoding='utf-8')
    sheet1 = wb.add_sheet("Tables")
    for col, name in enumerate(fields):
        sheet1.write(0, col, name)

    row = 1  # next free data row; row 0 holds the header
    for url_i in range(num1, num2 + 1):
        url = base_url + "%d" % url_i + rest_url
        print(str(url_i) + ":downloading " + url)
        request = urllib2.Request(url=url, headers=headers)
        response = opener.open(request)
        try:
            html = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        my_dict = json.loads(html)
        print(str(url_i) + ":got response")
        try:
            for record in my_dict['results']:
                # Look up every field before touching the sheet, so a record
                # that lacks a key fails before any of its cells are written
                # and the row stays free for the next record.
                values = [record[name] for name in fields]
                for col, value in enumerate(values):
                    sheet1.write(row, col, value)
                row += 1
            # Per-page checkpoint so earlier pages survive a later crash.
            wb.save(u"school.xls")
        except Exception as exc:
            # Best effort: keep going with the next page, but say what was
            # lost instead of hiding the failure.
            print(str(url_i) + ":skipped rest of page (" + repr(exc) + ")")
    wb.save(u"school" + str(num1) + "-" + str(num2) + ".xls")
# Run the download for result pages 0 through 212 (inclusive).
update(num1=0, num2=212)
# MCM Problem C: data scraping
# (Web page residue: "latest recommended article published 2023-02-18 11:03:50")