背景
任务列表以文本文件的形式存储,需要按行、列的位置读取数据,并将其作为待调用接口的入参。本文中的任务约有 4 万行,每行以制表符分隔,按列依次为 id1、id2、id3、id4。以这 4 个参数作为接口的入参,并对接口返回的数据进行解析和处理。
代码
方案1
# -*- coding: utf-8 -*-
"""Approach 1: keep the tasks in an ordered list and query the search API per row.

Reads tab-separated tasks from ``my.txt``, looks every task up through the
HTTP search interface, and writes one UTF-8 result line per task to
``myresult.txt``. ``my2-new.txt`` supplies a ``hash -> value`` lookup whose
value is copied into the last output column.
"""
import codecs
import json
import sys

try:  # Python 2, which the original script targets
    from urllib import quote
    from urllib2 import urlopen
except ImportError:  # Python 3 keeps the same functions in other modules
    from urllib.parse import quote
    from urllib.request import urlopen

TASK_FILE = 'my.txt'
LOOKUP_FILE = 'my2-new.txt'
RESULT_FILE = 'myresult.txt'
SEARCH_URL = 'http://123.com/search?ver=1&hash=%s&key=%s&time=%s'
REQUEST_TIMEOUT = 30  # seconds; without it one stalled request blocks the batch


def parse_task_line(line):
    """Split one task row into ``(hash, keyword, duration)``.

    The columns are read in the order song, singer, hash, duration (names
    taken from the original variables; the article calls them id1..id4).
    The keyword is ``singer + ' - ' + song``. Returns ``None`` when the row
    has fewer than four tab-separated columns.
    """
    fields = line.strip().split('\t')
    if len(fields) < 4:
        return None
    song, singer, song_hash, duration = [part.strip() for part in fields[:4]]
    return song_hash, singer + ' - ' + song, duration


def parse_lookup_line(line):
    """Split one lookup row into ``(hash, value)``; ``None`` if malformed."""
    fields = line.strip().split('\t')
    if len(fields) < 2:
        return None
    return fields[0].strip(), fields[1].strip()


def load_rows(path, parse):
    """Return the parsed rows of the UTF-8 file *path*, skipping bad rows."""
    rows = []
    with codecs.open(path, 'r', 'utf-8') as source:
        for number, line in enumerate(source, 1):
            row = parse(line)
            if row is None:
                # A short or blank row used to abort the run with IndexError.
                sys.stderr.write('skipping malformed line %d of %r\n'
                                 % (number, path))
                continue
            rows.append(row)
    return rows


def build_search_url(song_hash, keyword, duration):
    """Return the search URL; the keyword is UTF-8 encoded, then URL-quoted."""
    return SEARCH_URL % (song_hash, quote(keyword.encode('utf-8')), duration)


def fetch_response(url):
    """GET *url* and return the decoded JSON body.

    Raises IOError on network failure and ValueError on invalid JSON.
    """
    connection = urlopen(url, timeout=REQUEST_TIMEOUT)
    try:
        payload = connection.read()
    finally:
        connection.close()
    return json.loads(payload)


def first_candidate_id(response):
    """Return the ``id`` of the first candidate, or 0 when there is none."""
    candidates = response.get('candidates') if isinstance(response, dict) else None
    if not candidates:
        return 0
    return candidates[0]['id']


def format_result_line(song_hash, keyword, response, original_lyrics):
    """Return the UTF-8 encoded, CRLF-terminated output row for one task.

    Columns: keyword, hash, first candidate id (0 if none), and the lookup
    value for the hash (the text ``None`` when the hash is not in the lookup).
    """
    # u'%s' instead of str(): str() on non-ASCII unicode fails on Python 2.
    columns = [
        keyword,
        song_hash,
        u'%s' % (first_candidate_id(response),),
        u'%s' % (original_lyrics.get(song_hash),),
    ]
    return (u'\t'.join(columns) + u'\r\n').encode('utf-8')


def main():
    """Run the whole batch: load inputs, query each task, write the results."""
    tasks = load_rows(TASK_FILE, parse_task_line)
    original_lyrics = dict(load_rows(LOOKUP_FILE, parse_lookup_line))

    # "wb" + explicit UTF-8 encoding keeps the output bytes fully under control.
    with open(RESULT_FILE, 'wb') as results:
        for count, (song_hash, keyword, duration) in enumerate(tasks, 1):
            print(count)  # progress indicator
            url = build_search_url(song_hash, keyword, duration)
            try:
                response = fetch_response(url)
            except (IOError, ValueError) as error:
                # One failed request must not abort the remaining rows; the
                # row is written with id 0, the same as an empty result.
                sys.stderr.write('search failed for %r: %r\n'
                                 % (song_hash, error))
                response = None
            results.write(
                format_result_line(song_hash, keyword, response, original_lyrics))


if __name__ == '__main__':
    main()
方案2
将读取的任务,以字典形式进行存储。
"""Approach 2: keep the tasks in a dict keyed by hash and query the API per key.

Reads tab-separated tasks from ``my.txt`` into ``{hash: {'keyword', 'timeth'}}``,
looks every hash up through the HTTP search interface, and writes one UTF-8
result line per hash to ``myresult.txt``. ``my2-new.txt`` supplies a
``hash -> value`` lookup whose value is copied into the last output column.
Rows sharing a hash collapse into one entry (the last row wins).
"""
import base64
import codecs
import json
import re
import sys

try:  # Python 2, which the original script targets
    from urllib import quote
    from urllib2 import urlopen
except ImportError:  # Python 3 keeps the same functions in other modules
    from urllib.parse import quote
    from urllib.request import urlopen

TASK_FILE = 'my.txt'
LOOKUP_FILE = 'my2-new.txt'
RESULT_FILE = 'myresult.txt'
SEARCH_URL = 'http://lyrics.kugou.com/search?ver=1&hash=%s&keyword=%s&duration=%s'
REQUEST_TIMEOUT = 30  # seconds; without it one stalled request blocks the batch


def parse_task_line(line):
    """Split one task row into ``(hash, {'keyword': ..., 'timeth': ...})``.

    The columns are read in the order song, singer, hash, duration (names
    inferred from how the fields are used; the article calls them id1..id4).
    The keyword is ``singer + ' - ' + song``. Returns ``None`` when the row
    has fewer than four tab-separated columns.
    """
    fields = line.strip().split('\t')
    if len(fields) < 4:
        return None
    song, singer, song_hash, duration = [part.strip() for part in fields[:4]]
    return song_hash, {'keyword': singer + ' - ' + song, 'timeth': duration}


def parse_lookup_line(line):
    """Split one lookup row into ``(hash, value)``; ``None`` if malformed."""
    fields = line.strip().split('\t')
    if len(fields) < 2:
        return None
    return fields[0].strip(), fields[1].strip()


def load_mapping(path, parse):
    """Read the UTF-8 file *path* and return ``(mapping, line_count)``.

    *parse* turns a line into a ``(key, value)`` pair or ``None``; rows that
    parse to ``None`` are reported on stderr and skipped.
    """
    mapping = {}
    line_count = 0
    with codecs.open(path, 'r', 'utf-8') as source:
        for line_count, line in enumerate(source, 1):
            pair = parse(line)
            if pair is None:
                # A short or blank row used to abort the run with IndexError.
                sys.stderr.write('skipping malformed line %d of %r\n'
                                 % (line_count, path))
                continue
            mapping[pair[0]] = pair[1]
    return mapping, line_count


def build_search_url(song_hash, keyword, duration):
    """Return the search URL; the keyword is UTF-8 encoded, then URL-quoted."""
    return SEARCH_URL % (song_hash, quote(keyword.encode('utf-8')), duration)


def fetch_response(url):
    """GET *url* and return the decoded JSON body.

    Raises IOError on network failure and ValueError on invalid JSON.
    """
    connection = urlopen(url, timeout=REQUEST_TIMEOUT)
    try:
        payload = connection.read()
    finally:
        connection.close()
    return json.loads(payload)


def first_candidate_id(response):
    """Return the ``id`` of the first candidate, or 0 when there is none."""
    candidates = response.get('candidates') if isinstance(response, dict) else None
    if not candidates:
        return 0
    return candidates[0]['id']


def format_result_line(song_hash, keyword, response, original_lyrics):
    """Return the UTF-8 encoded, CRLF-terminated output row for one task.

    Columns: keyword, hash, first candidate id (0 if none), and the lookup
    value for the hash (the text ``None`` when the hash is not in the lookup).
    The hash itself is the key: the earlier ``hash1[h]`` referred to a list
    that does not exist in this approach and raised NameError.
    """
    # u'%s' instead of str(): str() on non-ASCII unicode fails on Python 2.
    columns = [
        keyword,
        song_hash,
        u'%s' % (first_candidate_id(response),),
        u'%s' % (original_lyrics.get(song_hash),),
    ]
    return (u'\t'.join(columns) + u'\r\n').encode('utf-8')


def main():
    """Run the whole batch: load inputs, query each hash, write the results."""
    tasks, task_line_count = load_mapping(TASK_FILE, parse_task_line)
    print(task_line_count)  # number of lines read from the task file
    original_lyrics, _ = load_mapping(LOOKUP_FILE, parse_lookup_line)

    # "wb" + explicit UTF-8 encoding keeps the output bytes fully under control.
    with open(RESULT_FILE, 'wb') as results:
        for song_hash in tasks:
            task = tasks[song_hash]
            url = build_search_url(song_hash, task['keyword'], task['timeth'])
            try:
                response = fetch_response(url)
            except (IOError, ValueError) as error:
                # One failed request must not abort the remaining rows; the
                # row is written with id 0, the same as an empty result.
                sys.stderr.write('search failed for %r: %r\n'
                                 % (song_hash, error))
                response = None
            results.write(format_result_line(
                song_hash, task['keyword'], response, original_lyrics))


if __name__ == '__main__':
    main()
注意
1、URL 的编码:关键字中含有中文等非 ASCII 字符时,需要先编码为 UTF-8,再用 urllib.quote 转义后拼接到 URL 中
2、将结果写入磁盘时要注意编码方式(本文先把字符串编码为 UTF-8,再以二进制方式写入文件),否则容易报错
3、注意判断接口返回的结果是否为空,并对接口调用做好容错处理