# -*- coding: utf-8 -*-
"""
Created on Mon Dec 17 11:41:40 2018
@author: badminton.Zhou
"""
import time
import random
import pandas as pd
from lxml import etree
import requests
import re
import json
# Date stamp (YYYYMMDD) for the day before the script runs; it is embedded in
# the output file names below.  ``pd.datetime`` no longer exists in pandas 2.x,
# so the stamp is derived from ``pd.Timestamp`` instead.
yesterday = (pd.Timestamp.today() - pd.Timedelta(days=1)).strftime('%Y%m%d')
x = 'data/b_%s.xlsx' % yesterday
# Input workbook, loaded once at import time; search_name() and save_data()
# both read this module-level frame.
df = pd.read_excel('16.xlsx')
# NOTE(review): `au` is assigned twice in a row, so only the second (.csv)
# value is ever seen by save_data() -- confirm whether the first assignment
# was meant to bind a different name.
au = 'audio_qq_id_%s.xlsx' % yesterday
au = 'data/audio_a_%s.csv' % yesterday
def get_id(search_name, max_retry=5, timeout=10):
    """Request the pages for one search term and append a row to ``o_csv``.

    Two HTTP GETs are issued (the URL built from ``search_name`` and
    ``url_detail``), the second response is parsed with lxml, and a single
    tab-separated row ``song_name, singer_name, data_id`` is appended to
    ``o_csv``.  Any exception raised along the way triggers a delayed retry.

    NOTE(review): ``headers``, ``url_detail``, ``song_name``, ``singer_name``,
    ``data_id`` and ``o_csv`` are not defined inside this function; they are
    presumably module-level names -- confirm they exist before relying on it.

    Args:
        search_name: Text substituted into the request URL template.
        max_retry: Number of additional attempts allowed after a failure.
        timeout: Seconds to wait on each HTTP request before giving up on it,
            so a stalled connection cannot block the whole run.

    Returns:
        None, both on success and after the retries are exhausted.
    """
    # NOTE(review): the template is just '{}', so the URL is the search term
    # itself -- confirm the intended endpoint.
    url = '{}'.format(search_name)
    retries_left = max_retry
    while True:
        try:
            # The first response is not used further in this function; the
            # request is still issued as before.
            resp = requests.get(url=url, headers=headers, verify=True,
                                timeout=timeout)
            detail_resp = requests.get(url=url_detail, headers=headers,
                                       timeout=timeout)
            html = detail_resp.text
            tree = etree.HTML(html)
            print(data_id)
            row = pd.DataFrame([song_name, singer_name, data_id]).T
            # Append mode without a header: each call adds exactly one row.
            row.to_csv(o_csv, index=False, header=False, mode='a', sep='\t',
                       encoding='utf-8')
            print(singer_name)
            # Randomised pause after a successful fetch.
            time.sleep(random.random() + 1.5)
            return
        except Exception as exc:
            # Deliberately broad: any failure in the fetch/parse/write chain
            # is treated as retryable, as in the original best-effort design.
            if retries_left <= 0:
                print('get_id: giving up on %r after %r' % (search_name, exc))
                return
            # Longer randomised back-off before the next attempt.
            time.sleep(random.random() + 5)
            print(type(exc))
            retries_left -= 1
def search_name():
    """Build one search term per zipped pair and hand each to ``get_id``.

    The pairs come from ``zip(df, df['歌'])``; the two halves are concatenated,
    printed, and passed to ``get_id`` as ``search_name``.

    NOTE(review): iterating ``df`` directly walks its column labels rather
    than the values of a column -- confirm this is the intended pairing.
    """
    pairs = zip(df, df['歌'])
    for first, second in pairs:
        query = first + second
        print(query)
        get_id(search_name=query)
def save_data():
    """Export the scraped rows to Excel, alone and joined with the input frame.

    Reads the tab-separated file at ``audio_csv`` into three named columns,
    writes it to the workbook ``audio_x`` (sheet ``comments``), then
    concatenates it column-wise with the module-level ``df`` and writes the
    result to ``au``.

    NOTE(review): ``audio_csv`` and ``audio_x`` are not defined in this
    function, and ``au`` as assigned at module level ends in ``.csv`` although
    it is written with ``to_excel`` -- confirm the intended names/extension.
    """
    column_names = ['song_name', 'singer_name', 'data_id']
    scraped = pd.read_csv(
        audio_csv,
        sep='\t',
        engine='python',
        encoding='utf-8',
        names=column_names,
    )

    # Stand-alone copy of the scraped rows.
    with pd.ExcelWriter(audio_x) as writer:
        scraped.to_excel(writer, index=False, sheet_name='comments')

    # axis=1 places the scraped columns beside the input columns, aligned on
    # the row index.
    combined = pd.concat([df, scraped], axis=1)
    combined.to_excel(au, index=False)
# Script entry point: run the scrape first, then export what it wrote.
if __name__ == '__main__':
    # Calls get_id() once per search term built from the input frame.
    search_name()
    # Reads the accumulated CSV rows back in and writes the Excel outputs.
    save_data()