import re
import sqlite3
import time
import pandas as pd
import requests
from time import sleep
import json
from datetime import datetime
# Silence urllib3 warnings for the whole process.
# NOTE(review): presumably intended to hide InsecureRequestWarning, but no
# request in the visible code passes verify=False -- confirm it is still needed.
requests.packages.urllib3.disable_warnings()
def get_start(conn, table_name):
    """Return the highest confirmed row index stored in *table_name*.

    Only rows whose "已确认" column equals '1' are considered.  Index values
    are converted with ``int`` before comparison, so text values such as
    '9' and '10' are ranked numerically rather than lexically.

    Args:
        conn: Open database connection handed to pandas.
        table_name: Name of the results table; interpolated into the SQL as
            a double-quoted identifier.

    Returns:
        The largest confirmed index, or -1 when the table cannot be read,
        has no confirmed rows, or holds an index that is not an integer.
    """
    # Only the index column is needed; skipping the others avoids loading
    # the (potentially large) stored result payloads.
    query = f'''select "索引" from "{table_name}" where "已确认"='1' '''
    try:
        confirmed = pd.read_sql_query(query, con=conn)
        # default=-1 covers an existing table without any confirmed row.
        return max((int(value) for value in confirmed['索引']), default=-1)
    except Exception:
        # Best effort: an unreadable table or malformed index means "start
        # from scratch".  Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return -1
def test_ready():
    """Probe the search endpoint until it returns a usable answer.

    A fixed keyword is requested from ``base_url``; the service counts as
    ready once the text 'singer' appears in the decoded response.

    Returns:
        The zero-based number of the attempt that succeeded, so 0 means the
        service answered correctly on the first try.  If all 9999 attempts
        fail, the index of the last attempt is returned.
    """
    probe_url = base_url + "&k=人间烟火"
    try_times = 0
    for try_times in range(9999):
        try:
            response = requests.request("GET", probe_url, headers=headers, data={}, timeout=15)
        except requests.RequestException:
            sleep(2)
            print("获取失败,正在重试...")
            # No response was received on this attempt, so there is nothing
            # to inspect; falling through would touch an unbound or stale
            # ``response``.
            continue
        try:
            song_json = response.json()
        except ValueError:
            # A body that is not JSON is treated as "not ready" instead of
            # aborting the whole run.
            song_json = None
        if 'singer' in str(song_json):
            break
        print(f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
        sleep(30)
    return try_times
def update_ready(table_name, i):
    """Flag the ``batch`` rows that precede index *i* as confirmed.

    Rows whose "索引" lies in ``range(i - batch, i)`` get "已确认" set to '1'
    on the module-level connection, and the change is committed.

    Args:
        table_name: Results table to update.
        i: Exclusive upper bound of the index range to confirm.
    """
    id_list = ','.join(map(str, range(i - batch, i)))
    cur = conn.cursor()
    cur.execute(f'''update "{table_name}" set "已确认"='1' where "索引" in ({id_list} )''')
    cur.close()
    conn.commit()
def delete_ready(table_name):
    """Remove every row of *table_name* that is still unconfirmed.

    Rows with "已确认" equal to '0' are deleted and the change is committed
    on the module-level connection.  Any error is printed rather than
    raised.

    Args:
        table_name: Results table to clean up.
    """
    try:
        cur = conn.cursor()
        cur.execute(f'''delete from "{table_name}" where "已确认"='0' ''')
        cur.close()
        conn.commit()
    except Exception as err:
        print(err)
# Shared SQLite connection used by the helper functions in this script.
conn = sqlite3.connect("source.db")

# HTTP headers sent with every request.
# NOTE(review): Referer, User-Agent and Host are blank here -- presumably
# they have to be filled in before the script can be used.
headers = {
    "Referer": "",
    "User-Agent": "",
    "Connection": "keep-alive",
    "Content-Type": "application/json;charset=UTF-8",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=60",
    "Accept": "*/*",
    "Host": "",
}

# Search endpoint; the keyword is appended to it as "&k=<keyword>".
base_url = ""

# Number of rows confirmed together after a successful readiness probe.
batch = 100
def _get_with_retry(url):
    """GET *url*, retrying every 2 seconds until the server sends a response."""
    while True:
        try:
            return requests.request("GET", url, headers=headers, data={}, timeout=15)
        except requests.RequestException:
            sleep(2)
            print("获取失败,正在重试...")


def _search_song(song):
    """Search for *song*; return ``(keyword, payload)`` or ``None`` to skip the row."""
    # NOTE(review): the keyword is appended to the URL unescaped, so a title
    # containing '&' or '#' would change the query string -- confirm what the
    # endpoint expects before adding percent-encoding.
    response = _get_with_retry(base_url + f"&k={song}")
    if response.status_code != 200:
        # Second chance with parentheses, quotes, backslashes and whitespace
        # stripped from the keyword.
        # NOTE(review): the parentheses appear twice in the character class;
        # possibly full-width variants were lost in a copy/paste -- verify.
        song = re.sub(r'[()()\\"\'\s]', '', song)
        # The retry goes through the same guarded helper, so a network error
        # here no longer aborts the crawl.
        response = _get_with_retry(base_url + f"&k={song}")
        if response.status_code != 200:
            return None
    try:
        return song, response.json()
    except ValueError:
        # A 200 response whose body is not JSON is skipped like a failed row
        # instead of crashing the run.
        print(f"{song}: response is not valid JSON, skipped")
        return None


def _crawl_pass(table_name):
    """Run one crawl pass; return True when finished, False when a restart is needed."""
    test_ready()
    df = pd.read_sql_query("select * from source", con=conn)
    # Drop rows from an unverified batch, then resume after the last
    # confirmed index.
    delete_ready(table_name)
    start = get_start(conn, table_name)
    df = df.astype(str)
    last_index = None
    for i, row in df.iterrows():
        last_index = i
        if i <= start:
            continue
        if i != 0 and i % batch == 0:
            # A probe that did not succeed on its first attempt makes the
            # previous batch suspect: restart instead of confirming it.
            if test_ready():
                return False
            update_ready(table_name, i)
        found = _search_song(row['songname'] + row['choric_singer'])
        if found is None:
            continue
        song, song_json = found
        dic = {"索引": [i], "搜索词": [song], "结果": [str(song_json)], "已确认": ['0']}
        print(i, song, str(song_json))
        song_df = pd.DataFrame(dic).astype(str)
        # The very first row recreates the table; every later row appends.
        mode = "replace" if i == 0 else "append"
        song_df.to_sql(name=table_name, con=conn, if_exists=mode, index=False)
    if test_ready():
        return False
    if last_index is not None:
        # update_ready confirms range(i - batch, i), so pass last_index + 1
        # to include the final row.  An empty source table has nothing to
        # confirm.
        update_ready(table_name, last_index + 1)
    return True


def main():
    """Crawl search results for every row of the ``source`` table.

    Each pass waits for the endpoint to be ready, discards unconfirmed rows,
    resumes after the last confirmed index and stores one result row per
    song in the '歌单获取2' table.  Rows are confirmed in groups of ``batch``
    once a readiness probe succeeds on its first attempt; otherwise the pass
    is restarted.

    Raises:
        SystemExit: Always, once the crawl has completed.
    """
    table_name = '歌单获取2'
    # Loop rather than recurse so that repeated restarts cannot exhaust the
    # call stack.
    while not _crawl_pass(table_name):
        pass
    # Keeps the original contract of ending the process, without relying on
    # the ``exit`` helper that only exists when the ``site`` module is loaded.
    raise SystemExit
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Tags: spider, requests
# (Web-page footer captured with this snippet: "latest recommended article published 2024-10-17 10:02:06".)