import time

import requests
import pymysql


def conn_sql():
    """Open and return a connection to the local `my_project` MySQL database.

    Returns:
        pymysql.Connection: an open connection; the caller is responsible
        for closing it.
    """
    return pymysql.connect(
        user='root',
        password='root',
        host='localhost',
        database='my_project',
        port=3306,
    )


def table_exists(table_name):
    """Check whether a table exists in the current database.

    Args:
        table_name: name of the table to look for.

    Returns:
        int: 1 if the table exists, 0 otherwise (kept as ints for
        backward compatibility with existing callers).
    """
    conn = conn_sql()
    try:
        # pymysql cursors are context managers; this guarantees the
        # cursor is closed even if execute() raises.
        with conn.cursor() as cursor:
            cursor.execute('show tables;')
            existing = {row[0] for row in cursor.fetchall()}
    finally:
        conn.close()
    return 1 if table_name in existing else 0


def create_table(table_name):
    """Create the movies table if it does not already exist.

    Args:
        table_name: name of the table to create. NOTE: interpolated into
        DDL via f-string — only call with trusted, internal names.
    """
    conn = conn_sql()
    try:
        if table_exists(table_name) == 0:
            # decimal(3,1) instead of int: Douban scores are fractional
            # (e.g. "8.5") and an int column would truncate/round them.
            sql = f"""
                create table {table_name}(
                    id int(7) primary key auto_increment,
                    release_date varchar(10),
                    title varchar(30),
                    score decimal(3,1),
                    vote_count int(10),
                    types varchar(30),
                    url varchar(100)
                )
            """
            with conn.cursor() as cursor:
                cursor.execute(sql)
            print(f'{table_name}表格创建成功')
        else:
            print('表已存在')
    except Exception as e:
        print(e)
    finally:
        conn.close()


def add_mysql(dic, table_name):
    """Bulk-insert movie records into `table_name`.

    Args:
        dic: list of dicts as produced by get_data(); each dict's value
            order must match the column list below (relies on dict
            insertion order).
        table_name: target table, interpolated into the SQL (trusted
            internal name only).
    """
    conn = conn_sql()
    try:
        rows = [tuple(record.values()) for record in dic]
        sql = (
            f'insert into {table_name}'
            '(release_date,title,score,vote_count,types,url) '
            'values(%s,%s,%s,%s,%s,%s)'
        )
        # `with` closes the cursor on every path — the original could hit
        # a NameError in `finally` if conn.cursor() itself raised.
        with conn.cursor() as cursor:
            cursor.executemany(sql, rows)
        conn.commit()
        print('数据写入成功')
    except Exception as e:
        print(e)
        conn.rollback()
    finally:
        conn.close()


def get_data(url, headers):
    """Fetch one page of the Douban ranking API and normalise it.

    Args:
        url: full API endpoint for one page of results.
        headers: request headers (browser-like UA/Referer expected by
            the Douban endpoint).

    Returns:
        list[dict]: one dict per movie. Key order deliberately matches
        the INSERT column order in add_mysql() — do not reorder.
    """
    resp = requests.get(url, headers=headers)
    time.sleep(1)  # throttle between requests to stay polite
    movie_list = []
    for item in resp.json():
        movie_list.append({
            'release_date': item['release_date'],
            'title': item['title'],
            'score': item['score'],
            'vote_count': item['vote_count'],
            'types': item['types'][0],  # keep only the first genre tag
            'url': item['url'],  # local name: original shadowed the param
        })
    return movie_list


def main():
    """Create the target table, then page through the API and persist."""
    table_name = 'douban'
    create_table(table_name)
    # Headers are loop-invariant — build them once.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/120.0.0.0',
        'Referer': 'https://movie.douban.com/typerank?',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Page through results 20 at a time (start offsets 1, 21, ..., 421).
    for start in range(1, 440, 20):
        page_url = (
            f'https://movie.douban.com/j/chart/top_list'
            f'?type=5&interval_id=100%3A90&action=&start={start}&limit=20'
        )
        add_mysql(get_data(page_url, headers=headers), table_name)


if __name__ == '__main__':
    main()
# 豆瓣-数据库保存数据 (Douban — save scraped data to the database)
# 最新推荐文章于 2024-05-21 20:34:46 发布 (blog publish-date footer from the original article)