豆瓣-数据库保存数据

import time
import requests
import pymysql

# 数据库链接
def conn_sql():
    conn = pymysql.connect(
        user='root',
        password='root',
        host='localhost',
        database='my_project',
        port=3306
    )
    return conn


# 判断数据库表是否存在
def table_exists(table_name):
    conn = conn_sql()
    cursor = conn.cursor()
    sql = 'show tables;'
    cursor.execute(sql)
    # 获取所有表
    tables = cursor.fetchall()
    # print(tables)
    data_list = [tup[0] for tup in tables]
    if cursor:
        cursor.close()
    if conn:
        conn.close()
    if table_name in data_list:
        return 1
    else:
        return 0


# 判断创建表
def create_table(table_name):
    conn = conn_sql()
    try:
        if table_exists(table_name) == 0:
            # 创建表
            sql1 = f"""
                    create table {table_name}(
                    id int(7) primary key auto_increment,
                    release_date varchar(10),
                    title varchar(30),
                    score int(3),
                    vote_count int(10),
                    types varchar(30),
                    url varchar(100)
                    )
            """
            cursor = conn.cursor()
            cursor.execute(sql1)
            print(f'{table_name}表格创建成功')
        else:
            print('表已存在')
    except Exception as e:
        print(e)
    finally:
        if conn:
            conn.close()


# 添加数据记录
def add_mysql(dic, table_name):
    conn = conn_sql()
    try:
        cursor = conn.cursor()
        lis = [tuple(d.values()) for d in dic]
        # print(lis)
        sql = f'insert into {table_name}(release_date,title,score,vote_count,types,url) values(%s,%s,%s,%s,%s,%s)'
        cursor.executemany(sql, lis)
        conn.commit()
        print('数据写入成功')
    except Exception as e:
        print(e)
        conn.rollback()
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


# 获取豆瓣网页数据
def get_data(url, headers):
    move_list = []
    resp = requests.get(url, headers=headers)
    time.sleep(1)
    json_data = resp.json()
    for j in json_data:
        title = j['title']
        score = j['score']
        vote_count = j['vote_count']
        release_date = j['release_date']
        types = j['types'][0]
        url = j['url']
        move_dict = {
            'release_date': release_date,
            'title': title,
            'score': score,
            'vote_count': vote_count,
            'types': types,
            'url': url,
        }
        move_list.append(move_dict)
    # print(move_list)
    return move_list


def main():
    # 表的名称
    table_name = 'douban'
    # 判断表
    create_table(table_name)
    for i in range(1, 440, 20):
        url = f'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start={i}&limit=20'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/120.0.0.0',
            'Referer': 'https://movie.douban.com/typerank?',
            'X-Requested-With': 'XMLHttpRequest'
        }
        dic_data = get_data(url, headers=headers)
        add_mysql(dic_data, table_name)


if __name__ == '__main__':
    main()
  • 6
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

py_way

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值