记一次简单的 Python 爬取 3D、大乐透等历年数据的过程,代码中如有不足之处,感谢指出。
import linecache
import os
import re
from faker import Faker
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.adapters import HTTPAdapter
from tqdm import tqdm
import requests
import sqlite3
import urllib.parse
def creat_table(table_name):
    """Create the ``t_<table_name>`` table in ``douyin.db`` if it is missing.

    The table has an auto-incrementing integer ``ID`` primary key and a
    non-null text ``VID`` column.

    Args:
        table_name: Suffix appended to ``t_`` to form the table name. It is
            interpolated directly into the SQL text (identifiers cannot be
            bound as parameters), so it must come from a trusted source and
            form a valid SQLite identifier.

    Raises:
        sqlite3.Error: If the statement cannot be executed.
    """
    conn = sqlite3.connect('douyin.db')
    try:
        conn.execute(f'''CREATE TABLE IF NOT EXISTS t_{table_name}
            (ID INTEGER PRIMARY KEY AUTOINCREMENT,VID TEXT NOT NULL);''')
        conn.commit()
    finally:
        # Close even when execute/commit raises so the handle is not leaked.
        conn.close()
def insert_data(table_name, vid):
    """Insert ``vid`` into ``t_<table_name>`` unless it is already stored.

    Args:
        table_name: Suffix appended to ``t_`` to form the table name. It is
            interpolated directly into the SQL text (identifiers cannot be
            bound as parameters), so it must come from a trusted source.
        vid: Value to store in the ``VID`` column.

    Raises:
        sqlite3.Error: If the table does not exist or a statement fails.
    """
    conn = sqlite3.connect('douyin.db')
    try:
        # Bind vid as a parameter: splicing it into the SQL text unquoted
        # breaks on non-numeric ids, drops leading zeros of numeric ones,
        # and allows SQL injection.
        found = conn.execute(
            f"SELECT 1 FROM t_{table_name} WHERE VID = ? LIMIT 1", (vid,)
        ).fetchone()
        if found is None:
            conn.execute(
                f"INSERT INTO t_{table_name} (ID,VID) VALUES (null,?)", (vid,)
            )
            conn.commit()
    finally:
        # Close even when a statement raises so the handle is not leaked.
        conn.close()
def selet_data(table_name):
    """Return every ``VID`` value stored in ``t_<table_name>`` as a list.

    Args:
        table_name: Suffix appended to ``t_`` to form the table name. It is
            interpolated directly into the SQL text (identifiers cannot be
            bound as parameters), so it must come from a trusted source.

    Returns:
        A list with the ``VID`` column of each row; empty when the table
        has no rows.

    Raises:
        sqlite3.Error: If the table does not exist or the query fails.
    """
    conn = sqlite3.connect('douyin.db')
    try:
        rows = conn.execute(f"SELECT VID from t_{table_name}").fetchall()
    finally:
        # The connection was previously never closed; release it here.
        conn.close()
    return [row[0] for row in rows]
class Douyin:
def __init__(self, url):
self.share_url = url
self.headers = {
'User-Agent': "Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3"
}
self.sec_uid = None
self.uid = None
self.nick_name = None
def get_user_info(self):
resp = requests.get(self.share_url, headers=self.headers)
self.sec_uid = 'sec_uid=' +