Python爬虫爬取基金数据

1 程序结构介绍

代码结构图:

代码结构图

代码目录结构:

代码目录结构

结果图:

结果图

2 数据源网站

3 GitHub地址

4 文件代码:

main.py

from get_fund_code import *

from get_fund_data import *

from MysqlDB import MysqlFundCode

import time

def main():
    """Read the fund list from the "指数型" table, store every fund's history, and report the run time."""
    started_at = time.time()
    print("程序正在运行....")

    # Each returned entry is [fund_code, detail_table_name].
    code_rows = MysqlFundCode().get_code_and_name_and_type("指数型")
    save_to_mysql(fund_code_lists=code_rows)

    print("一共运行了{}秒".format(time.time() - started_at))


if __name__ == '__main__':
    main()

get_fund_data.py

import logging

import sys

from bs4 import BeautifulSoup as bs

from MysqlDB import MysqlFundDetailData

from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name,get_HTML_content

def get_refer_fund_detail_data(fund_code,start_date="2019-09-28",end_date="2019-12-28"):
    """Scrape the daily net-value history of one fund from dayfund.cn.

    :param fund_code: fund code placed in the request URL.
    :param start_date: value of the ``sdate`` query parameter.
    :param end_date: value of the ``edate`` query parameter.
    :return: list of dicts with the keys ``date``, ``latest_nvalue_pu``,
        ``latest_sum_nvalue``, ``last_nvalue_pu``, ``last_sum_nvalue``,
        ``daily_growth`` and ``daily_growth_rate``.  The first collected row
        is dropped (presumably the table header -- TODO confirm).  An empty
        list is returned when the page cannot be fetched or parsed, so callers
        can always iterate the result.
    """
    # Child index of every wanted cell inside a <tr>.  Only odd indexes are
    # used; presumably the even slots are whitespace text nodes between the
    # cells -- TODO confirm against the live page.
    cell_positions = (
        ("date", 1),
        ("latest_nvalue_pu", 7),
        ("latest_sum_nvalue", 9),
        ("last_nvalue_pu", 11),
        ("last_sum_nvalue", 13),
        ("daily_growth", 15),
        ("daily_growth_rate", 17),
    )
    # The highest index read is 17, so a usable row needs at least 18 children
    # (the previous ``>= 17`` guard let 17-child rows raise IndexError).
    min_children = max(position for _, position in cell_positions) + 1

    try:
        url = "https://www.dayfund.cn/fundvalue/{}.html?sdate={}&edate={}".format(fund_code, start_date, end_date)
        soup = bs(get_HTML_content(url), "lxml")
        table = soup.find_all("table", attrs={"class": "mt1 clear"})[0]

        fund_lists = []
        for row in table.find_all("tr"):
            children = list(row)
            if len(children) < min_children:
                continue
            fund_lists.append(
                {key: children[position].string for key, position in cell_positions}
            )
        return fund_lists[1:]
    except Exception as e:
        logging.error("{} | {}".format(e, sys._getframe().f_code.co_name))
        return []

def save_to_mysql(start_time=None, end_time=None, fund_code_lists=None):
    """Scrape and store the net-value history of every fund in *fund_code_lists*.

    :param start_time: first day to fetch; defaults to ``get_year_mon_day(y=1)``.
    :param end_time: last day to fetch; defaults to ``get_year_mon_day()``.
    :param fund_code_lists: sequence of entries whose first item is the fund
        code and whose last item is the name of the table to write to.
        ``None`` or an empty sequence only logs a message.
    :return: None
    """
    func_name = sys._getframe().f_code.co_name

    # Guard first: nothing to do (and no dates to compute) without funds.
    if not fund_code_lists:
        logging.info("列表为空,没有爬取到数据。| {}".format(func_name))
        return

    if start_time is None:
        start_time = get_year_mon_day(y=1)
    if end_time is None:
        end_time = get_year_mon_day()

    mysql = MysqlFundDetailData()
    for info in fund_code_lists:
        fund_code = info[0]
        table_name = info[-1]
        # A failed scrape may yield None; treat it as "no rows" instead of
        # crashing the whole run.
        fund_lists = get_refer_fund_detail_data(fund_code, start_time, end_time) or []

        if not mysql.check_table_if_exist(table_name):
            logging.info("{}表没有创建,正在创建... | {}".format(table_name, func_name))
            mysql.create_table(table_name)
            logging.info("创建成功!正在将数据写入{}中... | {}".format(table_name, func_name))
        else:
            logging.info("表已存在,正在将数据写入{}中... | {}".format(table_name, func_name))

        for row in fund_lists:
            mysql.insert_into_table(table_name, row)
        logging.info("{}。写入成功。 | {}".format(table_name, func_name))

def get_name_data():
    """Developer helper: print a ``temp_dict["<name>"] = <expr>`` line for each template assignment."""
    template = '''
    fund_type = i[2]
    date = i[3]
    nvalue_pu = i[4]
    day_growth_rate = i[5]
    a_week_rate = i[6]
    a_month_rate = i[7]
    _3_month_rate = i[8]
    _6_month_rate = i[9]
    a_year_rate = i[10]
    _2_year_rate = i[11]
    _3_year_rate = i[12]
    from_this_year = i[13]
    from_found_year = i[14]
    poundage = i[-2]
    purchase_money = i[-5]
    '''
    split_lines = (row.strip().split("=") for row in template.split("\n"))
    for parts in split_lines:
        # Only "<name> = <expr>" lines have exactly two parts.
        if len(parts) != 2:
            continue
        print("temp_dict[\"{}\"] = {}".format(parts[0].strip(), parts[-1].strip()))

get_fund_code.py

import demjson,re

import logging,sys

import os,time

from MysqlDB import MysqlFundCode

from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name,get_HTML_content

def get_fund_code_lists_by_page(page):
    """Fetch one page (100 funds) of the eastmoney index-fund ranking.

    :param page: 1-based page number placed in the ``pi`` query parameter.
    :return: list with one dict per fund (keys: see ``field_positions``);
        an empty list when the request or the parsing fails.
    """
    # Position of each field inside a "|"-separated record.
    field_positions = (
        ("fund_code", 0),
        ("fund_name", 1),
        ("fund_type", 2),
        ("date", 3),
        ("nvalue_pu", 4),
        ("day_growth_rate", 5),
        ("a_week_rate", 6),
        ("a_month_rate", 7),
        ("_3_month_rate", 8),
        ("_6_month_rate", 9),
        ("a_year_rate", 10),
        ("_2_year_rate", 11),
        ("_3_year_rate", 12),
        ("from_this_year", 13),
        ("from_found_year", 14),
        ("poundage", -2),
        ("purchase_money", -5),
    )
    try:
        url = "https://fundapi.eastmoney.com/fundtradenew.aspx?ft=zs&sc=1n&st=desc&pi={}&pn=100&cp=&ct=&cd=&ms=&fr=&plevel=&fst=&ftype=&fr1=&fl=0&isab=1".format(page)
        content = get_HTML_content(url)
        # content[15:-1] drops a 15-character prefix and the last character,
        # presumably the "var ... =" wrapper and a trailing ";" -- TODO confirm.
        payload = demjson.decode(content[15:-1])

        fund_info_lists = []
        for record in payload["datas"]:
            # Split on the real separator.  The previous code turned "|" into
            # a space and split on spaces, which shifted every column as soon
            # as a field (e.g. a fund name) itself contained a space.
            fields = record.split("|")
            fund_info_lists.append(
                {name: fields[position] for name, position in field_positions}
            )

        logging.info("{} | {}".format("第 {} 页数据抓取完成。".format(page),sys._getframe().f_code.co_name))
        return fund_info_lists
    except Exception as e:
        logging.error("{} | {}".format(e, sys._getframe().f_code.co_name))
        return []

def get_total_page_num():
    """Return the page count (``allPages``) reported by the eastmoney ranking API.

    :return: number of pages as an int, or 0 when the request or the parsing
        fails, so that ``range(1, get_total_page_num() + 1)`` is always valid.
    """
    url = "https://fundapi.eastmoney.com/fundtradenew.aspx?ft=zs&sc=1n&st=desc&pi=1&pn=100&cp=&ct=&cd=&ms=&fr=&plevel=&fst=&ftype=&fr1=&fl=0&isab=1"
    try:
        content = get_HTML_content(url)
        # Raw string: "\|" in a normal string literal is an invalid escape.
        cleaned = re.sub(r"\|", " ", content[15:-1])
        total_page = int(demjson.decode(cleaned)["allPages"])
        logging.info("{} pages | {}".format(total_page, sys._getframe().f_code.co_name))
        return total_page
    except Exception as e:
        logging.error("{} | {}".format(e, sys._getframe().f_code.co_name))
        return 0

def get_all_fund_lists():
    """Download every page of the fund ranking.

    :return: list of pages, each page being the list of fund dicts returned by
        ``get_fund_code_lists_by_page``.  Pages that could not be fetched are
        left out; the result is empty when the page count is unavailable.
    """
    # The page-count helper may report failure with None; treat it as 0 pages.
    total_pages = get_total_page_num() or 0

    all_fund_lists = []
    for page in range(1, total_pages + 1):
        page_records = get_fund_code_lists_by_page(page)
        if page_records is None:
            # A failed page must not poison consumers that iterate each page.
            continue
        all_fund_lists.append(page_records)
    return all_fund_lists

def write_all_fund_lists_into_file(filename="all_fund_lists.txt"):
    """Dump every downloaded page of funds to *filename*, one page per line.

    An existing file is appended to, with a dated separator before the new
    data and a dashed line after it; otherwise the file is created.
    """
    func_name = sys._getframe().f_code.co_name

    def dump_pages(handle):
        # One line per page: the str() of that page's list of fund dicts.
        for page in get_all_fund_lists():
            handle.write(str(page))
            handle.write("\n")

    if not os.path.exists(filename):
        with open(filename, "w", encoding="utf-8") as out:
            logging.info("{} 文件不存在,正在创建并写数据... | {}".format(filename, func_name))
            dump_pages(out)
        return

    with open(filename, "a+", encoding="utf-8") as out:
        logging.info("{} 文件存在,正在追加... | {}".format(filename, func_name))
        out.write("\n\n")
        out.write("-"*20 + "这是新加的数据,时间:{}".format(time.ctime()) + "\n\n")
        dump_pages(out)
        out.write("\n" + "-"*20)

def get_name_data():
    """Developer helper: print the pieces of an INSERT statement for the detail table.

    Four lines are printed, in this order:

    1. the ``insert into {table_name} (...) values(...)`` template,
    2. the quoted ``\\'{column}\\'`` value placeholders,
    3. the ``column=each_data["column"]`` keyword arguments,
    4. the bare column list.

    All lists are comma-separated without a trailing comma, so the generated
    SQL is valid as printed.
    """
    import re

    template = '''fund_dict["date"] = date
    fund_dict["latest_nvalue_pu"] = latest_nvalue_pu
    fund_dict["latest_sum_nvalue"] = latest_sum_nvalue
    fund_dict["last_nvalue_pu"] = last_nvalue_pu
    fund_dict["last_sum_nvalue"] = last_sum_nvalue
    fund_dict["daily_growth"] = daily_growth
    fund_dict["daily_growth_rate"] = daily_growth_rate'''

    names = []
    for line in template.split("\n"):
        target = line.split("=")[0].strip()
        if not target:
            # Blank template lines carry no column.
            continue
        # Reduce fund_dict["name"] to name (raw strings: "\[" is not a valid
        # escape in a normal string literal).
        target = re.sub(r'fund_dict\["', "", target)
        target = re.sub(r'"\]', "", target)
        names.append(target)

    tt = ",".join(names)
    data = ",".join('{0}=each_data["{0}"]'.format(name) for name in names)
    values = ",".join(r"\'{" + name + r"}\'" for name in names)
    sql = r"insert into {table_name} " + "({}) values({})".format(tt, values)

    print(sql)
    print(values)
    print(data)
    print(tt)

def get_sql():
    """Developer helper: print one ``VARCHAR(50) NULL`` column definition per detail-table field."""
    columns = "date,fund_name,latest_nvalue_pu,latest_sum_nvalue,last_nvalue_pu,last_sum_nvalue,daily_growth,daily_growth_rate".split(",")
    # Same shape as: `fund_code` VARCHAR(50) NULL,
    definitions = "".join(
        "`" + column + "`" + "VARCHAR(50) NULL," + "\n" for column in columns
    )
    print(definitions)

def save_to_mysql():
    """Download the whole fund ranking and store it in a table named after the fund type.

    The table is created first when it does not exist yet.  Nothing is written
    when no records could be downloaded.
    """
    func_name = sys._getframe().f_code.co_name
    mysql = MysqlFundCode()

    # Download once and drop pages that failed (None) or came back empty.
    all_fund_lists = [page for page in get_all_fund_lists() if page]
    if not all_fund_lists:
        logging.error("no fund records downloaded, nothing to save | {}".format(func_name))
        return

    # The table name was historically taken from the second record of page 1;
    # keep that choice and fall back to the first record on a one-record page.
    first_page = all_fund_lists[0]
    table_name = first_page[1 if len(first_page) > 1 else 0]["fund_type"]

    if not mysql.check_table_if_exist(table_name=table_name):
        logging.info("{}表没有创建,正在创建... | {}".format(table_name, func_name))
        mysql.create_table(table_name)
        logging.info("创建成功!正在将数据写入{}中... | {}".format(table_name, func_name))
    else:
        logging.info("表已存在,正在将数据写入{}中... | {}".format(table_name, func_name))

    for fund_list in all_fund_lists:
        for each_data in fund_list:
            mysql.insert_into_table(table_name, each_data)
    logging.info("{}。写入成功。 | {}".format(table_name, func_name))

MysqlDB.py

import pymysql,logging,sys

from toolkit import LOG_FORMAT,DATE_FORMAT,get_year_mon_day,get_class_name

class MysqlFundCode():
    """MySQL access for the fund-list tables (one row per fund, every column stored as text)."""

    # Columns written by insert_into_table, in insert order.  The same names
    # are the keys read from each record dict.
    COLUMNS = (
        "fund_code", "fund_name", "fund_type", "date", "nvalue_pu",
        "day_growth_rate", "a_week_rate", "a_month_rate", "_3_month_rate",
        "_6_month_rate", "a_year_rate", "_2_year_rate", "_3_year_rate",
        "from_this_year", "from_found_year", "poundage", "purchase_money",
    )

    def __init__(self):
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "root"
        self.database = "fund_data"
        self.charset = "utf8mb4"
        self.port = 3306
        # Number of rows successfully inserted through this instance.
        self.count = 0

    def DB(self):
        """Open and return a new pymysql connection built from the instance settings."""
        # Keyword arguments only: recent PyMySQL releases no longer accept
        # positional connect() arguments.
        return pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )

    @staticmethod
    def _quote_identifier(name):
        """Backtick-quote a table/column name so it can be embedded in SQL text."""
        return "`{}`".format(str(name).replace("`", "``"))

    def _insert_sql(self, table_name):
        """Build the parameterized INSERT statement for *table_name*."""
        return "insert into {table}({columns}) values({placeholders})".format(
            table=self._quote_identifier(table_name),
            columns=",".join(self._quote_identifier(c) for c in self.COLUMNS),
            placeholders=",".join(["%s"] * len(self.COLUMNS)),
        )

    def insert_into_table(self, table_name,each_data):
        """Insert one fund record into *table_name*.

        :param table_name: target table.
        :param each_data: dict holding every key listed in ``COLUMNS``.
        :raises KeyError: when *each_data* lacks one of the columns.

        Database errors are logged and swallowed; ``self.count`` is increased
        for every row actually written.
        """
        # Built before connecting, so a malformed record cannot leak a
        # connection.  Values travel as query parameters, which keeps quotes
        # inside fund names from breaking (or injecting into) the statement.
        sql = self._insert_sql(table_name)
        params = [each_data[column] for column in self.COLUMNS]

        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, params)
                mysqlDB.commit()
                if cursor.rowcount >= 1:
                    self.count += 1
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def create_table(self,table_name):
        """Create the fund-list table *table_name* if it does not exist.

        :return: True on success, False when the statement failed (the error is logged).
        """
        sql = '''
        CREATE TABLE IF NOT EXISTS {table_name}(
        `id` bigint NOT NULL AUTO_INCREMENT ,
        `fund_code` VARCHAR(40) NULL,
        `fund_name` VARCHAR(100) NULL,
        `fund_type` VARCHAR(40) NULL,
        `date` VARCHAR(40) NULL,
        `nvalue_pu` VARCHAR(40) NULL,
        `day_growth_rate` VARCHAR(40) NULL,
        `a_week_rate` VARCHAR(40) NULL,
        `a_month_rate` VARCHAR(40) NULL,
        `_3_month_rate` VARCHAR(40) NULL,
        `_6_month_rate` VARCHAR(40) NULL,
        `a_year_rate` VARCHAR(40) NULL,
        `_2_year_rate` VARCHAR(40) NULL,
        `_3_year_rate` VARCHAR(40) NULL,
        `from_this_year` VARCHAR(40) NULL,
        `from_found_year` VARCHAR(40) NULL,
        `poundage` VARCHAR(40) NULL,
        `purchase_money` VARCHAR(40) NULL,
        PRIMARY KEY ( `id` )
        )ENGINE=InnoDB DEFAULT CHARSET=utf8;
        '''.format(table_name=self._quote_identifier(table_name))

        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
            return True
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def check_table_if_exist(self,table_name):
        """Tell whether a table named exactly *table_name* exists in the configured database.

        :return: True when it exists; False when it does not or when the lookup
            failed (the error is logged).
        """
        # Exact-name lookup.  The former substring test reported "指数型" as
        # existing as soon as any table merely contained that text.
        sql = (
            "SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s LIMIT 1"
        )
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, (self.database, table_name))
                return cursor.fetchone() is not None
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def get_code_and_name_and_type(self,table_name):
        """Read every fund of *table_name* and derive the name of its detail table.

        :return: list of ``[fund_code, "<code>_<name>_<type>"]`` entries; an
            empty list when the query failed (the error is logged).
        """
        sql = "SELECT fund_code,fund_name,fund_type FROM {}".format(self._quote_identifier(table_name))
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
                rows = cursor.fetchall()
            # Each row is (fund_code, fund_name, fund_type).
            return [
                [row[0], "{}_{}_{}".format(row[0], row[1], row[2])]
                for row in rows
            ]
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return []
        finally:
            mysqlDB.close()

    def show_data_rows(self):
        """Print the total number of rows over all tables of the database."""
        total_count = 0
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute("show tables")
                table_lists = [row[0] for row in cursor.fetchall()]
                for table in table_lists:
                    # Quoted: table names may contain characters that are not
                    # valid in a bare identifier.
                    cursor.execute("select count(*) from {}".format(self._quote_identifier(table)))
                    total_count += cursor.fetchall()[0][0]
            print("_"*20)
            print("from now on,there are {} lines data in database.".format(self.good_to_show(total_count)))
            print("_" * 20)
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def show_insert_rows(self):
        """Print how many rows this instance has inserted so far."""
        print("_" * 20)
        print("there total insert {} lines data in database.".format(self.good_to_show(self.count)))
        print("_" * 20)

    @staticmethod
    def good_to_show(num):
        """Abbreviate a count with a Chinese magnitude unit, keeping one (truncated) decimal.

        5-6 digits use 万, 7 digits 百万, 8 digits 千万, 9 or more digits 亿;
        shorter or non-integer values are returned as ``str(num)``.
        """
        text = str(num)
        sign = "-" if text.startswith("-") else ""
        digits = text[len(sign):]
        if not digits.isdigit():
            return text

        # (minimum digit count, digits folded into the unit, unit), largest first.
        scales = ((9, 8, "亿"), (8, 7, "千万"), (7, 6, "百万"), (5, 4, "万"))
        for min_digits, shift, unit in scales:
            if len(digits) >= min_digits:
                # digits[-shift] is the first digit after the unit boundary
                # (the old 10+ digit branch wrongly took the second digit).
                return sign + digits[:-shift] + "." + digits[-shift] + unit
        return text

class MysqlFundDetailData():
    """MySQL access for the per-fund history tables (one row per trading day, stored as text)."""

    # Columns written by insert_into_table, in insert order.  The same names
    # are the keys read from each record dict.
    COLUMNS = (
        "date", "latest_nvalue_pu", "latest_sum_nvalue", "last_nvalue_pu",
        "last_sum_nvalue", "daily_growth", "daily_growth_rate",
    )

    def __init__(self):
        self.host = "127.0.0.1"
        self.user = "root"
        self.password = "root"
        self.database = "fund_data"
        self.charset = "utf8mb4"
        self.port = 3306
        # Number of rows successfully inserted through this instance.
        self.count = 0

    def DB(self):
        """Open and return a new pymysql connection built from the instance settings."""
        # Keyword arguments only: recent PyMySQL releases no longer accept
        # positional connect() arguments.
        return pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            port=self.port,
            charset=self.charset,
        )

    @staticmethod
    def _quote_identifier(name):
        """Backtick-quote a table/column name so it can be embedded in SQL text."""
        return "`{}`".format(str(name).replace("`", "``"))

    def _insert_sql(self, table_name):
        """Build the parameterized INSERT statement for *table_name*."""
        return "insert into {table}({columns}) values({placeholders})".format(
            table=self._quote_identifier(table_name),
            columns=",".join(self._quote_identifier(c) for c in self.COLUMNS),
            placeholders=",".join(["%s"] * len(self.COLUMNS)),
        )

    def insert_into_table(self, table_name,each_data):
        """Insert one daily record into *table_name*.

        :param table_name: target table.
        :param each_data: dict holding every key listed in ``COLUMNS``.
        :raises KeyError: when *each_data* lacks one of the columns.

        Database errors are logged and swallowed; ``self.count`` is increased
        for every row actually written.
        """
        # Built before connecting, so a malformed record cannot leak a
        # connection.  Values travel as query parameters instead of being
        # pasted into the SQL text; a None value is stored as NULL.
        sql = self._insert_sql(table_name)
        params = [each_data[column] for column in self.COLUMNS]

        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, params)
                mysqlDB.commit()
                if cursor.rowcount >= 1:
                    self.count += 1
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
        finally:
            mysqlDB.close()

    def create_table(self,table_name):
        """Create the history table *table_name* if it does not exist.

        :return: True on success, False when the statement failed (the error is logged).
        """
        sql = '''
        CREATE TABLE IF NOT EXISTS {table_name}(
        `id` bigint NOT NULL AUTO_INCREMENT ,
        `date` VARCHAR(50) NULL,
        `latest_nvalue_pu` VARCHAR(50) NULL,
        `latest_sum_nvalue` VARCHAR(50) NULL,
        `last_nvalue_pu` VARCHAR(50) NULL,
        `last_sum_nvalue` VARCHAR(50) NULL,
        `daily_growth` VARCHAR(50) NULL,
        `daily_growth_rate` VARCHAR(50) NULL,
        PRIMARY KEY ( `id` )
        )ENGINE=InnoDB DEFAULT CHARSET=utf8;
        '''.format(table_name=self._quote_identifier(table_name))

        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql)
            return True
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

    def check_table_if_exist(self,table_name):
        """Tell whether a table named exactly *table_name* exists in the configured database.

        :return: True when it exists; False when it does not or when the lookup
            failed (the error is logged).
        """
        # Exact-name lookup.  The former substring test treated a table as
        # existing whenever its name was contained in any other table's name.
        sql = (
            "SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s LIMIT 1"
        )
        mysqlDB = self.DB()
        try:
            with mysqlDB.cursor() as cursor:
                cursor.execute(sql, (self.database, table_name))
                return cursor.fetchone() is not None
        except Exception as e:
            logging.error("{} | {} | {}".format(e, get_class_name(self), sys._getframe().f_code.co_name))
            return False
        finally:
            mysqlDB.close()

toolkit.py

import time,sys

import requests

from random import randint

import logging

# Record layout used for the log file: timestamp - level name - message.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"

# strftime pattern used for %(asctime)s.
# NOTE(review): %H is the 24-hour clock, so the trailing %p (AM/PM) looks redundant -- confirm intent.
DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"

def get_year_mon_day(y = 0,m = 0,d = 0):
    """Return the local date shifted back by *y* years, *m* months and *d* days.

    :param y: number of years to go back.
    :param m: number of months to go back.
    :param d: number of days to go back.
    :return: the date as ``"<year>-<month>-<day>"`` without zero padding
        (e.g. ``"2019-9-5"``).

    Years and months are applied first; the day of month is then clamped to
    the length of the target month (Mar 31 minus one month gives Feb 28/29)
    before the days are subtracted, so the result is always a real calendar
    date.
    """
    import calendar
    import datetime

    now = time.localtime()

    # Work on a zero-based month counter so borrowing across years is plain
    # integer arithmetic.
    month_index = now.tm_year * 12 + (now.tm_mon - 1) - (y * 12 + m)
    year, month_zero_based = divmod(month_index, 12)
    month = month_zero_based + 1
    day = min(now.tm_mday, calendar.monthrange(year, month)[1])

    shifted = datetime.date(year, month, day) - datetime.timedelta(days=d)
    return "{}-{}-{}".format(shifted.year, shifted.month, shifted.day)

import os

# One log file per calendar day, e.g. log/mylog_2019-12-28.log.
filename = "log/mylog_{}.log".format(get_year_mon_day())

# basicConfig(filename=...) opens the file right away and fails when the
# directory is missing, so make sure it exists before configuring logging.
os.makedirs(os.path.dirname(filename), exist_ok=True)

logging.basicConfig(filename=filename, level=logging.DEBUG, format=LOG_FORMAT, datefmt=DATE_FORMAT)

def get_class_name(self):
    """Return the name of the class of the object passed as *self*."""
    klass = self.__class__
    return klass.__name__

# Pool of browser User-Agent strings.  Built once at import time; duplicates
# are kept on purpose so the selection weights stay as they were.
USER_AGENTS = (
    # Desktop browsers
    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',  # Safari 5.1 - Mac
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',  # Safari 5.1 - Windows
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',  # IE 9.0 (closing parenthesis restored)
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',  # IE 8.0
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',  # IE 7.0
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',  # IE 6.0
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',  # Firefox 4.0.1 - Mac
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',  # Firefox 4.0.1 - Windows
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',  # Opera 11.11 - Mac
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',  # Opera 11.11 - Windows
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',  # Chrome 17.0 - Mac
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',  # Maxthon
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',  # Tencent Traveler
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',  # The World 2.x
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',  # The World 3.x (leading "M" restored)
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',  # Sogou browser 1.x
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',  # 360 browser
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',  # Avant
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',  # Green Browser
    # Mobile user agents
    'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.3.3 - iPhone
    'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.3.3 - iPod Touch
    'Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',  # Safari iOS 4.3.3 - iPad
    'Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',  # Android Nexus One
    'MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',  # QQ Browser for Android
    'Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10',  # Android Opera Mobile
    'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+',  # BlackBerry 9800
    'Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0',  # WebOS HP TouchPad
    'Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124',  # Nokia N97
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)',  # Windows Phone Mango
    'UCWEB7.0.2.37/28/999',  # UC (plain)
    'Openwave/ UCWEB7.0.2.37/28/999',  # UC Openwave
    'Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999',  # UC Opera
    # Additional desktop user agents
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
)


def getUser_Agent():
    """Return request headers carrying one randomly chosen User-Agent.

    :return: dict with the single key ``'User-Agent'`` whose value is drawn
        uniformly from ``USER_AGENTS``.
    """
    return {'User-Agent': USER_AGENTS[randint(0, len(USER_AGENTS) - 1)]}

# Request headers shared by every call: the User-Agent is drawn once, when the
# module is imported.
headers = getUser_Agent()


def get_HTML_content(url, timeout=30):
    """Download *url* and return the response body decoded as UTF-8.

    :param url: address to fetch.
    :param timeout: seconds to wait for the server before giving up; without
        a timeout a stalled connection would block the crawler forever.
    :return: the response body as text.
    :raises requests.RequestException: on connection problems, timeouts and
        HTTP error statuses (4xx/5xx), so error pages are not parsed as data.
    :raises UnicodeDecodeError: when the body is not valid UTF-8.
    """
    resp = requests.get(url=url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    return resp.content.decode("utf-8")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值