刚学Python做爬虫练手时,遇到这个问题,求各位大神支招~?
code:
from bs4 import BeautifulSoup
from urllib import request
from datetime import datetime
# 抓取网页!
response = request.urlopen("http://fund.eastmoney.com/fund.html")
html = response.read()
html = html.decode('gbk') # 这一步是为啥?
with open("./htmls/1.txt", 'wb') as f:
f.write(html.encode('utf8'))
f.close()
with open("./htmls/1.txt", 'rb') as f:
html = f.read().decode('utf8')
f.close()
soup = BeautifulSoup(html, "html.parser")
fCodes = soup.find("table", id="oTable").tbody.find_all("td", "bzdm") # 基金编码
result = ()
for fCode in fCodes:
result += (
{
"fcode": fCode.get_text(),
"fname": fCode.next_sibling.find("a").get_text(),
"NAV": fCode.next_sibling.next_sibling.get_text(),
"ACCNAV": fCode.next_sibling.next_sibling.next_sibling.get_text() if fCode.next_sibling.next_sibling.next_sibling.get_text() != '---' else 0.0000,
"updatetime": datetime.now().isoformat(sep=' ', timespec="seconds")
},)
# print(result)
import pymysql
from pymysql.cursors import Cursor, SSCursor
# from common.config import dbconfig
# connection = pymysql.connect(**dbconfig)
# Connect to the database
connection = pymysql.connect(host='localhost',
user='root',
password='root',
db='ins',
charset='utf8',
cursorclass=pymysql.cursors.DictCursor)
cursor = Cursor(connection)
sql = """insert into myfund(fcode, fname,NAV,ACCNAV,updatetime)
values(%(fcode)s,%(fname)s,%(NAV)s,%(ACCNAV)s,%(updatetime)s)
ON duplicate KEY UPDATE `updatetime`=%(updatetime)s,NAV=%(NAV)s,ACCNAV=%(ACCNAV)s"""
res = cursor.executemany(sql, result)
cursor.fetchall()
connection.commit()
print(res)
connection.close()
#报错
#
Traceback (most recent call last):
File "/Users/carl/wwwroot/TestAction/PythonActions/jtthinkPythonActions/day08_tuple/mypro/__main__db.py", line 68, in
res = cursor.executemany(sql, result)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/cursors.py", line 192, in executemany
self._get_db().encoding)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/cursors.py", line 229, in _do_execute_many
rows += self.execute(sql + postfix)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/cursors.py", line 165, in execute
result = self._query(query)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/cursors.py", line 321, in _query
conn.query(q)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/connections.py", line 860, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/connections.py", line 1061, in _read_query_result
result.read()
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/connections.py", line 1349, in read
first_packet = self.connection._read_packet()
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/connections.py", line 1018, in _read_packet
packet.check_error()
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/connections.py", line 384, in check_error
err.raise_mysql_exception(self._data)
File "/Users/carl/wwwroot/TestAction/PythonActions/venvActions/lib/python3.6/site-packages/pymysql/err.py", line 107, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '%(updatetime)s,NAV=%(NAV)s,ACCNAV=%(ACCNAV)s' at line 3")
#数据可以插入进去,但要删除ON Duplicate key update,并清空表
数据库表结构:
-- auto-generated definition
CREATE TABLE myfund
(
fcode VARCHAR(20) NOT NULL,
fname VARCHAR(20) NULL,
NAV DECIMAL(10, 4) NULL
COMMENT '单位净值',
ACCNAV DECIMAL(10, 4) NULL
COMMENT '累计净值',
updatetime DATETIME NULL,
fdate DATETIME NOT NULL
COMMENT '基金日期',
DGR VARCHAR(20) NULL
COMMENT '日增长率',
DGV VARCHAR(20) NULL
COMMENT '日增长值',
fee VARCHAR(20) NULL,
PRIMARY KEY (fcode, fdate)
)
ENGINE = InnoDB;