具体代码如下,请帮忙解决一下,谢谢了。
# -*- coding:utf-8 -*-
'''
MBA院校新闻收集器
'''
import re # regular-expression module
import bs4 # DOM解析模块
import pymysql # 数据库连接模块
import urllib # 网络访问模块
import urllib.request # 网络访问模块
import urllib.error # 网络访问模块
import time # 时间模块
import random # 随机数模块
import lxml # lxml模块
from lxml.html.clean import Cleaner, clean_html # 清除模块
import selenium
# from datetime import *
# Database connection parameters.
# Every value is stored as a string; the port is converted with int()
# where the connection is opened.
db_config = dict(
    host='127.0.0.1',
    port='3306',
    username='root',
    password='error.error',
    database='hxedu',
    charset='utf8',
)
# Connect to the database.
# The keyword arguments are collected first, mapping each db_config entry
# onto the parameter name that pymysql.Connect is called with.
_connect_args = {
    'host': db_config['host'],            # database address (localhost/127.0.0.1)
    'port': int(db_config['port']),       # port is stored as a string in db_config
    'user': db_config['username'],        # user name for the connection
    'passwd': db_config['password'],      # password for the connection
    'db': db_config['database'],          # database (schema) name
    'charset': db_config['charset'],      # connection character set
}
connect = pymysql.Connect(**_connect_args)
cursor = connect.cursor()

# Fetch every row of the URL-rule table whose isdelete column is 0.
cursor.execute("select * from bs_college_rules WHERE isdelete=0")
results = cursor.fetchall()
for row in results :
collegeid = row[1]
index_url = row[2]
html_code = row[3]
home_url = row[4]
columns_url = row[5]
column_img = row[6]
column_parent = row[7]
column_title = row[8]
column_keywords = row[9]
column_description =