python代码输错怎么修改_【求助】求大佬帮忙看看这个代码中错误如何修改?

该楼层疑似违规已被系统折叠 隐藏此楼查看此楼

#!/usr/bin/env python

# -*- coding:utf-8 -*-

from bs4 import BeautifulSoup

import urllib.request

import urllib

import urllib.error

import re

import sys

headers = {

"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"

}

url = 'https://book.douban.com/top250'

class bookTop250:

def __init__(self):

#设置默认编码格式为utf-8

self.start = 0

self.param = 'no-cache'

self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

self.movieList = []

self.filePath = 'E:/crawler/DoubanTop250.txt'

def getPage(self):

try:

URL = 'https://book.douban.com/top250' + str(self.start)

req = urllib.request(url=URL, headers=self.headers)

response = urllib.request.urlopen(req)

page = response.read().decode('utf-8')

pageNum = (self.start + 25) / 25

print( '正在抓取第' + str(pageNum) + '页数据...')

self.start += 25

return page

except urllib.error.URLError as e:

if hasattr(e, 'reason'):

print('抓取失败,具体原因:', e.reason)

def getMovie(self):

pattern = re.compile(u'

.*?.*?'

+u'

.*?[\n][\s]+[\n][\s]+'

+u'(.*?)[\n][\s]+(.*?)'

, re.S)

while self.start <= 225:

page = self.getPage()

movies = re.findall(pattern, page)

for movie in movies:

self.movieList.append([movie[0], movie[1], movie[2].lstrip(),

movie[3].lstrip, movie[4]])

def writeTxt(self):

fileTop250 = open(self.filePath, 'w')

try:

for movie in self.movieList:

fileTop250.write('shuming:' + movie[0] + '\r\n')

fileTop250.write('zuozhe:' + movie[1] + '\r\n')

fileTop250.write('pingfen:' + movie[2] + '\r\n')

fileTop250.write('renshu:' + movie[3] + '\r\n')

fileTop250.write('导演姓名:' + movie[4] + '\r\n')

print('文件写入成功...')

finally:

fileTop250.close()

def main(self):

print('正在从豆瓣电影Top250抓取数据...')

self.getMovie()

self.writeTxt()

print('抓取完毕...')

DouBanSpider = bookTop250()

DouBanSpider.main()

在pycharm上运行后报错如下,百度了半天不知如何修改了?求大佬解答如何修改才能正确运行

6834544e9258d109e58f12eeda58ccbf6d814dda.jpg

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值