python3.7保存不了_python3.7爬取墨菲定律保存在本地txt

#!/usr/local/bin/python3.7

# -*- coding: utf-8 -*-

# @Time: 2019/07/15

# @Function 获取在线文本内容

import requests

from bs4 import BeautifulSoup

import re

import codecs

# Start page requested first by getContent().
url = 'https://www.shuhaige.com/7518/'

# HTTP headers sent with every request made by this script.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
    'Origin': 'https://www.shuhaige.com',
    'Host': 'www.shuhaige.com',
}

# Proxy servers. The keys must be the bare scheme names ('http', 'https'):
# requests matches them against the scheme of the URL being fetched, so a key
# with a trailing colon ('http:') is never selected.
# NOTE(review): nothing visible in this file passes `proxies` to requests.
proxies = {
    'http': 'http://121.232.146.184',
    'https': 'https://144.255.48.197',
}

def getContent():
    """Fetch the index page and save every page it links to.

    Requests ``url``, takes the anchors inside the first ``<dl>`` element,
    downloads each linked page and passes the first ``div .content`` match
    together with the anchor's string to ``saveToTxt``.

    Raises:
        requests.HTTPError: if any page answers with an error status.
        requests.Timeout: if a server does not answer within the timeout.
        IndexError: if the index page has no ``<dl>`` element or a linked
            page has no ``div .content`` element.
    """
    from urllib.parse import urljoin

    # Without a timeout requests waits indefinitely on a stalled server.
    timeout = 30

    response = requests.get(url, headers=header, timeout=timeout)
    response.raise_for_status()
    index_page = BeautifulSoup(response.text, 'html.parser')

    # `link` rather than `list`, so the builtin is not shadowed.
    for link in index_page.select('dl')[0].select('a'):
        # urljoin resolves root-relative, relative and absolute hrefs alike.
        chapter_url = urljoin(url, link['href'])
        chapter = requests.get(chapter_url, headers=header, timeout=timeout)
        chapter.raise_for_status()
        chapter_page = BeautifulSoup(chapter.text, 'html.parser')
        saveToTxt(chapter_page.select('div .content')[0], link.string)

# Write the text to a file
def saveToTxt(comments, title):
    """Strip markup from each node and write the joined text to a file.

    Args:
        comments: iterable whose items are rendered to text one by one
            (strings work, as does anything an f-string can format).
        title: used as the file name: ``MoFeiDingLv/{title}.txt``.

    The ``MoFeiDingLv`` directory is created when it does not exist yet, so
    the first run no longer fails with FileNotFoundError.
    """
    import os

    # NOTE(review): both literals were unterminated in the source (the markup
    # inside them had been lost, leaving only a line break and a paragraph
    # break). They are reconstructed here as a <br/> tag and a <p>...</p>
    # element - confirm against the real page markup.
    line_break = '<br/>'
    paragraph = re.compile(r'<p>.*</p>')

    pieces = []
    for item in comments:
        text = f'{item}'.replace(line_break, '')
        pieces.append(paragraph.sub('', text))

    os.makedirs('MoFeiDingLv', exist_ok=True)
    with codecs.open(f'MoFeiDingLv/{title}.txt', 'w', encoding='utf-8') as file:
        file.write(''.join(pieces))

    print(f'{title}写入文件成功!')

# Start the scrape. There is no __main__ guard, so this also runs on import.
getContent()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值