总结:
<1>if __name__ == '__main__':
target = 'http://www.biqukan.com/1_1094/5403177.html'
req = requests.get(url=target)
req.encoding = req.apparent_encoding
print(req.text)
<2>python 本地安装 beautifulsoup方法!
(1)
1、在python官网找到beautifulsoup模块的下载页面,点击"download"将该模块的安装包下载到本地
2、将该安装包解压,然后打开cmd,并通过cmd进入到该安装包解压后的文件夹目录下
3、在该文件目录下输入"python setup.py install",进行beautifulsoup4模块的安装,当安装完成后会看到有"Finished"字样
4、安装完成后,在cmd中运行Python,然后输入"from bs4 import BeautifulSoup" 导入该模块,如果成功安装的话将没有任何打印信息,否则会有相应的错误信息打印
(2)
$wget http://www.crummy.com/software/BeautifulSoup/bs4/download/4.3/beautifulsoup4-4.3.2.tar.gz
$tar zxvf beautifulsoup4-4.3.2.tar.gz
$cd beautifulsoup4-4.3.2
$python setup.py install
$python
>>> from bs4 import BeautifulSoup
(3)
打开cmd,然后在cmd中输入命令“pip install beautifulsoup4”,就可以成功安装beautifulsoup4,不过该版本不一定会是4.4.1,版本会是当前python库中的最新版本。
<3>python 中 tab 和四个空格不相等,缩进时不要混用
<4> 小数转换为百分比
# Method 1: %-formatting. "%.2f" keeps two decimals, "%%" emits a literal '%'.
a = 0.3214323
bb = "%.2f%%" % (a * 100)
print(bb)
# prints 32.14%
# Method 2: string concatenation.
a = 0.3214323
b = str(a * 100) + '%'
print(b)
# prints roughly 32.14323% (str() shows the full float, so the trailing
# digits depend on binary floating-point rounding)
# To keep two decimal places, format the value AFTER scaling by 100.
# Slicing str(a) instead would give "0.32%", which is off by a factor of 100.
c = format(a * 100, '.2f') + '%'
print(c)
# prints 32.14%
比如将 0.1234 转化为 12.34% 的形式:
# Scale the fraction by 100, keep two decimals, and append a literal '%'.
rate = 0.1234
print('%.2f%%' % (rate * 100))  # prints 12.34%
说明:第一个百分号和 .2f 相连,表示浮点数类型保留小数点后两位格式化输出;然后的两个连续的%%,则最终会输出一个%号出来,有对%进行转义的含义;
小说下载代码:
#!/usr/bin/env python
from bs4 import BeautifulSoup
import requests,sys
'''
if __name__ == '__main__':
target = 'http://www.biqukan.com/1_1094/5403177.html'
req = requests.get(url=target)
req.encoding = req.apparent_encoding
html = req.text
bf = BeautifulSoup(html)
texts = bf.find_all('div', class_ = 'showtxt')
# print(texts)
print(texts[0].text.replace('\xa0'*8,'\n\n')) '''
'''
if __name__ == "__main__":
target = 'http://www.biqukan.com/1_1094/'
req = requests.get(url = target)
req.encoding = req.apparent_encoding
html = req.text
div_bf = BeautifulSoup(html)
div = div_bf.find_all('div', class_ = 'listmain')
print(div[0])'''
'''
if __name__ == "__main__":
server = 'http://www.biqukan.com/'
target = 'http://www.biqukan.com/1_1094/'
req = requests.get(url = target)
req.encoding = req.apparent_encoding
html = req.text
div_bf = BeautifulSoup(html)
div = div_bf.find_all('div', class_ = 'listmain')
a_bf = BeautifulSoup(str(div[0]))
a = a_bf.find_all('a')
for each in a:
print(each.string, server + each.get('href'))'''
''' readme download yi-nian-yong-heng '''
class downloader(object):
    """Scrape a chapter index page on biqukan.com and save chapters to a file.

    Typical use: call ``get_download_url()`` once to fill ``names``, ``urls``
    and ``nums``, then for each URL call ``get_contents(url)`` and pass the
    result to ``writer(name, path, text)``.
    """

    # Seconds to wait on each HTTP request. Without a timeout ``requests``
    # can block indefinitely on an unresponsive server.
    timeout = 10

    def __init__(self):
        self.server = 'http://www.biqukan.com/'          # site root used to resolve chapter links
        self.target = 'http://www.biqukan.com/1_1094/'   # chapter index page
        self.names = []   # chapter titles, parallel to ``urls``
        self.urls = []    # absolute chapter URLs
        self.nums = 0     # number of chapters selected

    def get_download_url(self, start=5, stop=10):
        """Read the index page and record the selected chapters.

        The anchors inside the first ``div.listmain`` are sliced with
        ``[start:stop]``; the defaults reproduce the original fixed
        selection of anchors 5 through 9. Pass ``stop=None`` to take
        everything from ``start`` onward.

        ``names``, ``urls`` and ``nums`` are rebuilt on every call, so
        calling this twice does not leave duplicate entries behind.

        Raises:
            IndexError: if the page contains no ``div.listmain``.
            requests.exceptions.RequestException: on network failure or
                timeout.
        """
        from urllib.parse import urljoin

        req = requests.get(url=self.target, timeout=self.timeout)
        req.encoding = req.apparent_encoding
        index = BeautifulSoup(req.text, "html.parser")
        listing = index.find_all('div', class_='listmain')
        # Search the already-parsed tag directly; re-parsing its string form
        # would find the same anchors at extra cost.
        links = listing[0].find_all('a')[start:stop]
        self.names = [link.string for link in links]
        # urljoin avoids the doubled slash that plain concatenation produces
        # when an href is root-relative (starts with '/').
        self.urls = [urljoin(self.server, link.get('href')) for link in links]
        self.nums = len(links)

    def get_contents(self, target):
        """Fetch one chapter page and return its text.

        Args:
            target: URL of the chapter page.

        Returns:
            The text of the first ``div.showtxt`` with every run of eight
            non-breaking spaces replaced by a blank line.

        Raises:
            IndexError: if the page contains no ``div.showtxt``.
            requests.exceptions.RequestException: on network failure or
                timeout.
        """
        req = requests.get(url=target, timeout=self.timeout)
        req.encoding = req.apparent_encoding
        page = BeautifulSoup(req.text, "html.parser")
        body = page.find_all('div', class_='showtxt')
        # Presumably eight U+00A0 characters are the page's paragraph
        # separator; verify against the live markup.
        return body[0].text.replace('\xa0' * 8, '\n\n')

    def writer(self, name, path, text):
        """Append one chapter to a UTF-8 text file.

        Args:
            name: chapter title, written on its own line.
            path: file to append to; created if it does not exist.
            text: chapter body, either a string or an iterable of strings.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            f.writelines(text)
            f.write('\n\n')
if __name__ == "__main__":
    # Collect the chapter list, then append each chapter to my.txt while
    # showing a progress percentage that is rewritten in place.
    dl = downloader()
    dl.get_download_url()
    print(" download begin total {} chapters".format(dl.nums))
    # Count from 1 so the percentage reflects chapters already written and
    # reaches 100% after the last one.
    for done, (name, url) in enumerate(zip(dl.names, dl.urls), start=1):
        dl.writer(name, 'my.txt', dl.get_contents(url))
        # '\r' moves the cursor back to the line start so the next update
        # overwrites this one instead of scrolling.
        sys.stdout.write(" download:%.3f%%" % (done / dl.nums * 100) + '\r')
        sys.stdout.flush()
    print(' download end')