习题讲解

最新推荐文章于 2023-10-19 21:34:56 发布

弯弯的丝瓜

最新推荐文章于 2023-10-19 21:34:56 发布

阅读量230

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/u012516571/article/details/79796470

版权

python 专栏收录该内容

29 篇文章 0 订阅

订阅专栏

"""
1.定义一个func(url,folder_path)获取url地址的内容，保存到folder_path指定的目录下，并随机产生一个文件名
2.定义一个func(folder_path),合并该目录下所有的文件，生成一个all.txt
3.定义一个func(url),分析该url内容里面有多少个链接
4.定义一个func(url),获取它？后的参数，并返回一个dict
5.定义一个func(folder),删除该folder下的所有文件。
"""
import os
import urllib
import random

def save_url_content(url, folder_path=None):
    """Download *url* and save the response body to a randomly named file.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        folder_path: Existing directory in which the file is created.

    Returns:
        The path of the written file on success, the string ``'error url'``
        when *url* has an unsupported scheme, or ``'error file path'`` when
        *folder_path* is missing or is not a directory.
    """
    # Imported locally so the function works on Python 3 as well as on the
    # Python 2 interpreter this file was originally written for.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if not url.startswith(('http://', 'https://')):
        return 'error url'
    # os.path.isdir(None) raises TypeError, so reject the default explicitly.
    if folder_path is None or not os.path.isdir(folder_path):
        return 'error file path'

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # The random number is substituted directly after the "test_" prefix.
    rand_filename = 'test_%s' % random.randint(1, 1000)
    # Build the full path of the output file.
    file_path = os.path.join(folder_path, rand_filename)
    # The file is created if it does not exist yet; binary mode because
    # read() returns the raw response bytes.
    with open(file_path, 'wb') as f:
        f.write(content)
    return file_path

if __name__ == '__main__':
    # Demo: fetches the Baidu home page and stores it under ../test
    # (the directory must already exist), then prints the saved file path.
    print(save_url_content('http://www.baidu.com', '../test'))

"""
3,统计超链接个数
"""

def get_url_list(url):
    """Count the hyperlinks in the page served at *url*.

    A hyperlink is counted as one non-overlapping occurrence of the literal
    text ``<a href=`` in the response body.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.

    Returns:
        The number of occurrences as an int, or the string ``'error url'``
        when *url* has an unsupported scheme.
    """
    # Imported locally so the function works on Python 3 as well as on the
    # Python 2 interpreter this file was originally written for.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if not url.startswith(('http://', 'https://')):
        return 'error url'

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # read() returns bytes, so the needle must be a bytes literal too.
    # count() equals len(content.split(needle)) - 1.
    return content.count(b'<a href=')

"""

2.定义一个func(folder_path),合并该目录下所有的文件，生成一个all.txt
使用递归解决

"""
import os
def merge(folder_path, output_path='tmp/merge_test'):
    """Append the contents of every file under *folder_path* to one file.

    Sub-directories are walked recursively. Entries are visited in sorted
    name order so the merged result is deterministic.

    Args:
        folder_path: Directory whose files are merged.
        output_path: File that receives the merged bytes. It is opened in
            append mode, so existing content is kept. Its parent directory
            must already exist.
    """
    # Resolve once so the output file can be skipped if it lives inside the
    # tree being merged (otherwise it would be appended to itself).
    skip_path = os.path.realpath(output_path)
    with open(output_path, 'ab') as merged:
        _merge_into(folder_path, merged, skip_path)


def _merge_into(folder_path, merged, skip_path):
    """Recursively append each file below *folder_path* to the open handle *merged*."""
    for name in sorted(os.listdir(folder_path)):
        # Join the directory and the entry name into a full path.
        file_path = os.path.join(folder_path, name)
        if os.path.isdir(file_path):
            # Recurse into the sub-directory itself, not the current folder.
            _merge_into(file_path, merged, skip_path)
        elif os.path.realpath(file_path) != skip_path:
            with open(file_path, 'rb') as source:
                merged.write(source.read())

# Note: open(filename, 'rb') opens a file for reading in binary mode.

"""
4.定义一个func(url),获取它？后的参数，并返回一个dict

"""
try:
    import urlparse  # Python 2 module for parsing URLs
except ImportError:
    import urllib.parse as urlparse  # Python 3 location of the same functions


def qs(url):
    """Return the query-string parameters of *url* as a dict.

    Only the part after ``?`` is parsed. When a parameter appears more than
    once, its first value is kept.

    Args:
        url: The URL whose query string is parsed.

    Returns:
        A dict mapping each parameter name to its first value; empty when
        the URL has no query string.
    """
    query = urlparse.urlparse(url).query
    return {key: values[0] for key, values in urlparse.parse_qs(query).items()}

"""

5.定义一个func(folder),删除该folder下的所有文件。

"""

def rmdir(folder):
    """Delete every file under *folder*, leaving the directories in place.

    Sub-directories are processed recursively; the directories themselves
    are not removed.

    Args:
        folder: Directory whose files are deleted.

    Returns:
        ``None`` on success, or the string ``'error dir'`` when *folder* is
        not an existing directory.
    """
    # isdir() also rejects a path that exists but is a regular file, which
    # os.listdir() below could not handle.
    if not os.path.isdir(folder):
        return 'error dir'
    for name in os.listdir(folder):
        file_path = os.path.join(folder, name)
        # Do not follow symlinks to directories: remove the link itself so
        # files outside *folder* are never touched.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            rmdir(file_path)
        else:
            os.remove(file_path)