task5 python类、对象、正则表达式等

最新推荐文章于 2024-06-15 16:55:36 发布

连弋然

最新推荐文章于 2024-06-15 16:55:36 发布

阅读量103

点赞数

本文链接：https://blog.csdn.net/lianyiran/article/details/94379420

版权

1）类和对象：用来描述具有相同的属性和方法的对象的集合。它定义了该集合中每个对象所共有的属性和方法。对象是类的实例。https://www.liaoxuefeng.com/wiki/897692888725344/923030500932512

a)类的创建：使用 class 关键字，后跟以大写字母开头的类名，再接（object）表示该类继承自 object；在 Python 3 中括号可以留空或省略，类仍然默认继承自 object。在类中、方法之外定义的属性为类属性。

b)实例的创建则是类名+（）实现，同时，通过构造方法可再给实例添加属性，如范例一。

c)关于类的访问权限问题，范例二-A、B、C。另外，还有类的特性，静态方法，类方法等，可以参考这个链接的内容：https://www.cnblogs.com/wangyongsong/p/6750454.html

d)继承和多态：范例四

2）正则表达式：用一种描述性的语言来给字符串定义一个规则，凡是符合规则的字符串，可以认为其匹配该规则，https://www.liaoxuefeng.com/wiki/897692888725344/923056128128864

3）re模块：Python中对正则表达式的操作库，在字符串前加上r可以避免Python本身对字符串转义的问题，（注意积累经验，例如python中replace是对字符串的操作，而不支持正则表达式，需要用到re模块中的sub）
范例五

4）http请求：在python中主要借助requests模块获取网页内容；想了解网站构建的相关内容，可以参考：http://www.w3school.com.cn/

范例一：类与实例的创建，实例属性与实例方法

# Using a class: an instance attribute plus several instance methods.
class Calculator:
    """Four-operation calculator used to demonstrate instance attributes and methods.

    Every arithmetic method prints its result and also returns it, so the
    value can be reused by the caller instead of being lost after printing.
    """

    def __init__(self):
        # ``self`` is only the conventional name for the instance parameter.
        # __init__ is invoked automatically when an instance is created.
        self.name = 'Good calculator'  # instance attribute, readable from outside

    def add(self, x, y):
        """Print and return ``x + y``."""
        result = x + y
        print(result)
        return result

    def minus(self, x, y):
        """Print and return ``x - y``; also sets ``self.mm`` to 14."""
        result = x - y
        # mm is created on the instance only when minus() runs, so it is
        # readable from outside (as ``instance.mm``) only after this call.
        self.mm = 14
        print(result)
        return result

    def times(self, x, y):
        """Print and return ``x * y``."""
        result = x * y
        print(result)
        return result

    def divide(self, x, y):
        """Print and return ``x / y``."""
        result = x / y
        print(result)
        return result
# Create an instance, then attach one more attribute to it from outside the class.
cal = Calculator()
cal.choice = '-minus_operation'
attrs = [cal.name, cal.choice]
print(*attrs)
# The chained form below fails: minus() does not return the instance, so its
# return value has no ``mm``. Read ``cal.mm`` after calling ``cal.minus(...)``.
# print(Calculator().minus(21,5).mm)
cal.minus(21, 5)  # call an instance method as <instance>.<method>(...)

运行

Good calculator -minus_operation
16

范例二-A：类的访问权限，单下划线开头的类的属性，类本身和子类可以访问

# Access control: a single leading underscore marks a "protected" attribute.
class Swan:
    """Swan class."""

    # Protected class attribute (single leading underscore).
    _neck_swan = '天鹅的脖子很长'

    def __init__(self):
        # Read the protected attribute through the class name.
        neck = Swan._neck_swan
        print("__init__()：", neck)

swan = Swan()  # creating the instance runs __init__, which prints the attribute
# The protected attribute can also be read through the instance name.
neck_text = swan._neck_swan
print("直接访问：", neck_text)

运行

__init__()： 天鹅的脖子很长
直接访问： 天鹅的脖子很长

范例二-B：双下划线开头的类的属性，类本身可以访问，不能通过“实例名.属性名”直接访问，但可以通过“实例名._类名__属性名”访问

# Access control: a double leading underscore makes an attribute "private".
class Swan:
    """Swan class."""

    # Private class attribute; stored under the mangled name _Swan__neck_swan.
    __neck_swan = '天鹅的脖子很长'

    def __init__(self):
        # Inside the class the private name can be used directly.
        neck = Swan.__neck_swan
        print("__init__()：", neck)

    def my(self):
        """Print the class-level private attribute."""
        neck = Swan.__neck_swan
        print("my方法：", neck)

swan = Swan()  # create an instance; __init__ prints the private attribute
# Outside the class the plain double-underscore name cannot be used:
# print("直接访问：",swan.__neck_swan)
mangled_value = swan._Swan__neck_swan
print("添加类名后访问：", mangled_value)
# Assigning through the instance sets an attribute on this instance only; the
# class-level value read by my() is left unchanged. Not a recommended practice.
swan._Swan__neck_swan = '脖子不长'
swan.my()  # still prints the class-level value
changed_value = swan._Swan__neck_swan
print("修改后：", changed_value)

运行

__init__()： 天鹅的脖子很长
添加类名后访问： 天鹅的脖子很长
my方法： 天鹅的脖子很长
修改后： 脖子不长

范例二-C：创建只读的私有属性

# A read-only private attribute exposed through a property.
class TVShow:
    """Holds a show value that can be read, but not reassigned, via ``show``."""

    def __init__(self, show):
        # Kept in a private (double-underscore) instance attribute.
        self.__show = show

    @property
    def show(self):
        """Return the stored private value; no setter is defined."""
        current = self.__show
        return current
tvshow = TVShow("正在播放")  # create an instance
# Read the value through the property; it has no setter, so it cannot be reassigned.
current_show = tvshow.show
print("默认：", current_show)

运行

默认： 正在播放

范例三：创建用于计算的属性（@property）

# A computed attribute implemented with a property.
class Rect:
    """Rectangle whose area is derived from its width and height."""

    def __init__(self, width, height):
        # Constructor: remember both dimensions on the instance.
        self.width, self.height = width, height

    @property
    def area(self):
        """Return ``width * height``, computed on each access."""
        w, h = self.width, self.height
        return w * h
rect = Rect(800, 600)  # create an instance
area_value = rect.area
print("面积为：", area_value)  # print the computed area
# area has no setter, so the assignment below is not possible:
# rect.area = 100
# print("面积为：",rect.area)

运行

面积为： 480000

范例四：继承和多态（范例转自：https://www.runoob.com/python3/python3-class.html）
关于继承父类的调用方法：https://www.runoob.com/w3cnote/python-extends-init.html

# Base class definition.
class People:
    """Person with a name, an age and a private weight."""

    # Class-level defaults; __init__ sets per-instance values with the same names.
    name = ''
    age = 0
    __weight = 0  # private attribute: not directly accessible from outside the class

    def __init__(self, n, a, w):
        self.name, self.age = n, a
        self.__weight = w

    def speak(self):
        """Print the person's name and age."""
        message = "%s 说: 我 %d 岁。" % (self.name, self.age)
        print(message)
# Single-inheritance example.
class Student(People):
    """A People subclass that additionally records a school grade."""

    grade = ''  # class-level default; __init__ sets the per-instance value

    def __init__(self, n, a, w, g):
        # Call the parent constructor through super() rather than naming
        # People explicitly: the parent is resolved via the MRO, and the call
        # keeps working if the base class is renamed or the hierarchy grows.
        super().__init__(n, a, w)
        self.grade = g

    def speak(self):
        """Override the parent's speak() to also report the grade.

        ``grade`` is formatted with ``%d``, so it must be a number.
        """
        print("%s 说: 我 %d 岁了，我在读 %d 年级" % (self.name, self.age, self.grade))

# Create a student (name, age, weight, grade) and call the overriding speak().
student_args = ('ken', 10, 60, 3)
s = Student(*student_args)
s.speak()
# Further things to try:
# print(s._People__weight)
# print(s.name)

运行

ken 说: 我 10 岁了，我在读 3 年级

范例五：正则表达式的匹配

import re
# Read the whole poem into one string. The with-block closes the file even if
# reading raises, and read() replaces the line-by-line string concatenation.
with open('C:/untitled/python-attempt path/poem.txt') as file:
    text = file.read()

# Find three-letter words that start with "a" or "A".
# NOTE(review): "|" splits the pattern into ' (a..) ' and '(A..) ', so only the
# lowercase branch requires a leading space, and each match consumes the spaces
# around it (adjacent candidate words can be skipped). The pattern is kept
# unchanged so the documented output stays valid — confirm the intent.
result = re.findall(' (a[a-z][a-z]) |(A[a-z][a-z]) ',text)

# findall() yields one (lowercase_group, uppercase_group) tuple per match, with
# '' for the group that did not take part; a set removes the duplicates.
final_result = set()
for pair in result:
    final_result.update(pair)
# discard() instead of remove(): no KeyError when there were no matches at all.
final_result.discard('')
print(final_result)
print(len(final_result))

运行

{'All', 'ash', 'air', 'all', 'and', 'are', 'And'}
7

范例六：正则表达式匹配特性

import re
# Split on runs of whitespace.
whitespace_run = r'\s+'
ans = re.split(whitespace_run, 'a b   c')
print(ans)

# A character class listing whitespace and , ; . splits on any run of those
# separators, so all of the punctuation disappears from the result.
separator_run = r'[\s\,\;\.]+'
ans = re.split(separator_run, 'a,.; b  ,.; c')
print(ans)

# Greedy: (\d+) takes every digit, leaving nothing for (0*).
greedy_match = re.match(r'^(\d+)(0*)$', '102300')
ans = greedy_match.groups()
print(ans)

# Non-greedy: (\d+?) takes as few digits as possible, so (0*) gets the trailing zeros.
lazy_match = re.match(r'^(\d+?)(0*)$', '102300')
ans = lazy_match.groups()
print(ans)

运行

['a', 'b', 'c']
['a', 'b', 'c']
('102300', '')
('1023', '00')

范例七：根据网页信息进行爬虫开发

# 爬虫开发
#分析数据加载流程：分析目标数据对应的url
#下载数据：清洗，处理数据；数据持久化
import requests
import re
# Target data: the chapter index page of one novel.
# NOTE(review): the original note beside this URL said to append "eight" at
# the end, presumably meaning the real book id has a trailing 8 — confirm.
url = 'http://book.zongheng.com/showchapter/84061.html'
# Without a timeout a stalled connection would make requests.get() wait forever.
REQUEST_TIMEOUT = 10  # seconds
# Send the HTTP request for the index page.
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Decode the response body as UTF-8.
response.encoding = 'utf-8'
# HTML source of the novel's index page.
html = response.text
# print(html)  # uncomment to inspect the page source
# The novel's name is the text of the first <h1> element.
title = re.findall(r'<h1>(.*?)</h1>',html)[0]
# One entry per chapter; each <li> contains the chapter link and title.
dl = re.findall(r'<li class=" col-4">(.*?)</li>',html,re.S)
chapter_info = []
# The with-block closes the output file even if a download or parse step fails.
with open('C:/untitled/python-attempt path/%s.txt' % title, 'w', encoding = 'utf-8') as my_text:
    # Download the chapters one by one.
    for item in dl:
        links = re.findall(r'<a  href="(.*?)" target="_blank" title="(.*?) 字数：(.*?)</a>', item)
        chapter_info.append(links)
        if not links:
            # This <li> has no chapter link in the expected shape; skip it
            # instead of failing with IndexError.
            continue
        # Only the first two captured groups (url, title) are needed.
        chapter_url, chapter_title = links[0][0:2]
        chapter_response = requests.get(chapter_url, timeout=REQUEST_TIMEOUT)
        chapter_response.encoding = 'utf-8'
        chapter_html = chapter_response.text

        # Method 1: take the whole content <div>. It still contains <p></p>
        # tags, spaces and line breaks, so it has to be cleaned afterwards.
        chapter_content = re.findall('<div class="content" itemprop="acticleBody">(.*?)</div>', chapter_html, re.S)
        if not chapter_content:
            # The page has no content <div> in the expected shape; skip it.
            continue
        print(chapter_content)  # before cleaning
        cleaned = chapter_content[0].replace(' ', '')
        cleaned = cleaned.replace('\r\n', '')
        cleaned = cleaned.replace('<p>', '')
        # Each closing </p> ends a paragraph, so turn it into a line break.
        cleaned = cleaned.replace('</p>', '\n')
        chapter_content[0] = cleaned
        print(chapter_content)  # after cleaning
        my_text.write(chapter_title + '\n')
        my_text.write(chapter_content[0] + '\n')

        # Method 2 (alternative, kept for reference): match every <p>...</p>
        # directly. No cleaning is needed, but the last two matches are
        # unrelated page text and have to be dropped.
        # chapter_content = re.findall(r'<p>(.*?)</p>',chapter_html, re.S)
        # my_text.write(chapter_title+'\n')
        # for j in range((len(chapter_content)-2)):
        #     my_text.write(chapter_content[j]+'\n')

运行