python代码

最新推荐文章于 2024-06-20 09:44:04 发布

秒入睡

最新推荐文章于 2024-06-20 09:44:04 发布

阅读量560

点赞数

文章标签： python 开发语言

本文链接：https://blog.csdn.net/weixin_65564854/article/details/131325225

版权

1、正则

import re
# Sample biography text; the work titles to extract are wrapped in 《…》.
aa1 = """柳宗元（773年—819年11月28日），字子厚，汉族，祖籍河东郡（今山西省运城市永济、芮城一带）人。
代表作有《溪居》《江雪》《渔翁》。"""
# Raw string so that \w reaches the regex engine instead of being treated as
# an (invalid) string escape sequence by the Python parser.
bb1 = re.findall(r'《\w+》', aa1)
print(bb1)

# One record per line: "<name>的微信号是<digits>，手机号是<digits>。"
aa2 = """李艳的微信号是454456，手机号是13534556698。
王莹的微信号是7879756，手机号是13566666666。
刘强的微信号是34569875，手机号是18966677777。
孙小峰的微信号是6196，手机号是18988888888。"""
# Raw strings keep \d and \w as regex escapes rather than string escapes.
bb2 = re.findall(r'微信号是(\d+)', aa2)   # every WeChat id
bb3 = re.findall(r'手机号是(\d+)', aa2)   # every phone number
# The possessive particle "的" sits outside the first group; with a bare
# (\w+) directly before "微信号是" the captured name would end in "的".
bb4 = re.findall(r'(\w+)的微信号是(\d+)，手机号是(\d+)', aa2)
print(bb2)
print(bb3)
print(bb4)

# News snippets; each contains full dates written as <year>年<month>月<day>日.
cc = """2021年6月1日起，新修订的《中华人民共和国未成年人保护法》《中华人民共和国预 防未成年人犯罪法》(以下称“两法”)正式施行。
2021年10月23日，十三届全国人大常委会第三十一次会议表决通过了《中华人民共和国家庭教育促进法》。
2022年5月10日，庆祝中国共产主义青年团成立100周年大会在北京人民大会堂隆重举行。
2023年3月4日晚，感动中国2022年度获奖人物揭晓！他们是：钱七虎，邓小岚，杨宁，沈忠芳，徐淙祥，“银发知播”群体，徐梦桃，陈清泉，陆鸿，林占熺。在过去的极不平凡的2022年，他们或在危难中逆行，或在逆境中坚守，以凡人之力，书写中国人的年度精神史诗，感动了国人，震撼了世界。"""
# Raw string so \d is a regex escape; only year+month+day sequences match,
# so bare mentions such as "2022年度" or "2022年，" are not returned.
dd = re.findall(r'\d+年\d+月\d+日', cc)
print(dd)

# Encyclopedia-style passage that mentions the compound by its Chinese name.
ii = """一氧化碳，一种碳氧化合物，通常状况下为是无色、无臭、无味的气体。物理性质上，一氧化碳的熔点为-205℃，沸点为-191.5℃，难溶于水。化学性质上，一氧化碳既有还原性，又有氧化性，能发生氧化反应（燃烧反应）、歧化反应等；同时具有毒性，较高浓度时能使人出现不同程度中毒症状，危害人体的脑、心、肝、肾、肺及其他组织，甚至电击样死亡，人吸入最低致死浓度为5000ppm（5分钟）。工业上，一氧化碳是一碳化学的基础，可由焦炭氧气法等方法制得，主要用于生产甲醇和光气以及有机合成等。"""
# Substitute every occurrence of the Chinese name with the chemical formula:
# cut the text at each occurrence and glue the pieces back with the formula.
old_term, new_term = "一氧化碳", "CO"
nn = new_term.join(ii.split(old_term))
print(nn)

2、爬虫

import requests
from lxml import etree

# Scrape the recipe listing page and collect one row of fields per recipe card.
url = 'https://www.meishij.net/chufang/diy/'  # page to fetch
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
# timeout stops the script from blocking forever if the server never answers.
html = requests.get(url=url, headers=headers, timeout=10).text  # response body as str
# If the text comes back garbled, decode the raw bytes instead:
#   html = requests.get(url=url, headers=headers, timeout=10).content.decode()
aa = etree.HTML(html)  # parse the string into an element tree
nn = aa.xpath('//div[@class="listtyle1_list clearfix"]/div')  # one node per card
totalist = []
for ii in nn:  # visit every card node
    # The leading "." makes each XPath relative to the current card node;
    # strip() removes surrounding whitespace from the extracted text.
    title = ii.xpath('.//strong//text()')[0].strip()
    zuozhe = ii.xpath('.//em//text()')[0].strip()
    # First <li>: two values separated by "/".
    buzhou = ii.xpath('.//li[1]//text()')[0].split("/")
    step = buzhou[0].strip()
    shijian = buzhou[1].strip()
    # Second <li>: two values separated by "/". These must be read from
    # buzhou2; reading buzhou here would just repeat step/shijian.
    buzhou2 = ii.xpath('.//li[2]//text()')[0].split("/")
    zuofa = buzhou2[0].strip()
    taste = buzhou2[1].strip()
    mylist = [title, zuozhe, step, shijian, zuofa, taste]
    totalist.append(mylist)
print(totalist)

import csv
# Column titles for the CSV, in the same order as each row of totalist.
headers = ['菜名', '作者', '步骤', '时间', '方法', '味道']
# newline="" prevents the csv module from emitting blank lines between rows.
# An explicit encoding keeps the Chinese text from depending on the platform's
# default locale; the BOM written by utf-8-sig lets spreadsheet tools detect
# UTF-8.
with open("mydata.csv", "w", newline="", encoding="utf-8-sig") as f:
    ff = csv.writer(f)
    ff.writerow(headers)      # header line first
    ff.writerows(totalist)    # then one line per scraped recipe

3、爬图片

import requests
from lxml import etree
# import random   #随机模块,不考

# Download the image of every recipe card on the listing page, one file per card.
url = 'https://www.meishij.net/chufang/diy/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
# timeout stops the script from blocking forever if the server never answers.
html = requests.get(url=url, headers=headers, timeout=10).text
aa = etree.HTML(html)
nn = aa.xpath('//div[@class="listtyle1_list clearfix"]/div')

# Characters that cannot appear in a file name (path separators plus the
# Windows-reserved set) are mapped to "_" so open() does not fail on them.
unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

for ii in nn:
    title = ii.xpath('.//strong//text()')[0].strip()
    pic = ii.xpath('.//img/@src')[0].strip()  # src is an attribute, hence the "@"
    neirong = requests.get(url=pic, headers=headers, timeout=10).content  # raw image bytes
    # NOTE: cards that share a title overwrite each other's file; the original
    # author's suggested workaround was appending random.randint(1, 20) to the name.
    with open('{}.jpg'.format(title.translate(unsafe_chars)), "wb") as f:
        f.write(neirong)

4、可视化

import matplotlib.pyplot as plt

# Pie chart of scores: names[i] is the label for shuju[i].
names = ['张三', '李四', '赵武', '张丽', '王鹏', '孙丽丽']
shuju = [98, 60, 74, 85, 30, 10]
# Per-wedge offset from the centre; only the fourth wedge is pulled out.
nn = [0, 0, 0, 0.25, 0, 0]
print(names)
print(shuju)

# Use the SimHei font for sans-serif text (presumably so the Chinese labels render).
plt.rcParams['font.sans-serif'] = ['SimHei']

pie_options = dict(
    labels=names,
    explode=nn,
    labeldistance=0.75,   # place labels inside the wedges
    autopct='%.2f%%',     # percentage with two decimals
    shadow=True,
)
plt.pie(shuju, **pie_options)
plt.title('成绩表', fontsize=25)
# To save the figure to disk instead: plt.savefig('name.png')
plt.axis('equal')   # equal axis scaling keeps the pie circular
plt.legend(loc=4)
plt.show()