"""
p031:提取商品价格
"""
import re
# One purchase per line: "<weight>斤<item>花了<price>元".
# Weight and price both accept multi-digit and decimal numbers; the pattern is
# compiled once here instead of being rebuilt for every line.
PURCHASE_PATTERN = re.compile(r"(\d+(?:\.\d+)?)斤(.*)花了(\d+(?:\.\d+)?)元")


def find_purchases(text):
    """Return the regex matches for every line of *text* that records a purchase.

    Args:
        text: Multi-line text; each line is searched independently.

    Returns:
        A list of match objects in line order. For each match, group(1) is the
        weight, group(2) the item name and group(3) the price (all strings);
        lines without a purchase are skipped.
    """
    candidates = (PURCHASE_PATTERN.search(line) for line in text.splitlines())
    return [match for match in candidates if match]


if __name__ == '__main__':
    content = """
小明上街买菜
买了1斤黄瓜花了8元
买了2斤葡萄花了13.5元
买了3斤白菜花了5.4元
"""
    # Goal: extract (1, 黄瓜, 8), (2, 葡萄, 13.5), (3, 白菜, 5.4)
    for match in find_purchases(content):
        print(match.group())
        print(f"{match.group(1)}\t{match.group(2)}\t{match.group(3)}")
"""
p032: 给文章手机号打马赛克效果
"""
import re
# An 11-digit mobile number: "1", a second digit 3-9, then nine more digits.
# The lookarounds require that the number is not embedded in a longer run of
# digits, so e.g. a 12-digit sequence is left untouched instead of having its
# first 11 digits masked.
PHONE_PATTERN = re.compile(r"(?<!\d)(1[3-9])\d{9}(?!\d)")


def mask_phone_numbers(text):
    """Return *text* with every standalone 11-digit mobile number masked.

    The first two digits are kept (regex group 1) and the remaining nine
    digits are replaced by the fixed marker "******".

    Args:
        text: Arbitrary text that may contain phone numbers.

    Returns:
        The masked text; text without phone numbers is returned unchanged.
    """
    return PHONE_PATTERN.sub(r"\1******", text)  # \1 is the kept two-digit prefix


if __name__ == '__main__':
    content = """
青春23是什么,每个人都有12345678自己的见解。我们只是爱借523645着青2342春的名义再13953652564肆无忌惮一次,用时光绘22342画着未
来的颜色,用汗水打造13987456985着56456理想的殿堂,用岁月承载134569525365着曾经的梦想,再将他们一一打磨,雕刻成最美丽123456952
65478的模样。我们只是想14425这青葱的岁月,留下些2342回忆,这样的回忆,是能够让13777777777我们在多年之后笑着流泪,说,我不曾后悔。
"""
    print(mask_phone_numbers(content))
"""
p033: 实现英文分词计算词频
"""
import re
import pandas as pd
# Word separators: whitespace plus common punctuation. The hyphen is escaped
# so it is a literal "-"; written unescaped between ")" and "?" it formed a
# character range that also swallowed digits and other symbols.
WORD_SEPARATORS = re.compile(r'[\s.,;:!?()"\-]+')


def split_english_words(text):
    """Split *text* into words on whitespace and punctuation.

    Args:
        text: English text to tokenize.

    Returns:
        The list of non-empty tokens in order of appearance. Empty strings
        produced by leading or trailing separators are dropped so they are
        not counted as a word.
    """
    return [word for word in WORD_SEPARATORS.split(text) if word]


if __name__ == '__main__':
    with open("./p033_english_file.txt", encoding="utf-8") as fp:
        content = fp.read()
    words = split_english_words(content)
    # Show the 20 most frequent words.
    print(pd.Series(words).value_counts()[:20])
"""
p034:实现中文文章分词
"""
import jieba
import re
if __name__ == '__main__':
    content = """
央视网消息:目前,全国累计报告接种新冠病毒疫苗人口已覆盖全国总人口的89.63%。新冠病毒疫苗在我国疫情防控中起到了什么作用?
下一步接种工作有什么计划?国家卫生健康委疾控局一级巡视员贺青华在1月15日的国务院联防联控机制发布会上做了介绍。
国家卫生健康委疾控局一级巡视员 贺青华:近期国内多起聚集性疫情都存在着隐匿传播时间较长后才发现的情况,一方面由于
境外疫情输入压力增大,疫情输入方式和途径多样化;另一方面,与我们国家人群疫苗接种率高、感染后有的没有症状,有的
出现症状比较晚、症状相对较轻,所以发现难度相对增大。为什么有这种情况,最主要还是因为接种了疫苗以后,疫苗刺激机体
产生的体液免疫和细胞免疫就要行动起来,阻止外来物种在体内繁殖扩散,免疫屏障成功了,就没有症状,或者是症状很轻。
到目前为止,我们国家发生的散发疫情,相对疫情发生地的几十万、几百万甚至上千万人口的城市(来说),那只是少数,所以
现有的研究和防控实践证明,在实施新冠疫苗接种的生物学措施的同时,做好个人日常卫生防护措施,是能够有效防止个人感染新冠
病毒的。下一步,国家卫生健康委将继续指导各地科学规范、安全有序地开展疫苗接种。
"""
    # Strip whitespace and the listed punctuation marks before segmenting, so
    # they do not show up as tokens in the result.
    noise = re.compile(r"[\s:?。;(),、]")
    content = noise.sub("", content)
    # jieba.cut yields tokens lazily; materialize them so the list is printed.
    word_list = [*jieba.cut(content)]
    print(word_list)
"""
p035:统计一本小说的人名
"""
import jieba.posseg as posseg
import pandas as pd
if __name__ == '__main__':
    # The novel is decoded as GBK; bytes that cannot be decoded are dropped.
    with open("./p035_鹿鼎记.txt", encoding="GBK", errors="ignore") as novel:
        content = novel.read()
    # Keep only the tokens that the part-of-speech segmenter flags as "nr".
    names = [token for token, tag in posseg.cut(content) if tag == "nr"]
    # Print the 10 most frequent names.
    print(pd.Series(names).value_counts()[:10])
"""
p036:实现论文查重小程序
"""
import jieba.analyse
# Paths (relative to the working directory) of the articles being compared.
python_article = "./p036_python_article.txt"
flask_article = "./p036_flask_article.txt"
java_article = "./p036_java_article.txt"
shortvideo_article = "./p036_shortvideo_article.txt"
def get_keywords_from_article(fname):
    """Extract keywords from the article stored in the file *fname*.

    Args:
        fname: Path of the article; the file is read as UTF-8 text.

    Returns:
        Whatever ``jieba.analyse.extract_tags`` returns for the article text
        when asked for the top 50 keywords.
    """
    with open(fname, encoding="utf-8") as article:
        text = article.read()
    return jieba.analyse.extract_tags(text, topK=50)
def compute_sim(wordsa, wordsb):
    """Return the keyword overlap of two word collections as a percentage.

    The score is the size of the intersection divided by the size of the
    union of the two collections (duplicates ignored), times 100.

    Args:
        wordsa: Iterable of hashable words.
        wordsb: Iterable of hashable words.

    Returns:
        The similarity in the range 0-100, rounded to two decimals. When both
        collections are empty there is nothing to compare and 0.0 is returned
        instead of dividing by zero.
    """
    set_a, set_b = set(wordsa), set(wordsb)
    union = set_a | set_b
    if not union:
        return 0.0
    return round(len(set_a & set_b) / len(union) * 100, 2)
if __name__ == '__main__':
    # Extract the keywords of all four articles first, in a fixed order.
    python_keywords, flask_keywords, java_keywords, shortvideo_keywords = (
        get_keywords_from_article(path)
        for path in (python_article, flask_article, java_article, shortvideo_article)
    )
    # Compare the python article against each article, itself included.
    comparisons = (
        ("python vs python:", python_keywords),
        ("python vs flask:", flask_keywords),
        ("python vs java:", java_keywords),
        ("python vs shortvideo:", shortvideo_keywords),
    )
    for label, other_keywords in comparisons:
        print(label, compute_sim(python_keywords, other_keywords))
"""
p037:控制电脑屏幕不锁屏
#鼠标手动移至4个角落时候,会FailSafeException
"""
import pyautogui
import random
import time
# Nudge the mouse every two seconds, forever, so the screen does not lock.
while True:
    # Random horizontal/vertical offsets in pixels; either may be negative.
    x = random.randint(-300, 300)
    y = random.randint(-300, 300)
    print(x, y)
    # Apply the offsets relative to the current cursor position. Passing them
    # to the absolute moveTo() would target points at or beyond the top-left
    # of the screen whenever a value is negative.
    # NOTE(review): presumably such targets end up in a screen corner and trip
    # PyAutoGUI's fail-safe on the next call - confirm on the target platform.
    pyautogui.moveRel(x, y)
    time.sleep(2)
"""
p038:个人密码管理器
"""
import sys
import json
import os
import string
import random
# File (relative to the working directory) that stores the passwords as JSON.
password_file = "p038_my_password.txt"
# Module-level mapping that starts out empty.
password_dict = dict()
def get_password_dict():
    """Load the stored passwords.

    Returns:
        The object decoded from the JSON content of ``password_file``, or an
        empty dict when that file does not exist.
    """
    if not os.path.isfile(password_file):
        return {}
    with open(password_file, encoding="utf-8") as stored:
        return json.load(stored)
def show_password():
    """Print one line per stored entry: the website and its password."""
    for website, password in get_password_dict().items():
        print(f"网站:{website}, 密码:{password}")
def get_new_password(pwd_length=15):
    """Generate a random password.

    Every character is drawn independently from ASCII letters, digits and
    punctuation, so characters may repeat and the requested length is always
    honoured (it is not capped by the size of the alphabet).

    Args:
        pwd_length: Number of characters to generate; values of zero or less
            yield an empty string.

    Returns:
        The generated password string.
    """
    alphabet = string.ascii_letters + string.digits + string.punctuation
    # SystemRandom draws from the operating system's entropy source rather
    # than the default pseudo-random generator, which is unsuitable for
    # secrets such as passwords.
    rng = random.SystemRandom()
    return ''.join(rng.choice(alphabet) for _ in range(pwd_length))
def add_password(website):
    """Create a new password for *website* and save it.

    The stored passwords are loaded, the entry for *website* is set to a
    freshly generated password (replacing any existing entry), and the whole
    mapping is written back to ``password_file`` as JSON.

    Args:
        website: Key under which the new password is stored.
    """
    passwords = get_password_dict()
    passwords[website] = get_new_password()
    with open(password_file, "w", encoding="utf-8") as fout:
        fout.write(json.dumps(passwords))
if __name__ == '__main__':
    # Usage: "<script> show" lists the passwords, "<script> add <website>"
    # stores a new one; anything else only echoes the arguments.
    print(sys.argv)
    arguments = sys.argv[1:]
    if arguments == ["show"]:
        show_password()
    if len(arguments) == 2 and arguments[0] == "add":
        add_password(arguments[1])
"""
p039:获取外网ip
"""
import requests
import json
if __name__ == '__main__':
    url = "http://httpbin.org/ip"
    # Give up after 10 seconds rather than waiting indefinitely on an
    # unresponsive server.
    r = requests.get(url, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    # The response body is JSON; its "origin" field holds the caller's IP.
    ip = r.json()["origin"]
    print("我的外网IP:", ip)