# Window title and initial geometry.
self.setWindowTitle("数据清洗")
self.setGeometry(100, 100, 1280, 720)

# Central stack that holds the individual tool pages.
self.stacked_widget = QStackedWidget(self)
self.setCentralWidget(self.stacked_widget)

# Today's date, shown in the top-left corner.
self.label = QLabel(datetime.now().strftime("%Y-%m-%d"), self)
self.label.setGeometry(10, 5, 130, 50)
self.label.setFont(QFont("等线", 18))

# Top navigation buttons, one per page.
self.clean_data = QPushButton('数据清洗', self)
self.clean_data.setFont(QFont('等线', 11))
self.clean_data.setGeometry(155, 10, 130, 30)
self.clean_data.clicked.connect(self.show_data_cleaning)

self.zip_data = QPushButton('压缩/解压', self)
self.zip_data.setFont(QFont('等线', 11))
self.zip_data.setGeometry(300, 10, 130, 30)
self.zip_data.clicked.connect(self.show_zip_page)

# The merge-Excel button gets its own attribute so that it does not
# overwrite the reference to the zip button held in self.zip_data.
self.merge_excel = QPushButton('合并excel', self)
self.merge_excel.setFont(QFont('等线', 11))
self.merge_excel.setGeometry(445, 10, 130, 30)
self.merge_excel.clicked.connect(self.select_directory)

# Vertical separator between the side column and the page area.
self.line = SeparatorLine(self)
self.line.setGeometry(150, 0, 2, 10000)

# Horizontal separator below the navigation buttons.  setGeometry() takes
# integers, so use floor division rather than passing a float.
self.line_horizontal = SeparatorLine1(self)
self.line_horizontal.setGeometry(150, self.height() // 8, 10000, 2)
def show_data_cleaning(self):
    """Show the data-cleaning page, creating and registering it on first use."""
    page = self.data_cleaning
    if not page:
        # First request: build the page and add it to the stack.
        page = DataCleaning()
        self.data_cleaning = page
        self.stacked_widget.addWidget(page)
    self.stacked_widget.setCurrentWidget(page)
def show_zip_page(self):
    """Show the compress/extract page, creating and registering it on first use."""
    stack = self.stacked_widget
    if not self.Compression:
        # Lazily build the page the first time it is requested.
        created = CompressionDecompression()
        self.Compression = created
        stack.addWidget(created)
    stack.setCurrentWidget(self.Compression)
def select_directory(self):
    """Show the directory-selection (merge Excel) page, creating it on first use."""
    already_built = bool(self.file_directory)
    if not already_built:
        # Build once, then keep the instance registered in the stack.
        self.file_directory = SelectDirectory()
        self.stacked_widget.addWidget(self.file_directory)
    current = self.file_directory
    self.stacked_widget.setCurrentWidget(current)
# 清洗
class DataCleaning(Layout):
    """Data-cleaning page.

    The user picks an input file and an output path, then starts a cleaning
    run.  For every cleaned cell, blank lines are dropped, leading
    punctuation and surrounding whitespace are stripped from each remaining
    line, and emoji are replaced with the literal ``<emoji>``.  Optionally
    only one column is cleaned and/or rows are de-duplicated on one column.
    """

    # Punctuation stripped from the start of every line inside a cell.
    _LEADING_PUNCTUATION = ',.?!;:,。?;:'
    # A line is kept only if it contains at least one non-whitespace character.
    _NON_BLANK = re.compile(r'\S')

    def __init__(self):
        """Create all widgets of the page; option widgets start hidden."""
        super().__init__()
        self.save_path = None  # output path, set by save_file()
        self.file_path = None  # input path, set by select_file()
        # NOTE(review): this attribute name presumably shadows Qt's layout()
        # accessor inherited through Layout; kept unchanged for compatibility.
        self.layout = QVBoxLayout()

        self.progress_bar = QProgressBar(self)
        self.progress_bar.setGeometry(155, 235, 668, 28)
        self.progress_bar.setVisible(False)

        # Action buttons.
        self.button_select = self._make_button("打开", (10, 60, 130, 30), self.select_file)
        self.button_save = self._make_button("另存为", (10, 95, 130, 30), self.save_file)
        self.button_run = self._make_button("开始清洗", (10, 130, 130, 30), self.run)
        self.button = self._make_button("清洗附加选项", (155, 60, 130, 30), self.button_clicked)

        # Hint shown next to the options button.
        self.label_1_1 = QLabel(self)
        self.label_1_1.setGeometry(290, 50, 700, 50)
        self.label_1_1.setFont(QFont("等线", 14))
        self.label_1_1.setVisible(False)

        # Optional 1-based number of the single column to clean.
        self.input_text = QLineEdit(self)
        self.input_text.setGeometry(155, 125, 130, 30)
        self.input_text.setVisible(False)
        self.input_text.setValidator(QIntValidator())

        self.label_info = QLabel("清洗指定列", self)
        self.label_info.setFont(QFont('等线', 11))
        self.label_info.setGeometry(155, 95, 130, 30)
        self.label_info.setVisible(False)

        self.label_info_1 = QLabel("根据指定列去重", self)
        self.label_info_1.setFont(QFont('等线', 11))
        self.label_info_1.setGeometry(155, 155, 130, 30)
        self.label_info_1.setVisible(False)

        # Optional 1-based number of the column used for de-duplication.
        self.input_text_1 = QLineEdit(self)
        self.input_text_1.setGeometry(155, 185, 130, 30)
        self.input_text_1.setVisible(False)
        self.input_text_1.setValidator(QIntValidator())

        # Indicator lights: red while cleaning, green while saving.
        self.red_light = QLabel(self)
        self.red_light.setGeometry(10, 170, 20, 20)
        self.red_light.setStyleSheet("background-color: red")
        self.red_light.setVisible(False)

        self.green_light = QLabel(self)
        self.green_light.setGeometry(35, 170, 20, 20)
        self.green_light.setStyleSheet("background-color: green")
        self.green_light.setVisible(False)

        self.line_horizontal_1 = SeparatorLine1(self)
        self.line_horizontal_1.setGeometry(150, 225, 10000, 2)
        self.line_horizontal_1.setVisible(False)

    def _make_button(self, text, geometry, slot):
        """Create a push button with the page's standard font, place it and connect it."""
        button = QPushButton(text, self)
        button.setFont(QFont('等线', 11))
        button.setGeometry(*geometry)
        button.clicked.connect(slot)
        return button

    def button_clicked(self):
        """Toggle the visibility of the optional cleaning parameters and their hint."""
        self.label_1_1.setText('(温馨提示,下方的输入框只能输入数字,以下参数非必须参数,可不填)')
        show = not self.input_text.isVisible()
        option_widgets = (
            self.input_text,
            self.label_info,
            self.input_text_1,
            self.label_info_1,
            self.label_1_1,
        )
        for widget in option_widgets:
            widget.setVisible(show)

    def select_file(self):
        """Ask for the input file; the previous choice is kept if the dialog is cancelled."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择文件", "", "All Files (*)")
        if file_path:
            self.file_path = file_path

    def save_file(self):
        """Ask for the output path and create an empty file there.

        If the chosen file already exists the user must confirm overwriting;
        when they decline, ``self.save_path`` is reset to ``''``.  Cancelling
        the dialog leaves ``self.save_path`` untouched.
        """
        file_dialog = QFileDialog()
        file_dialog.setWindowTitle("另存为")
        file_dialog.setNameFilters(["Text Files (*.txt)", "CSV Files (*.csv)",
                                    "Excel Files (*.xlsx *.xls)", "JSON Files (*.json *.jsonl)"])
        # Must match one of the registered filters exactly to be pre-selected.
        file_dialog.selectNameFilter("Excel Files (*.xlsx *.xls)")
        if not file_dialog.exec():
            return
        self.save_path = file_dialog.selectedFiles()[0]
        if os.path.exists(self.save_path):
            answer = QMessageBox.warning(self, "警告", "文件已存在,是否覆盖?",
                                         QMessageBox.Yes | QMessageBox.No)
            if answer != QMessageBox.Yes:
                self.save_path = ''
                return
        bag.Bag.save_excel([], self.save_path)

    def run(self):
        """Load the input file, clean it, optionally de-duplicate, and save.

        Any failure (loading, invalid column number, cleaning, saving) is
        reported in an error dialog instead of propagating.
        """
        try:
            # Loading happens inside the try so a missing or unreadable input
            # file is reported like every other failure.
            rows = judge(self.file_path)

            # Empty input boxes mean "no additional option".
            clean_column = self._column_index(self.input_text.text())
            dedupe_column = self._column_index(self.input_text_1.text())

            self.progress_bar.setVisible(True)
            self.line_horizontal_1.setVisible(True)

            self.red_light.setVisible(True)
            cleaned = self._clean_rows(rows, clean_column)
            if dedupe_column is not None:
                cleaned = self._dedupe_rows(cleaned, dedupe_column)
            self.red_light.setVisible(False)

            self.green_light.setVisible(True)
            self._save_result(cleaned)
            # Let Qt repaint so the green light is visible during the pause.
            QApplication.processEvents()
            time.sleep(1)
            self.green_light.setVisible(False)

            QMessageBox.information(self, "完成", "数据清洗完成")
        except Exception as e:
            QMessageBox.critical(self, "错误", str(e))

    @staticmethod
    def _column_index(text):
        """Convert a 1-based column number typed by the user to a 0-based index.

        Returns ``None`` for an empty string.  Raises ``ValueError`` for input
        that is not an integer or is smaller than 1, so that 0 or a negative
        number is not silently interpreted as counting from the last column.
        """
        if not text:
            return None
        index = int(text) - 1
        if index < 0:
            raise ValueError('列号必须是大于等于 1 的整数')
        return index

    @classmethod
    def _clean_cell(cls, value):
        """Clean one cell and return the result.

        Non-string values are returned unchanged.  For strings, lines without
        any non-whitespace character are dropped; each remaining line has
        leading punctuation and surrounding whitespace removed and emoji
        replaced with ``<emoji>``.
        """
        if not isinstance(value, str):
            return value
        kept = []
        for line in value.split('\n'):
            if cls._NON_BLANK.search(line):
                stripped = line.lstrip(cls._LEADING_PUNCTUATION).strip()
                kept.append(emoji.replace_emoji(stripped, replace='<emoji>'))
        return '\n'.join(kept)

    def _clean_rows(self, rows, column=None):
        """Clean every row and update the progress bar after each one.

        With ``column`` set, only that cell is cleaned, in place on the row;
        otherwise every cell is cleaned and a new list is built per row.
        """
        # presumably rows is a list of row lists — verify against judge()
        total = len(rows)
        cleaned = []
        for done, row in enumerate(tqdm(rows), start=1):
            if column is None:
                row = [self._clean_cell(cell) for cell in row]
            else:
                row[column] = self._clean_cell(row[column])
            cleaned.append(row)
            self.progress_bar.setValue(int(done / total * 100))
            QApplication.processEvents()  # keep the UI responsive
        return cleaned

    @staticmethod
    def _dedupe_rows(rows, column):
        """Return rows with duplicates on ``column`` removed, keeping the first occurrence."""
        seen_hashable = set()
        seen_unhashable = []  # fallback for cell values that cannot be hashed
        unique = []
        for row in rows:
            key = row[column]
            try:
                duplicate = key in seen_hashable
                if not duplicate:
                    seen_hashable.add(key)
            except TypeError:
                duplicate = key in seen_unhashable
                if not duplicate:
                    seen_unhashable.append(key)
            if not duplicate:
                unique.append(row)
        return unique

    def _save_result(self, rows):
        """Write ``rows`` to the chosen output path, prompting for one if none is set."""
        if not self.save_path:
            self.save_file()
        if not self.save_path:
            # The user cancelled the dialog or declined to overwrite.
            raise ValueError('未选择保存路径,清洗结果未保存')
        bag.Bag.save_excel(rows, self.save_path)
# 压缩、解压
class CompressionDecompression(Layout):
def __init__(self):
    """Build the compress/extract page and wire its two buttons."""
    super().__init__()
    # NOTE(review): this attribute name presumably shadows Qt's layout()
    # accessor inherited through Layout; kept unchanged for compatibility.
    self.layout = QVBoxLayout()
    self.file_path = None  # path the handlers operate on

    self.button_compress = QPushButton("压缩文件", self)
    self.button_compress.setFont(QFont('等线', 11))
    self.button_compress.setGeometry(10, 60, 130, 30)
    # on_button_clicked1 is the handler that calls compress_to_zip().
    self.button_compress.clicked.connect(self.on_button_clicked1)

    self.button_extract = QPushButton("解压文件", self)
    self.button_extract.setFont(QFont('等线', 11))
    self.button_extract.setGeometry(10, 95, 130, 30)
    # on_button_clicked is the handler that calls unzip_file().
    self.button_extract.clicked.connect(self.on_button_clicked)
def on_button_clicked(self):
    """Extract the selected archive into a directory chosen by the user.

    Prompts for the archive first when none has been selected yet.  A
    failure dialog is shown when either selection is cancelled.
    """
    if not self.file_path:
        # presumably select_file() stores the choice in self.file_path — verify
        self.select_file()
    if not self.file_path:
        QMessageBox.critical(self, 'error', '解压失败')
        return

    extract_path = QFileDialog.getExistingDirectory(self, "另存为")
    if extract_path:
        # Pass the actual path, not the truthiness flag of the path.
        unzip_file(self.file_path, extract_path)
        QMessageBox.information(self, 'success', '解压成功!')
    else:
        QMessageBox.critical(self, 'error', '解压失败')
def on_button_clicked1(self):
    """Compress the selected path into a zip in a directory chosen by the user.

    Prompts for the source first when none has been selected yet.  A
    failure dialog is shown when either selection is cancelled.
    """
    if not self.file_path:
        # presumably select_folder() stores the choice in self.file_path — verify
        self.select_folder()
    if not self.file_path:
        QMessageBox.critical(self, 'error', '压缩失败')
        return

    # Parent the dialog to this page rather than relying on a global window.
    extract_path = QFileDialog.getExistingDirectory(self, "选择文件夹", options=QFileDialog.ShowDirsOnly)
    if extract_path:
        # Pass the actual path, not the truthiness flag of the path.
        self.compress_to_zip(self.file_path, extract_path)
        QMessageBox.information(self, 'success', '压缩成功!')
    else:
        QMessageBox.critical(self, 'error', '压缩失败')
def select_file(self): # 选择文件
file_dialog = QFileDialog()
file_path, _ = file_dialog.getOpenFileName(self, "选择文件", "", "All Files (*)")
if file_path:
一、Python所有方向的学习路线
这里对Python所有方向的技术点做了整理,形成各个领域的知识点汇总。它的用处在于:你可以按照下面的知识点去找对应的学习资源,保证自己学得较为全面。
二、Python必备开发工具
工具都帮大家整理好了,安装就可直接上手!
三、最新Python学习笔记
当我打下一定基础、有了自己的理解能力之后,会去阅读一些前辈整理的书籍或者手写的笔记资料。这些笔记详细记载了他们对一些技术点的理解,这些理解比较独到,可以从中学到不一样的思路。
四、Python视频合集
观看全面零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。
五、实战案例
纸上得来终觉浅,要学会跟着视频一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
六、面试宝典
简历模板![在这里插入图片描述](https://img-blog.csdnimg.cn/646863996ac44da8af500c049bb72fbd.png#pic_center)
网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!