xlsx转csv
import pandas as pd
def xlsx_to_csv_pd(filename):
    """Convert the first sheet of an Excel workbook to a UTF-8 CSV file.

    The CSV is written next to the input file, with the extension
    replaced by ``.csv``.

    Args:
        filename: Path to the Excel file to convert.
    """
    import os

    # First column of the sheet becomes the DataFrame index.
    data_xls = pd.read_excel(filename, index_col=0)
    # splitext works for any extension length; the original ``[:-5]``
    # slice silently mangled names not ending in a 5-char ".xlsx".
    csv_filename = os.path.splitext(filename)[0] + '.csv'
    data_xls.to_csv(csv_filename, encoding='utf-8')
if __name__ == '__main__':
    # Path of the workbook to convert; edit before running.
    source_path = "xxxxxxxx.xlsx"
    xlsx_to_csv_pd(source_path)
xlsx写入txt
import pandas as pd
# Worksheets whose "标题" (title) column should be exported.
sheet_name_list = ["2月28日", "3月1日", "3月2日"]

# Read every requested sheet in one pass: passing a list as
# ``sheet_name`` returns a dict of sheet name -> DataFrame, so the
# workbook is opened once instead of once per sheet.
sheets = pd.read_excel('./2月25日至3月4日模板.xlsx',
                       sheet_name=sheet_name_list)

with open("title.txt", "w", encoding="utf-8") as f:
    for sheet_name in sheet_name_list:
        # One title per output line, preserving sheet order.
        for title in sheets[sheet_name]["标题"]:
            f.write(title + "\n")
合并文件夹内的所有csv文件(代码使用 pd.read_csv 读取,原标题写作 xlsx,与代码不符)
from tqdm import tqdm
import os
import pandas as pd
def get_data(path):
    """Concatenate every CSV file found directly under *path*.

    Args:
        path: Directory containing the CSV files to merge.

    Returns:
        A single DataFrame with the rows of all files and a fresh
        0..n-1 index.
    """
    df_list = []
    for file in tqdm(os.listdir(path)):
        file_path = os.path.join(path, file)
        # Skip sub-directories so a stray folder doesn't crash read_csv.
        if not os.path.isfile(file_path):
            continue
        df_list.append(pd.read_csv(file_path, engine='python', encoding='utf-8'))
    # ignore_index=True: each file carries its own 0-based index, so a
    # plain concat would produce duplicate index labels.
    return pd.concat(df_list, ignore_index=True)
if __name__ == '__main__':
    # Directory holding the CSV files to merge; edit before running.
    TEST_PATH = 'xxx/xxx/xxxxx'
    test_df = get_data(TEST_PATH)
    # index=False: the merged frame's index is synthetic, don't persist it.
    test_df.to_csv(path_or_buf="test.csv", index=False)
list中所有元素串成一个字符串
# Join every element of the list into one string with no separator.
list_str = ['我', '爱', '学习']
s = ''.join(list_str)
print(s)  # fixed typo: was ``pritn(s)`` (NameError)
list中的元素都变成字符串
# Convert every element of the list to its string form.
list_num = [2, 3, 4]  # fixed stray closing bracket (was a SyntaxError)
list_str = [str(i) for i in list_num]
beautifulsoup获取网页正文
import requests
from bs4 import BeautifulSoup
# URL of the article whose body text we want.
newsurl = "https://baijiahao.baidu.com/s?id=1692067102120399651&wfr=spider&for=pc"
res = requests.get(newsurl)
res.encoding = 'utf-8'  # page is UTF-8; requests may otherwise mis-guess
soup = BeautifulSoup(res.text, 'html.parser')

# Paragraphs of a Baijiahao article sit in ``.bjh-p`` nodes; the last
# one is dropped (presumably boilerplate — TODO confirm against the page).
article = []
for p in soup.select('.bjh-p')[:-1]:
    article.append(p.text.strip())
print(article)
# Keep the joined text: the original ``''.join(article)`` computed the
# string and immediately discarded it.
content = ''.join(article)
print(content)
参考链接:python爬虫——获取正文内容