1.读取文件
1.1安装库文件
pip install openpyxl
pip install xlrd
1.2加载excel到df
Index_col = 0 设置为使第一列索引
df = pd.read_excel('./data/sample.xlsx', index_col=None)
读取所有sheet
df_sheet_multi = pd.read_excel('问答.xlsx', sheet_name=None, index_col=None, header=None)
# keys=[i for i in df_sheet_multi.keys()]
keys=list(df_sheet_multi)
读取指定sheet,按名称和按索引
df_sheet_multi['sheet2']
print(df_sheet_multi[list(df_sheet_multi)[0]])
2.读取指定列
sheet=df_sheet_multi['python面试']
columns=list(sheet.columns)
for i in columns:
c=sheet[i]
c=c.replace(np.nan, '')
f_4=c[0:4].values
3.集合运算
s.add(1) ###增加一个元素
s.update([5,2,3,6]) ###添加多个元素
s1 = {1,2,3}
s2 = {2,3,4}
print('交集:',s1.intersection(s2))
print('交集:',s1 & s2)
print('并集',s1.union(s2))
print('并集',s1 | s2)
print('差集',s1.difference(s2)) ###s1 - (s1&s2)
print('差集',s2.difference(s1)) ###s2 - (s1&s2)
4.csv追加数据,读取数据
4.1
def write_csv():
path = "aa.csv"
with open(path,'a+') as f:
csv_write = csv.writer(f)
data_row = ["1","2"]
csv_write.writerow(data_row)
def read_csv():
path = "aa.csv"
with open(path,"rb") as f:
csv_read = csv.reader(f)
for line in csv_read:
print line
读取指定列
sheet = pd.read_csv(path)
columns=list(sheet.columns)
for i in columns:
c=sheet[i]
# sheet[columns[i]].values
4.2两列表转df;一行行读取数据
from pandas.core.frame import DataFrame
a=[[1,2,3,4],[5,6,7,8]]#包含两个不同的子列表[1,2,3,4]和[5,6,7,8]
data=DataFrame(a)#这时候是以行为标准写入的
data=data.T
# print(data)
for index, row in data.iterrows():
print(row.values[1])
5.字符串排除
if '业主' in '' or '手机号' in '' or '' in {} or '' in {}:
continue
else:
插入数据库
6.插入数据库
import pymysql
# 打开数据库连接
try:
db = pymysql.connect(host='localhost', user='root', passwd='666666', port=3306, db='Mysql8')
print('连接成功!')
except:
print('something wrong!')
# 使用 cursor() 方法创建一个游标对象 cursor
cursor = db.cursor()
# SQL 插入语句
sql = """INSERT INTO EMPLOYEE(FIRST_NAME,
LAST_NAME, AGE, SEX, INCOME)
VALUES ('Mac', 'Mohan', 20, 'M', 2000)"""
try:
# 执行sql语句
cursor.execute(sql)
# 提交到数据库执行
db.commit()
print('数据插入成功!')
except:
# 如果发生错误则回滚
db.rollback()
print('数据插入错误!')
# 关闭数据库连接
db.close()
import datetime
import jieba
import psycopg2
if __name__ == '__main__':
conn = psycopg2.connect(
database="book",
user='postgres',
password='123456',
host='127.0.0.1',
port='5432'
)
cursor = conn.cursor()
cursor.execute('select title,id from library3 ;')
mydata = cursor.fetchall()
cursor.close()
with open(r'./stopwords/hit_stopwords.txt', 'r', encoding='utf8') as f:
stopword = set([i for i in f.read().split('\n')])
cursor = conn.cursor()
for i in mydata:
# print(i[0])
# print([j for j in jieba.cut_for_search(i[0]) if j.strip() and j.strip() not in stopword])
l = [j for j in jieba.cut_for_search(i[0]) if j.strip() and j.strip() not in stopword]
# l=[j for j in jieba.cut(i[0],cut_all=True) if j.strip() and j.strip() not in stopword]
ll = [f'"{k}":{v + 1}' for v, k in enumerate(l)]
# print(ll)
query = (f"UPDATE library3 SET title_word= %s WHERE id = %s;")
cursor.execute(query, tuple([' '.join(ll), i[1]]))
cursor.close()
conn.commit()
conn.close()