文本文件的数据存取
1.CSV
import csv
#读
with open("TXT_COMMA.txt") as cf:
lines=csv.reader(cf)
for line in lines:
print(line)
#写
headers=['A','B']
rows=[(1,4),(2,5),(3,6)]
f=open("TXT_COMMA2.txt ",'a+')
wf=csv.writer(f)
wf.writerow(headers)
wf.writerows(rows)
f.close()
2.Numpy
import numpy
#读
A=numpy.loadtxt("TXT_COMMA.txt", dtype=str, delimiter=",", unpack=False)
print(A)
#写
data=[['A','B'],['1','2'],['3','4'],['5','6']]
numpy.savetxt("TXT_COMMA3.txt", data, delimiter=",", newline="\r\n", fmt="%s,%s")
3.Pandas
import pandas as pd
#读
A=pd.read_csv('TXT_COMMA.txt')
print(A)
#写
A=pd.read_csv('TXT_COMMA.txt')
A.to_csv('TXT_COMMA4.txt')
4.JSON
import json
#写
obj = [[1,2,3],123,123.123,'abc',{'key1':(1,2,3),'key2':(4,5,6)}]
fp = open('test.json', 'w')
json.dump(obj, fp)
fp.close()
#读
obj = json.load(open('test.json'))
print(obj)
#读
import pandas as pd
A=pd.read_json('test2.json')
print(A)
#写
A=pd.DataFrame([['a',123],['b',456],['c',789]])
fp = open('test3.json', 'w')
fp.write(A.to_json(force_ascii=False))
fp.close()
Excel文件的数据存取
import xlwings as xw
#读
app=xw.App(visible=True, add_book=False)
app.display_alerts=False
app.screen_updating=False
wb=app.books.open('test.xlsx')
data=app.books[0].sheets[0].range('A1:D6').value
print(data)
wb.close()
app.quit()
#写
import xlwings as xw
xw.Book()
sht=xw.books[0].sheets.active
# 将1,2,3分别写入A1,B1,C1单元格中
sht.range('A1').value=[1,2,3]
# 将1,2,3分别写入A1,A2,A3单元格中
sht.range('A1').options(transpose=True).value=[1,2,3]
xw.books[0].save('test2.xlsx')
xw.books[0].close()
wb = xw.Book()
sht = wb.sheets[0]
info_list = [['20190001','已揽收','Beijing'],
['20190002','已发货','Shanghai'],
['20191234','已揽收','Tianjin'],
['20192234','已发货','Chengdu'],
['20195678','正在派送','Chongqing']]
titles = [['包裹号','状态','地点']]
sht.range('a1').value = titles
sht.range('a2').value = info_list
wb.save('test2.xlsx')
爬虫
from bs4 import BeautifulSoup
import requests
import pandas as pd
content = requests.get('http://www.zol.com.cn/').content
soup = BeautifulSoup(content, 'html.parser', from_encoding='utf-8')
ul = soup.find(id="focus_news_1").find("ul", class_="news-list")
text = []
href = []
for item in ul.find_all('a'):
text.append(item.string)
href.append(item.get('href'))
print(text)
print(href)
df = pd.DataFrame({'Text': pd.Series(text), 'Href': pd.Series(href)})
df.to_csv('zol_TopNews.csv', encoding='utf-8-sig')
MySQL
import mysql.connector
#数据库的连接
try:
con=mysql.connector.connect(host='MySQL数据库服务器地址',port=3306,user='数据库用户名',password='数据库密码',database='数据库名称',charset='utf8')
print(con.connection_id)
con.close()
except mysql.connector.Error as e:
print(e)
#用字典方式连接
try:
config={
'host':'MySQL数据库服务器地址',
'port':3306,
'user':'数据库用户名',
'password':'数据库密码',
'database':'数据库名称',
'charset':'utf8'
}
con=mysql.connector.connect(**config)
print(con.connection_id)
cursor=con.cursor()
sql=("INSERT INTO USERS(name,age) VALUES ('Alice',20)")
cursor.execute(sql)
con.commit()
print(cursor.lastrowid)
#元祖
sql1=("INSERT INTO USERS(name,age) VALUES (%s,%s)")
data=('Bob',21)
cursor.execute(sql1,data)
con.commit()
print(cursor.lastrowid)
sql2=("INSERT INTO USERS(name,age) VALUES (%(name)s,%(age)s)")
data={'name':'Calvin', 'age':22}
cursor.execute(sql2,data)
con.commit()
print(cursor.lastrowid)
except connector.Error as e:
print(e)
#批量存储
sql=("INSERT INTO USERS(name,age) VALUES (%(name)s,%(age)s)")
data=[{'name':'Calvin', 'age':22},{'name':'Douglas', 'age':23},{'name':'Einstein', 'age':24}]
cursor.executemany(sql,data)
con.commit()
#取数据
sql=("select userid,name,age from users")
cursor.execute(sql)
result=cursor.fetchall()
for user in result:
print(user)
sql=("select userid,name,age from users where userid='4'")
cursor.execute(sql)
result=cursor.fetchone()
print(result)
#删除数据
sql=("delete from users where userid='4'")
cursor.execute(sql)
con.commit()
#更新数据
sql=("update users set name=%(name)s where userid=%(userid)s")
data={'userid':5, 'name': 'Doctor'}
cursor.execute(sql, data)
con.commit()
cursor.close()
con.close()
except connector.Error as e:
print(e)
ORM
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String
#建表
connect = create_engine("mysql+mysqlconnector://数据库用户名:数据库密码@MySQL数据库服务器地址:3306/数据库名称", encoding="utf-8", echo=True)
Base = declarative_base()
class Course(Base):
__tablename__ = "courses"
courseid = Column(Integer, primary_key=True)
title = Column(String(50))
Base.metadata.create_all(connect)
#添加数据
session_class = sessionmaker(bind=connect)
session = session_class()
course = Course(title="计算机文化基础")
session.add(course)
session.commit()
#查询数据
data = session.query(Course).filter_by(courseid="1").first()
print(str(data.courseid)+', '+data.title)
dataall = session.query(Course).filter(Course.courseid>1).all()
for course in dataall:
print(str(course.courseid)+', '+course.title)
#修改数据
data.title = "Python语言程序设计"
session.commit()
#删除数据
session.query(Course).filter(Course.courseid>2).delete()
session.commit()
来源《python数据分析与可视化》