Python数据化运营之读取数据一_python中readdata函数-CSDN博客

本文链接：https://blog.csdn.net/qq_42169061/article/details/104854982

1.从文本文件读取运营数据

1.1 使用read、readline、readlines读取数据

file_name = 'text.txt'

# Each read below opens its own handle inside a `with` block, so the file is
# closed as soon as the read finishes, even if the read raises.

# read(): the whole file as one string.
with open(file_name) as file_object:
    read_data = file_object.read()
print(read_data)

# readline(): only the first line of the file.
with open(file_name) as file_object:
    readline_data = file_object.readline()
print(readline_data)

# readlines(): every line of the file, as a list of strings.
with open(file_name) as file_object:
    readlines_data = file_object.readlines()
print(readlines_data)

# Show how the file position moves as lines are read one at a time.
# The `with` block closes the file even if a read or print raises.
with open('text.txt') as fn:
    print(fn.tell())  # position before anything is read
    line1 = fn.readline()  # first line of the file
    print(line1)
    print(fn.tell())  # position after the first line
    line2 = fn.readline()  # second line of the file
    print(line2)
    print(fn.tell())  # position after the second line

1.2 使用Numpy的loadtxt、load、fromfile读取数据

import numpy as np  # 导入numpy库

# Load a space-delimited text file into a float32 array and display it.
file_name = 'numpy_data.txt'
data = np.loadtxt(
    file_name,
    dtype='float32',
    delimiter=' ',
)
print(data)

import numpy as np  # 导入numpy库

# Build the 3x4 array holding 1..12 that will be written to disk.
write_data = np.arange(1, 13).reshape(3, 4)
# Save under the name 'load_data'; the file is read back as 'load_data.npy'.
np.save('load_data', write_data)
read_data = np.load('load_data.npy')
print(read_data)

import numpy as np  # 导入numpy库

# Round-trip the text data through a raw binary file.
file_name = 'numpy_data.txt'
data = np.loadtxt(
    file_name,
    dtype='float32',
    delimiter=' ',
)
tofile_name = 'binary'  # name of the exported binary file
data.tofile(tofile_name)
# tofile stores only the raw values, so the dtype must be given again here
# and the result comes back as a flat 1-D array.
fromfile_data = np.fromfile(tofile_name, dtype='float32')
print(fromfile_data)

1.3 使用Pandas的read_csv、read_fwf、read_table读取数据

import pandas as pd  # 导入Pandas库

# Read the CSV file, supplying the five column labels explicitly.
csv_data = pd.read_csv(
    'csv_data.csv',
    names=['col1', 'col2', 'col3', 'col4', 'col5'],
)
print(csv_data)

import pandas as pd  # 导入Pandas库

# Read a fixed-width file: four fields of 5 characters each, labelled
# col1..col4.
fwf_data = pd.read_fwf(
    'fwf_data',
    widths=[5, 5, 5, 5],
    names=['col1', 'col2', 'col3', 'col4'],
)
print(fwf_data)

import pandas as pd  # 导入Pandas库

# Read a semicolon-delimited text file, supplying the five column labels
# explicitly.
table_data = pd.read_table(
    'table_data.txt',
    sep=';',
    names=['col1', 'col2', 'col3', 'col4', 'col5'],
)
print(table_data)

2.从Excel获取运营数据

# 导入库
import xlrd

# Open the workbook.
# NOTE(review): xlrd 2.0+ reads only .xls files, so opening 'demo.xlsx'
# presumably requires xlrd < 2.0 — confirm the installed version.
xlsx = xlrd.open_workbook('demo.xlsx')
# List the names of all sheets in the workbook.
print('All sheets: %s' % xlsx.sheet_names())

# Summarize the first sheet (sheet indexes start at 0).
sheet1 = xlsx.sheet_by_index(0)
sheet1_name = sheet1.name  # sheet name
sheet1_cols = sheet1.ncols  # number of columns
sheet1_nrows = sheet1.nrows  # number of rows
print('Sheet1 Name: %s\nSheet1 cols: %s\nSheet1 rows: %s' % (sheet1_name, sheet1_cols, sheet1_nrows))

# Pull specific slices of the first sheet; row and column indexes are 0-based.
sheet1_nrows4 = sheet1.row_values(4)  # row at index 4, i.e. the fifth row
sheet1_cols2 = sheet1.col_values(2)  # column at index 2, i.e. the third column
cell23 = sheet1.cell_value(2, 3)  # cell in the third row, fourth column
print('Row 4: %s\nCol 2: %s\nCell 1: %s\n' % (sheet1_nrows4, sheet1_cols2, cell23))

# Print every row of the first sheet.
for row_index in range(sheet1_nrows):
    print(sheet1.row_values(row_index))

3.从关系型数据库MySQL读取运营数据

import pymysql  # 导入库

# Connection settings for the MySQL server.
# NOTE: the credentials are hard-coded for demonstration only; load them from
# configuration or the environment in real use.
config = {'host': '127.0.0.1',  # server address
          'user': 'root',  # user name
          'password': '123456',  # password
          'port': 3306,  # server port
          'database': 'python_data',  # database name
          'charset': 'utf8'  # connection character set
          }
cnn = pymysql.connect(**config)  # open the MySQL connection
try:
    cursor = cnn.cursor()
    try:
        sql = "SELECT * FROM `order`"  # `order` is a reserved word, hence the backticks
        cursor.execute(sql)
        data = cursor.fetchall()  # fetch every row of the result set
        for i in data[:2]:  # print only the first two rows
            print(i)
    finally:
        # Release the cursor even if the query or fetch fails.
        cursor.close()
finally:
    # Release the connection even if anything above fails.
    cnn.close()

4.从非关系型数据库MongoDB读取运营数据

from pymongo import MongoClient  # 导入库

# Connect to the MongoDB server; replace the address and port with those of
# the actual server.
client = MongoClient('10.0.0.54', 27017)
try:
    db = client.python_data  # select the python_data database
    orders = db.ordersets  # select the ordersets collection
    # Two sample documents to insert.
    terms = [{"user": "tony", "id": "31020", "age": "30", "products": ["215120", "245101", "128410"],
              "date": "2017-04-06"},
             {"user": "lucy", "id": "32210", "age": "29", "products": ["541001", "340740", "450111"],
              "date": "2017-04-06"}]
    orders.insert_many(terms)  # insert both documents
    print(orders.find_one())  # fetch and print a single document
    for i in orders.find():  # fetch and print every document in the collection
        print(i)
finally:
    # Close the client so its connections are released even if an operation
    # above fails.
    client.close()