【数据准备和特征工程】1-1感知文件中的数据

1.1.1 CSV文件

import csv
path = "../data/cities.csv"  # an arbitrary sample CSV file
# Open the file in a context manager so the handle is closed as soon as
# reading finishes; newline="" lets the csv module handle line endings itself.
with open(path, newline="") as f:
    data = csv.reader(f)    # ①
    # Each row is yielded as a list of strings, one string per field.
    for line in data:
        print(line)
['name', ' area', ' population', ' longd', ' latd']
['Nanjing', ' 6582.31', ' 8004680', ' 118.78', ' 32.04']
['Wuxi', ' 4787.61', ' 6372624', ' 120.29', ' 31.59']
['Xuzhou', ' 11764.88', ' 8580500', ' 117.2', ' 34.26']
['Changzhou', ' 4384.57', ' 4591972', ' 119.95', ' 31.79']
['Soochow', ' 8488.42', ' 10465994', ' 120.62', ' 31.32']
['Nantong', ' 8001', ' 7282835', ' 120.86', ' 32.01']
['Lianyungang', ' 7615.29', ' 4393914', ' 119.16', ' 34.59']
['Huaian', ' 9949.97', ' 4799889', ' 119.15', ' 33.5']
['Yancheng', ' 16972.42', ' 7260240', ' 120.13', ' 33.38']
['Yangzhou', ' 6591.21', ' 4459760', ' 119.42', ' 32.39']
['Zhenjiang', ' 3840.32', ' 3113384', ' 119.44', ' 32.2']
['Taizhou', ' 5787.26', ' 4618558', ' 119.9', ' 32.49']
['Suqian', ' 8555', ' 4715553', ' 118.3', ' 33.96']
import pandas as pd
# Read the CSV file with pandas.
# The fields in this file carry a space after each comma, so without
# skipinitialspace=True the column labels would be " area", " population", ...
# (with a leading space) instead of "area", "population", ...
df = pd.read_csv(path, skipinitialspace=True)
df
           name      area  population   longd   latd
0       Nanjing   6582.31     8004680  118.78  32.04
1          Wuxi   4787.61     6372624  120.29  31.59
2        Xuzhou  11764.88     8580500  117.20  34.26
3     Changzhou   4384.57     4591972  119.95  31.79
4       Soochow   8488.42    10465994  120.62  31.32
5       Nantong   8001.00     7282835  120.86  32.01
6   Lianyungang   7615.29     4393914  119.16  34.59
7        Huaian   9949.97     4799889  119.15  33.50
8      Yancheng  16972.42     7260240  120.13  33.38
9      Yangzhou   6591.21     4459760  119.42  32.39
10    Zhenjiang   3840.32     3113384  119.44  32.20
11      Taizhou   5787.26     4618558  119.90  32.49
12       Suqian   8555.00     4715553  118.30  33.96

1.1.2 Excel文件

# Location of the multi-class iris workbook.
path = "../data/多分类鸢尾花.xlsx"
# Load the first worksheet into a DataFrame, then preview its first five rows.
flower_df = pd.read_excel(io=path, sheet_name=0)
flower_df.head(n=5)
   萼片长  萼片宽  花瓣长  花瓣宽  种类
0  5.0  3.3  1.4  0.2  山鸢尾
1  6.7  3.1  4.4  1.4  变色鸢尾
2  6.3  2.7  4.9  1.8  维吉尼亚鸢尾
3  4.4  2.9  1.4  0.2  山鸢尾
4  7.7  2.6  6.9  2.3  维吉尼亚鸢尾

1.1.3 图形文件

# Use the Pillow library.
from PIL import Image    # ○16

# Open the image file at the given path with Pillow.
color_image = Image.open("../data/starryNight.jpg")    # ○17
# Bare expression: presumably the last line of a notebook cell, so the image is displayed.
color_image


png

从url中读取

Image.open 的参数 fp 可以是文件路径,也可以是类文件对象(例如用 BytesIO 包装的字节数据),下面演示从图床中读取文件并展示

import requests
from io import BytesIO
# Bound the wait with a timeout so an unresponsive host cannot hang the cell.
req = requests.get("https://pic-1257412153.cos.ap-nanjing.myqcloud.com/beautiful/大炎,赦封神明!_87744911.jpg", timeout=10)
if req.status_code != 200:
    # Stop here: the body of an error response is not image data, so passing
    # it on to Image.open would only fail later with a less helpful error.
    raise RuntimeError("图片请求错误,请尝试加入header")
# Wrap the downloaded bytes in a file-like object that Image.open can read.
byte_img = BytesIO(req.content)
img = Image.open(byte_img)
img


png

使用opencv读取图片

读取方式:cv2.imread(filename, flags),其中 flags 取下列 ImreadModes 之一

cv::ImreadModes {
cv::IMREAD_UNCHANGED = -1,
cv::IMREAD_GRAYSCALE = 0,
cv::IMREAD_COLOR = 1,
cv::IMREAD_ANYDEPTH = 2,
cv::IMREAD_ANYCOLOR = 4,
cv::IMREAD_LOAD_GDAL = 8,
cv::IMREAD_REDUCED_GRAYSCALE_2 = 16,
cv::IMREAD_REDUCED_COLOR_2 = 17,
cv::IMREAD_REDUCED_GRAYSCALE_4 = 32,
cv::IMREAD_REDUCED_COLOR_4 = 33,
cv::IMREAD_REDUCED_GRAYSCALE_8 = 64,
cv::IMREAD_REDUCED_COLOR_8 = 65,
cv::IMREAD_IGNORE_ORIENTATION = 128
}

# View an image with the OpenCV library.
import cv2
# The result is a NumPy array.
# IMREAD_UNCHANGED (-1) loads the file as stored, without converting it.
img = cv2.imread("../data/starryNight.jpg", cv2.IMREAD_UNCHANGED)
# imread does not raise when the file is missing or unreadable; it returns
# None, so fail here with a clear message rather than later during display.
if img is None:
    raise FileNotFoundError("cv2.imread could not read ../data/starryNight.jpg")
img
array([[[ 53,  90, 104],
        [ 31,  34,  39],
        [ 54,  39,  36],
        ...,
        [124, 161, 183],
        [138, 170, 189],
        [122, 150, 167]],

       [[ 65,  89, 101],
        [ 32,  20,  26],
        [ 54,  25,  21],
        ...,
        [138, 172, 195],
        [134, 166, 185],
        [132, 159, 179]],

       [[ 79,  85,  96],
        [ 53,  26,  30],
        [ 84,  37,  33],
        ...,
        [121, 156, 176],
        [131, 162, 183],
        [134, 164, 183]],

       ...,

       [[ 90, 149, 164],
        [121, 128, 148],
        [110, 129, 144],
        ...,
        [134, 171, 193],
        [116, 154, 178],
        [140, 177, 203]],

       [[ 99, 174, 183],
        [ 86, 105, 120],
        [121, 146, 156],
        ...,
        [114, 148, 171],
        [103, 139, 163],
        [144, 179, 205]],

       [[ 69, 149, 160],
        [102, 125, 140],
        [136, 159, 167],
        ...,
        [ 99, 133, 156],
        [102, 136, 159],
        [142, 176, 200]]], dtype=uint8)
import matplotlib.pyplot as plt
# IPython magic: render figures inline in the notebook output.
%matplotlib inline
# Optional: switch inline figures to SVG (left disabled).
# %config InlineBackend.figure_format = 'svg'
# img comes straight from cv2.imread, so its channels are in BGR order and the
# colours shown by matplotlib (which expects RGB) look wrong.
plt.imshow(img)
# Passing empty lists removes the tick marks on both axes.
plt.xticks([]), plt.yticks([])
(([], []), ([], []))

在这里插入图片描述

注意到图片并不是我们想要的效果

这是因为:opencv的接口使用BGR,而matplotlib.pyplot 则是RGB模式

# Convert from OpenCV's BGR channel order to the RGB order matplotlib expects.
# cvtColor does the swap in a single call, instead of splitting the array into
# three single-channel copies and merging them back together.
img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img2)
# Passing empty lists removes the tick marks on both axes.
plt.xticks([]), plt.yticks([])
plt.show()

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值