文章目录
36. JPEG压缩- 第一步: 离散余弦变换(Discrete Cosine Transformation)(DCT)
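离散余弦变换(Discrete Cosine Transformation)(DCT) 是使用下面的式子计算的频率变换:
$$F(u,v)=\frac{2}{T}\,C(u)\,C(v)\sum_{y=0}^{T-1}\sum_{x=0}^{T-1} I(x,y)\cos\frac{(2x+1)u\pi}{2T}\cos\frac{(2y+1)v\pi}{2T},\qquad C(u)=\begin{cases}\dfrac{1}{\sqrt{2}} & (u=0)\\ 1 & (u\neq 0)\end{cases}$$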
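离散余弦逆变换(Inverse Discrete Cosine Transformation)(IDCT) 是离散余弦变换的逆变换,使用下式定义:
$$f(x,y)=\frac{2}{T}\sum_{v=0}^{K-1}\sum_{u=0}^{K-1} C(u)\,C(v)\,F(u,v)\cos\frac{(2x+1)u\pi}{2T}\cos\frac{(2y+1)v\pi}{2T}$$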
在这里,K是决定图像复原时分辨率高低的参数.K = T时,DCT的系数全部被保留,因此IDCT时分辨率最大.K=1或者K=2时,图像复原时的信息量(DCT系数)减少,分辨率降低.如果适当地设定K,可以减小文件大小.
在这里我们先将图像分割成8×8的小块,在各个小块中使用离散余弦变换编码,使用离散余弦逆变换解码,这就是JPEG的编码过程.
现在我们也同样地,把图像分割成8×8的小块,然后进行离散余弦变换和离散余弦逆变换.
# @Time : 2022/6/16 11:59
# @Author : Fioman
# @Phone : 13149920693
# @Tips : Talk is Cheap,Show me the code! ^_^^_^
from settings import *
import matplotlib.pyplot as plt
# DCT hyper-parameters
# T: side length of the square blocks; dct/idct walk the image in steps of T.
T = 8
# K: number of coefficients per axis that idct sums over (K = T keeps them all).
K = 8
# DCT weight
def w(x, y, u, v):
    """Return the DCT basis weight for pixel (x, y) and frequency (u, v).

    The value is (2 / T) * C(u) * C(v) * cos((2x+1)*u*pi/(2T)) * cos((2y+1)*v*pi/(2T)),
    where C(k) is 1/sqrt(2) for k == 0 and 1 otherwise, and T is the
    module-level block size.
    """
    # Only the zero-frequency terms are scaled down by sqrt(2).
    scale_u = 1. / np.sqrt(2) if u == 0 else 1.
    scale_v = 1. / np.sqrt(2) if v == 0 else 1.
    theta = np.pi / (2 * T)
    amplitude = 2 * scale_u * scale_v / T
    return amplitude * np.cos((2 * x + 1) * u * theta) * np.cos((2 * y + 1) * v * theta)
# DCT
def dct(image):
    """Apply a block-wise 2-D discrete cosine transform to every channel.

    The image is tiled into T x T blocks (T is the module-level block size)
    and each block is transformed independently with the weights from ``w``.

    Args:
        image: array of shape (H, W, channels); H and W must be multiples of T.

    Returns:
        float32 array of the same shape. Inside each block the entry at
        row v, column u is the coefficient for frequency (u, v).

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, channel = image.shape
    if H % T or W % T:
        raise ValueError("image height and width must be multiples of T")

    # The weights depend only on (v, u, y, x), never on the block position or
    # channel, so build the table once instead of calling w() per pixel.
    weights = np.zeros((T, T, T, T), dtype=np.float64)
    for v in range(T):
        for u in range(T):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = w(x, y, u, v)

    F = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                block = image[yi:yi + T, xi:xi + T, c]
                # F[v, u] = sum over (y, x) of block[y, x] * weights[v, u, y, x]
                F[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    weights, block, axes=([2, 3], [0, 1]))
    return F
# IDCT
def idct(F):
    """Reconstruct an image from block-wise DCT coefficients.

    For each T x T block only the K x K coefficients at the top-left of the
    block are summed (T and K are module-level settings), so K < T drops the
    remaining coefficients.

    Args:
        F: coefficient array of shape (H, W, channels) laid out as returned by
            ``dct``; H and W must be multiples of T.

    Returns:
        uint8 array of the same shape, clipped to [0, 255] and rounded.

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, channel = F.shape
    if H % T or W % T:
        raise ValueError("coefficient height and width must be multiples of T")

    # The weights are identical for every block and channel: compute them once.
    weights = np.zeros((K, K, T, T), dtype=np.float64)
    for v in range(K):
        for u in range(K):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = w(x, y, u, v)

    out = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                coeffs = F[yi:yi + K, xi:xi + K, c]
                # out[y, x] = sum over (v, u) of coeffs[v, u] * weights[v, u, y, x]
                out[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    coeffs, weights, axes=([0, 1], [0, 1]))

    out = np.clip(out, 0, 255)
    out = np.round(out).astype(np.uint8)
    return out
# Read the input image (os, cv, np and OPENCV_100_Q_PATH are presumably
# provided by `from settings import *` -- confirm against settings.py).
imagePath = os.path.join(OPENCV_100_Q_PATH, "gray_02.bmp")
imageOriginal = cv.imread(imagePath)
if imageOriginal is None:
    # cv.imread reports failure by returning None instead of raising.
    raise OSError("failed to read image: {}".format(imagePath))
imageOriginal = imageOriginal.astype(np.float32)
# DCT
F = dct(imageOriginal)
# IDCT
out = idct(F)
# Show the raw coefficients and the reconstructed image until a key is pressed.
cv.imshow("Dct", F)
cv.imshow("IDCT", out)
cv.waitKey(0)
37. PSNR
离散余弦逆变换中如果不使用8作为系数,而是使用4作为系数的话,图像的画质会变差.来求输入图像和刚经过离散余弦逆变换之后的图像的峰值信噪比吧!再求出离散余弦逆变换的比特率吧!
峰值信噪比(Peak Signal to Noise Ratio)缩写为PSNR,用来表示信号最大可能功率和影响它的表示精度的破坏性噪声功率的比值,可以显示图像画质损失程度.
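峰值信噪比越大,表示画质损失越小.峰值信噪比通过下式定义.MAX表示图像点颜色的最大数值.如果取值范围是[0,255]的话,那么MAX的值就是255.MSE表示均方差(Mean Squared Error),用来表示两个图像各个像素点之间差值平方和的平均数:
$$\mathrm{PSNR}=10\log_{10}\frac{\mathrm{MAX}^2}{\mathrm{MSE}},\qquad \mathrm{MSE}=\frac{1}{H\,W\,C}\sum_{c=0}^{C-1}\sum_{y=0}^{H-1}\sum_{x=0}^{W-1}\bigl(I_1(x,y,c)-I_2(x,y,c)\bigr)^2$$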
如果我们进行 8×8 的离散余弦变换,离散余弦逆变换的系数为 K×K 的话,比特率按下式定义:
$$\mathrm{bitrate}=8\times\frac{K^2}{8^2}$$
import cv2
import numpy as np
import matplotlib.pyplot as plt
# DCT hyper-parameters
# T: side length of the square blocks; dct/idct walk the image in steps of T.
T = 8
# K: number of coefficients per axis that idct sums over (K < T discards the rest).
K = 4
# channel: number of image channels processed by dct, idct and MSE.
channel = 3
# DCT weight
def w(x, y, u, v):
    """Return the DCT basis weight for pixel (x, y) and frequency (u, v).

    The value is (2 / T) * C(u) * C(v) * cos((2x+1)*u*pi/(2T)) * cos((2y+1)*v*pi/(2T)),
    where C(k) is 1/sqrt(2) for k == 0 and 1 otherwise, and T is the
    module-level block size.
    """
    # Only the zero-frequency terms are scaled down by sqrt(2).
    scale_u = 1. / np.sqrt(2) if u == 0 else 1.
    scale_v = 1. / np.sqrt(2) if v == 0 else 1.
    theta = np.pi / (2 * T)
    amplitude = 2 * scale_u * scale_v / T
    return amplitude * np.cos((2 * x + 1) * u * theta) * np.cos((2 * y + 1) * v * theta)
# DCT
def dct(img):
    """Apply a block-wise 2-D discrete cosine transform to every channel.

    The image is tiled into T x T blocks and each block is transformed
    independently with the weights from ``w``. T and ``channel`` are
    module-level settings.

    Args:
        img: array of shape (H, W, C) with at least ``channel`` channels;
            H and W must be multiples of T.

    Returns:
        float32 array of shape (H, W, channel). Inside each block the entry at
        row v, column u is the coefficient for frequency (u, v).

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = img.shape
    if H % T or W % T:
        raise ValueError("image height and width must be multiples of T")

    # The weights depend only on (v, u, y, x), never on the block position or
    # channel, so build the table once instead of calling w() per pixel.
    weights = np.zeros((T, T, T, T), dtype=np.float64)
    for v in range(T):
        for u in range(T):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = w(x, y, u, v)

    F = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                block = img[yi:yi + T, xi:xi + T, c]
                # F[v, u] = sum over (y, x) of block[y, x] * weights[v, u, y, x]
                F[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    weights, block, axes=([2, 3], [0, 1]))
    return F
# IDCT
def idct(F):
    """Reconstruct an image from block-wise DCT coefficients.

    For each T x T block only the K x K coefficients at the top-left of the
    block are summed, so K < T drops the remaining coefficients. T, K and
    ``channel`` are module-level settings.

    Args:
        F: coefficient array of shape (H, W, C) laid out as returned by
            ``dct``; H and W must be multiples of T.

    Returns:
        uint8 array of shape (H, W, channel), clipped to [0, 255] and rounded.

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = F.shape
    if H % T or W % T:
        raise ValueError("coefficient height and width must be multiples of T")

    # The weights are identical for every block and channel: compute them once.
    weights = np.zeros((K, K, T, T), dtype=np.float64)
    for v in range(K):
        for u in range(K):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = w(x, y, u, v)

    out = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                coeffs = F[yi:yi + K, xi:xi + K, c]
                # out[y, x] = sum over (v, u) of coeffs[v, u] * weights[v, u, y, x]
                out[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    coeffs, weights, axes=([0, 1], [0, 1]))

    out = np.clip(out, 0, 255)
    out = np.round(out).astype(np.uint8)
    return out
# MSE
def MSE(img1, img2):
    """Return the mean squared error between two images of the same shape.

    The difference is taken in float64 so that unsigned 8-bit inputs cannot
    wrap around, and the average runs over every element of the arrays
    instead of relying on a module-level channel count.

    Args:
        img1: first image (any numeric dtype).
        img2: second image, broadcast-compatible with ``img1``.

    Returns:
        The mean of the squared element-wise differences.
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.mean(diff ** 2)
# PSNR
def PSNR(mse, vmax=255):
    """Return the peak signal-to-noise ratio 10 * log10(vmax^2 / mse).

    Args:
        mse: mean squared error between the two images.
        vmax: largest possible pixel value.

    Returns:
        The PSNR value; ``inf`` when ``mse`` is zero (identical images).
    """
    if mse == 0:
        # Identical images: avoid dividing by zero and report infinite PSNR.
        return float("inf")
    return 10 * np.log10(vmax * vmax / mse)
# bitrate
def BITRATE():
    """Return T * K^2 / T^2 as a float, using the module-level T and K."""
    kept = 1. * T * K * K
    return kept / T / T
# Read image
img = cv2.imread("imori.jpg")
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError("failed to read image: imori.jpg")
img = img.astype(np.float32)
# DCT
F = dct(img)
# IDCT
out = idct(F)
# MSE
mse = MSE(img, out)
# PSNR
psnr = PSNR(mse)
# bitrate
bitrate = BITRATE()
print("MSE:", mse)
print("PSNR:", psnr)
print("bitrate:", bitrate)
# Show the result until a key is pressed, then save it
cv2.imshow("result", out)
cv2.waitKey(0)
cv2.imwrite("out.jpg", out)
38. JPEG 压缩 - 第二步: 离散余弦变换 + 量化
量化离散余弦变换系数并使用 离散余弦逆变换恢复。再比较变换前后图片的大小。
量化离散余弦变换系数是用于编码 JPEG 图像的技术。
量化即将数值舍入到预定义的区间(离散级别)上,floor、ceil、round 等都是类似的运算。
在 JPEG 图像中,根据下面所示的量化矩阵量化离散余弦变换系数。该量化矩阵取自 JPEG 软件开发联合会组织颁布的标准量化表。在量化中,将 8×8 的系数除以量化矩阵 Q 并四舍五入,之后再乘以 Q。对于离散余弦逆变换,应使用所有系数。
# 8x8 quantization matrix: each 8x8 block of DCT coefficients is divided
# element-wise by Q, rounded, and multiplied by Q again.
Q = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
(12, 12, 14, 19, 26, 58, 60, 55),
(14, 13, 16, 24, 40, 57, 69, 56),
(14, 17, 22, 29, 51, 87, 80, 62),
(18, 22, 37, 56, 68, 109, 103, 77),
(24, 35, 55, 64, 81, 104, 113, 92),
(49, 64, 78, 87, 103, 121, 120, 101),
(72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
由于量化降低了图像的大小,因此可以看出数据量已经减少。
from settings import *
import matplotlib.pyplot as plt
# DCT hyper-parameters
# T: side length of the square blocks; dct/idct walk the image in steps of T.
T = 8
# K: number of coefficients per axis that idct sums over. Quantization is the
# lossy step in this exercise and the inverse transform is meant to use every
# coefficient, so K equals T (K = 4 would additionally discard coefficients).
K = T
# channel: number of image channels processed by dct, idct and quantization.
channel = 3
# DCT weight
def DCT_w(x, y, u, v):
    """Return the DCT basis weight for pixel (x, y) and frequency (u, v).

    The value is (2 / T) * C(u) * C(v) * cos((2x+1)*u*pi/(2T)) * cos((2y+1)*v*pi/(2T)),
    where C(k) is 1/sqrt(2) for k == 0 and 1 otherwise, and T is the
    module-level block size.
    """
    # Only the zero-frequency terms are scaled down by sqrt(2).
    scale_u = 1. / np.sqrt(2) if u == 0 else 1.
    scale_v = 1. / np.sqrt(2) if v == 0 else 1.
    theta = np.pi / (2 * T)
    amplitude = 2 * scale_u * scale_v / T
    return amplitude * np.cos((2 * x + 1) * u * theta) * np.cos((2 * y + 1) * v * theta)
# DCT
def dct(img):
    """Apply a block-wise 2-D discrete cosine transform to every channel.

    The image is tiled into T x T blocks and each block is transformed
    independently with the weights from ``DCT_w``. T and ``channel`` are
    module-level settings.

    Args:
        img: array of shape (H, W, C) with at least ``channel`` channels;
            H and W must be multiples of T.

    Returns:
        float32 array of shape (H, W, channel). Inside each block the entry at
        row v, column u is the coefficient for frequency (u, v).

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = img.shape
    if H % T or W % T:
        raise ValueError("image height and width must be multiples of T")

    # The weights depend only on (v, u, y, x), never on the block position or
    # channel, so build the table once instead of calling DCT_w() per pixel.
    weights = np.zeros((T, T, T, T), dtype=np.float64)
    for v in range(T):
        for u in range(T):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = DCT_w(x, y, u, v)

    F = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                block = img[yi:yi + T, xi:xi + T, c]
                # F[v, u] = sum over (y, x) of block[y, x] * weights[v, u, y, x]
                F[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    weights, block, axes=([2, 3], [0, 1]))
    return F
# IDCT
def idct(F):
    """Reconstruct an image from block-wise DCT coefficients.

    For each T x T block only the K x K coefficients at the top-left of the
    block are summed, so K < T drops the remaining coefficients. T, K and
    ``channel`` are module-level settings.

    Args:
        F: coefficient array of shape (H, W, C) laid out as returned by
            ``dct``; H and W must be multiples of T.

    Returns:
        uint8 array of shape (H, W, channel), clipped to [0, 255] and rounded.

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = F.shape
    if H % T or W % T:
        raise ValueError("coefficient height and width must be multiples of T")

    # The weights are identical for every block and channel: compute them once.
    weights = np.zeros((K, K, T, T), dtype=np.float64)
    for v in range(K):
        for u in range(K):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = DCT_w(x, y, u, v)

    out = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                coeffs = F[yi:yi + K, xi:xi + K, c]
                # out[y, x] = sum over (v, u) of coeffs[v, u] * weights[v, u, y, x]
                out[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    coeffs, weights, axes=([0, 1], [0, 1]))

    out = np.clip(out, 0, 255)
    out = np.round(out).astype(np.uint8)
    return out
# Quantization
def quantization(F):
    """Quantize block-wise DCT coefficients in place with the 8x8 table Q.

    Every 8x8 block of every channel is divided element-wise by Q, rounded,
    and multiplied by Q again. The block size is taken from Q and the channel
    count from F, so the function does not rely on module-level settings.

    Args:
        F: coefficient array of shape (H, W, C); H and W must be multiples
            of 8. It is modified in place.

    Returns:
        The same array ``F`` after quantization.
    """
    H, W, _ = F.shape
    Q = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
                  (12, 12, 14, 19, 26, 58, 60, 55),
                  (14, 13, 16, 24, 40, 57, 69, 56),
                  (14, 17, 22, 29, 51, 87, 80, 62),
                  (18, 22, 37, 56, 68, 109, 103, 77),
                  (24, 35, 55, 64, 81, 104, 113, 92),
                  (49, 64, 78, 87, 103, 121, 120, 101),
                  (72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
    size = Q.shape[0]
    # Trailing axis of length 1 lets the same table broadcast over all channels.
    table = Q[:, :, np.newaxis]
    for ys in range(0, H, size):
        for xs in range(0, W, size):
            block = F[ys: ys + size, xs: xs + size]
            F[ys: ys + size, xs: xs + size] = np.round(block / table) * table
    return F
# MSE
def MSE(img1, img2):
    """Return the mean squared error between two images of the same shape.

    The difference is taken in float64 so that unsigned 8-bit inputs cannot
    wrap around, and the average runs over every element of the arrays
    instead of relying on a module-level channel count.

    Args:
        img1: first image (any numeric dtype).
        img2: second image, broadcast-compatible with ``img1``.

    Returns:
        The mean of the squared element-wise differences.
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.mean(diff ** 2)
# PSNR
def PSNR(mse, vmax=255):
    """Return the peak signal-to-noise ratio 10 * log10(vmax^2 / mse).

    Args:
        mse: mean squared error between the two images.
        vmax: largest possible pixel value.

    Returns:
        The PSNR value; ``inf`` when ``mse`` is zero (identical images).
    """
    if mse == 0:
        # Identical images: avoid dividing by zero and report infinite PSNR.
        return float("inf")
    return 10 * np.log10(vmax * vmax / mse)
# bitrate
def BITRATE():
    """Return T * K^2 / T^2 as a float, using the module-level T and K."""
    kept = 1. * T * K * K
    return kept / T / T
# Read image
imagePath = os.path.join(OPENCV_100_Q_PATH, "gray_02.bmp")
img = cv.imread(imagePath)
if img is None:
    # cv.imread reports failure by returning None instead of raising.
    raise OSError("failed to read image: {}".format(imagePath))
img = img.astype(np.float32)
# DCT
F = dct(img)
# quantization
F = quantization(F)
# IDCT
out = idct(F)
# MSE
mse = MSE(img, out)
# PSNR
psnr = PSNR(mse)
# bitrate
bitrate = BITRATE()
print("MSE:", mse)
print("PSNR:", psnr)
print("bitrate:", bitrate)
# Show the result until a key is pressed, then save it
cv.imshow("result", out)
cv.waitKey(0)
cv.imwrite("out.jpg", out)
39. JPEG压缩- 第三步: YCbCr色彩空间
在 YCbCr 色彩空间内,将 Y 乘以0.7以使对比度变暗。
YCbCr 色彩空间是用于将图像由表示亮度的 Y、表示蓝色色度Cb以及表示红色色度Cr表示的方法。
这用于 JPEG 转换。
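使用下式从 RGB 转换到 YCbCr:
$$\begin{aligned} Y &= 0.2990\,R + 0.5870\,G + 0.1140\,B \\ C_b &= -0.1687\,R - 0.3313\,G + 0.5\,B + 128 \\ C_r &= 0.5\,R - 0.4187\,G - 0.0813\,B + 128 \end{aligned}$$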
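使用下式从 YCbCr 转换到 RGB:
$$\begin{aligned} R &= Y + (C_r - 128)\times 1.4020 \\ G &= Y - (C_b - 128)\times 0.3441 - (C_r - 128)\times 0.7139 \\ B &= Y + (C_b - 128)\times 1.7718 \end{aligned}$$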
from settings import *
import matplotlib.pyplot as plt
# Number of channels of the BGR image allocated by YCbCr2BGR.
channel = 3
# BGR -> Y Cb Cr
def BGR2YCbCr(img):
    """Convert a BGR image to the YCbCr colour space.

    Args:
        img: array of shape (H, W, C) with blue, green and red read from
            channels 0, 1 and 2.

    Returns:
        float32 array of shape (H, W, 3) holding Y, Cb and Cr in channels
        0, 1 and 2.
    """
    rows, cols, _ = img.shape
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]

    converted = np.zeros([rows, cols, 3], dtype=np.float32)
    # Luma
    converted[..., 0] = 0.2990 * red + 0.5870 * green + 0.1140 * blue
    # Blue- and red-difference chroma, both offset by 128
    converted[..., 1] = -0.1687 * red - 0.3313 * green + 0.5 * blue + 128.
    converted[..., 2] = 0.5 * red - 0.4187 * green - 0.0813 * blue + 128.
    return converted
# Y Cb Cr -> BGR
def YCbCr2BGR(ycbcr):
    """Convert a YCbCr image back to an 8-bit BGR image.

    Args:
        ycbcr: array of shape (H, W, C) with Y, Cb and Cr read from channels
            0, 1 and 2.

    Returns:
        uint8 array of shape (H, W, 3) with blue, green and red in channels
        0, 1 and 2, clipped to [0, 255] and rounded to the nearest integer.
    """
    H, W, _ = ycbcr.shape
    luma = ycbcr[..., 0]
    cb = ycbcr[..., 1] - 128.
    cr = ycbcr[..., 2] - 128.

    # A BGR image always has three channels, so the depth is fixed here.
    out = np.zeros([H, W, 3], dtype=np.float32)
    out[..., 2] = luma + cr * 1.4020
    out[..., 1] = luma - cb * 0.3441 - cr * 0.7139
    out[..., 0] = luma + cb * 1.7718

    out = np.clip(out, 0, 255)
    # Round instead of truncating: a plain uint8 cast turns 199.9999 into 199,
    # which darkens the image after a BGR -> YCbCr -> BGR round trip.
    out = np.round(out).astype(np.uint8)
    return out
# Read image
imagePath = os.path.join(OPENCV_100_Q_PATH, 'color_01.bmp')
img = cv.imread(imagePath)
if img is None:
    # cv.imread reports failure by returning None instead of raising.
    raise OSError("failed to read image: {}".format(imagePath))
img = img.astype(np.float32)
# BGR -> Y Cb Cr
ycbcr = BGR2YCbCr(img)
# Scale the luma channel by 0.7 to darken the image
ycbcr[..., 0] *= 0.7
# Y Cb Cr -> BGR
out = YCbCr2BGR(ycbcr)
# Show the result until a key is pressed, then save it
cv.imshow("result", out)
cv.waitKey(0)
cv.imwrite("out.jpg", out)
40. JPEG 压缩- 第四步: YCbCr + 离散余弦变换 + 量化
将图像转为 YCbCr 色彩空间之后,进行 离散余弦变换再对 Y 用 Q1 量化矩阵量化,Cb 和 Cr 用 Q2 量化矩阵量化。最后通过离散余弦逆变换对图像复原。还需比较图像的容量。算法如下:
- 将图像从RGB色彩空间变换到YCbCr色彩空间;
- 对YCbCr做DCT;
- DCT之后做量化;
- 量化之后应用IDCT;
- IDCT之后从YCbCr色彩空间变换到RGB色彩空间。
这是实际生活中使用的减少 JPEG 数据量的方法,Q1 和 Q2 根据 JPEG 规范由以下等式定义:
# Q1: quantization matrix applied to the Y channel.
Q1 = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
(12, 12, 14, 19, 26, 58, 60, 55),
(14, 13, 16, 24, 40, 57, 69, 56),
(14, 17, 22, 29, 51, 87, 80, 62),
(18, 22, 37, 56, 68, 109, 103, 77),
(24, 35, 55, 64, 81, 104, 113, 92),
(49, 64, 78, 87, 103, 121, 120, 101),
(72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
# Q2: quantization matrix applied to the Cb and Cr channels.
Q2 = np.array(((17, 18, 24, 47, 99, 99, 99, 99),
(18, 21, 26, 66, 99, 99, 99, 99),
(24, 26, 56, 99, 99, 99, 99, 99),
(47, 66, 99, 99, 99, 99, 99, 99),
(99, 99, 99, 99, 99, 99, 99, 99),
(99, 99, 99, 99, 99, 99, 99, 99),
(99, 99, 99, 99, 99, 99, 99, 99),
(99, 99, 99, 99, 99, 99, 99, 99)), dtype=np.float32)
代码实现:
from settings import *
import matplotlib.pyplot as plt
# DCT hyper-parameters
# T: side length of the square blocks; dct/idct walk the image in steps of T.
T = 8
# K: number of coefficients per axis that idct sums over (K = T keeps them all).
K = 8
# channel: number of image channels processed by the functions below.
channel = 3
# BGR -> Y Cb Cr
def BGR2YCbCr(img):
    """Convert a BGR image to the YCbCr colour space.

    Args:
        img: array of shape (H, W, C) with blue, green and red read from
            channels 0, 1 and 2.

    Returns:
        float32 array of shape (H, W, 3) holding Y, Cb and Cr in channels
        0, 1 and 2.
    """
    rows, cols, _ = img.shape
    blue, green, red = img[..., 0], img[..., 1], img[..., 2]

    converted = np.zeros([rows, cols, 3], dtype=np.float32)
    # Luma
    converted[..., 0] = 0.2990 * red + 0.5870 * green + 0.1140 * blue
    # Blue- and red-difference chroma, both offset by 128
    converted[..., 1] = -0.1687 * red - 0.3313 * green + 0.5 * blue + 128.
    converted[..., 2] = 0.5 * red - 0.4187 * green - 0.0813 * blue + 128.
    return converted
# Y Cb Cr -> BGR
def YCbCr2BGR(ycbcr):
    """Convert a YCbCr image back to an 8-bit BGR image.

    Args:
        ycbcr: array of shape (H, W, C) with Y, Cb and Cr read from channels
            0, 1 and 2.

    Returns:
        uint8 array of shape (H, W, 3) with blue, green and red in channels
        0, 1 and 2, clipped to [0, 255] and rounded to the nearest integer.
    """
    H, W, _ = ycbcr.shape
    luma = ycbcr[..., 0]
    cb = ycbcr[..., 1] - 128.
    cr = ycbcr[..., 2] - 128.

    # A BGR image always has three channels, so the depth is fixed here.
    out = np.zeros([H, W, 3], dtype=np.float32)
    out[..., 2] = luma + cr * 1.4020
    out[..., 1] = luma - cb * 0.3441 - cr * 0.7139
    out[..., 0] = luma + cb * 1.7718

    out = np.clip(out, 0, 255)
    # Round instead of truncating: a plain uint8 cast turns 199.9999 into 199,
    # which darkens the image after a BGR -> YCbCr -> BGR round trip.
    out = np.round(out).astype(np.uint8)
    return out
# DCT weight
def DCT_w(x, y, u, v):
    """Return the DCT basis weight for pixel (x, y) and frequency (u, v).

    The value is (2 / T) * C(u) * C(v) * cos((2x+1)*u*pi/(2T)) * cos((2y+1)*v*pi/(2T)),
    where C(k) is 1/sqrt(2) for k == 0 and 1 otherwise, and T is the
    module-level block size.
    """
    # Only the zero-frequency terms are scaled down by sqrt(2).
    scale_u = 1. / np.sqrt(2) if u == 0 else 1.
    scale_v = 1. / np.sqrt(2) if v == 0 else 1.
    theta = np.pi / (2 * T)
    amplitude = 2 * scale_u * scale_v / T
    return amplitude * np.cos((2 * x + 1) * u * theta) * np.cos((2 * y + 1) * v * theta)
# DCT
def dct(img):
    """Apply a block-wise 2-D discrete cosine transform to every channel.

    The image is tiled into T x T blocks and each block is transformed
    independently with the weights from ``DCT_w``. T and ``channel`` are
    module-level settings.

    Args:
        img: array of shape (H, W, C) with at least ``channel`` channels;
            H and W must be multiples of T.

    Returns:
        float32 array of shape (H, W, channel). Inside each block the entry at
        row v, column u is the coefficient for frequency (u, v).

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = img.shape
    if H % T or W % T:
        raise ValueError("image height and width must be multiples of T")

    # The weights depend only on (v, u, y, x), never on the block position or
    # channel, so build the table once instead of calling DCT_w() per pixel.
    weights = np.zeros((T, T, T, T), dtype=np.float64)
    for v in range(T):
        for u in range(T):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = DCT_w(x, y, u, v)

    F = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                block = img[yi:yi + T, xi:xi + T, c]
                # F[v, u] = sum over (y, x) of block[y, x] * weights[v, u, y, x]
                F[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    weights, block, axes=([2, 3], [0, 1]))
    return F
# IDCT
def idct(F):
    """Reconstruct an image from block-wise DCT coefficients.

    For each T x T block only the K x K coefficients at the top-left of the
    block are summed, so K < T drops the remaining coefficients. T, K and
    ``channel`` are module-level settings.

    Args:
        F: coefficient array of shape (H, W, C) laid out as returned by
            ``dct``; H and W must be multiples of T.

    Returns:
        uint8 array of shape (H, W, channel), clipped to [0, 255] and rounded.

    Raises:
        ValueError: if H or W is not a multiple of T.
    """
    H, W, _ = F.shape
    if H % T or W % T:
        raise ValueError("coefficient height and width must be multiples of T")

    # The weights are identical for every block and channel: compute them once.
    weights = np.zeros((K, K, T, T), dtype=np.float64)
    for v in range(K):
        for u in range(K):
            for y in range(T):
                for x in range(T):
                    weights[v, u, y, x] = DCT_w(x, y, u, v)

    out = np.zeros((H, W, channel), dtype=np.float32)
    for c in range(channel):
        for yi in range(0, H, T):
            for xi in range(0, W, T):
                coeffs = F[yi:yi + K, xi:xi + K, c]
                # out[y, x] = sum over (v, u) of coeffs[v, u] * weights[v, u, y, x]
                out[yi:yi + T, xi:xi + T, c] = np.tensordot(
                    coeffs, weights, axes=([0, 1], [0, 1]))

    out = np.clip(out, 0, 255)
    out = np.round(out).astype(np.uint8)
    return out
# Quantization
def quantization(F):
    """Quantize block-wise DCT coefficients of a YCbCr image in place.

    Channel 0 (Y) is quantized with the table Q1 and every other channel
    (Cb, Cr) with the table Q2: each 8x8 block is divided element-wise by
    its table, rounded, and multiplied by the table again.

    Args:
        F: coefficient array of shape (H, W, C) with Y in channel 0; H and W
            must be multiples of 8. It is modified in place.

    Returns:
        The same array ``F`` after quantization.
    """
    H, W, depth = F.shape
    Q1 = np.array(((16, 11, 10, 16, 24, 40, 51, 61),
                   (12, 12, 14, 19, 26, 58, 60, 55),
                   (14, 13, 16, 24, 40, 57, 69, 56),
                   (14, 17, 22, 29, 51, 87, 80, 62),
                   (18, 22, 37, 56, 68, 109, 103, 77),
                   (24, 35, 55, 64, 81, 104, 113, 92),
                   (49, 64, 78, 87, 103, 121, 120, 101),
                   (72, 92, 95, 98, 112, 100, 103, 99)), dtype=np.float32)
    Q2 = np.array(((17, 18, 24, 47, 99, 99, 99, 99),
                   (18, 21, 26, 66, 99, 99, 99, 99),
                   (24, 26, 56, 99, 99, 99, 99, 99),
                   (47, 66, 99, 99, 99, 99, 99, 99),
                   (99, 99, 99, 99, 99, 99, 99, 99),
                   (99, 99, 99, 99, 99, 99, 99, 99),
                   (99, 99, 99, 99, 99, 99, 99, 99),
                   (99, 99, 99, 99, 99, 99, 99, 99)), dtype=np.float32)
    size = Q1.shape[0]
    # Luma gets the finer table, the chroma channels the coarser one.
    tables = [Q1] + [Q2] * (depth - 1)
    for ys in range(0, H, size):
        for xs in range(0, W, size):
            for c, Q in enumerate(tables):
                block = F[ys: ys + size, xs: xs + size, c]
                F[ys: ys + size, xs: xs + size, c] = np.round(block / Q) * Q
    return F
# JPEG without Huffman coding
def JPEG(img):
    """Run the lossy pipeline on a BGR image and return the restored BGR image.

    The stages are applied in order: BGR -> YCbCr, block-wise DCT,
    quantization, inverse DCT, YCbCr -> BGR.
    """
    coefficients = quantization(dct(BGR2YCbCr(img)))
    return YCbCr2BGR(idct(coefficients))
# MSE
def MSE(img1, img2):
    """Return the mean squared error between two images of the same shape.

    The difference is taken in float64 so that unsigned 8-bit inputs cannot
    wrap around, and the average runs over every element of the arrays
    instead of relying on a module-level channel count.

    Args:
        img1: first image (any numeric dtype).
        img2: second image, broadcast-compatible with ``img1``.

    Returns:
        The mean of the squared element-wise differences.
    """
    diff = np.asarray(img1, dtype=np.float64) - np.asarray(img2, dtype=np.float64)
    return np.mean(diff ** 2)
# PSNR
def PSNR(mse, vmax=255):
    """Return the peak signal-to-noise ratio 10 * log10(vmax^2 / mse).

    Args:
        mse: mean squared error between the two images.
        vmax: largest possible pixel value.

    Returns:
        The PSNR value; ``inf`` when ``mse`` is zero (identical images).
    """
    if mse == 0:
        # Identical images: avoid dividing by zero and report infinite PSNR.
        return float("inf")
    return 10 * np.log10(vmax * vmax / mse)
# bitrate
def BITRATE():
    """Return T * K^2 / T^2 as a float, using the module-level T and K."""
    kept = 1. * T * K * K
    return kept / T / T
# Read image
imagePath = os.path.join(OPENCV_100_Q_PATH, 'color_01.bmp')
img = cv.imread(imagePath)
if img is None:
    # cv.imread reports failure by returning None instead of raising.
    raise OSError("failed to read image: {}".format(imagePath))
img = img.astype(np.float32)
# JPEG
out = JPEG(img)
# MSE
mse = MSE(img, out)
# PSNR
psnr = PSNR(mse)
# bitrate
bitrate = BITRATE()
print("MSE:", mse)
print("PSNR:", psnr)
print("bitrate:", bitrate)
# Show the result until a key is pressed, then save it
cv.imshow("result", out)
cv.waitKey(0)
cv.imwrite("out.jpg", out)