一、课程目标:图像压缩算法
掌握图像压缩的基本知识,以及Huffman编码、行程编码、算术编码的算法实现;了解JPEG压缩编码实现、PCA图像压缩、SVD图像压缩、DCT图像压缩等相关知识。
可参考:
(9)图像编码与压缩(Image Coding and Compression)
二、Huffman编码
1.基于哈夫曼编码的压缩算法的Python实现
2.哈夫曼编码+python实现
3.Python实现图像的霍夫曼编码和译码
# Huffman编码示例
class Node:
    """A node of a Huffman tree.

    Attributes:
        name: Label (symbol) carried by the node.
        weight: Weight of the node.
        left: Left child, ``None`` until linked.
        right: Right child, ``None`` until linked.
        father: Parent node, ``None`` until linked.
    """

    def __init__(self, name, weight):
        self.name = name
        self.weight = weight
        self.left = None
        self.right = None
        self.father = None

    def is_left_child(self):
        """Return True if this node is the left child of its parent.

        A node without a parent is nobody's child, so False is returned
        instead of failing on ``None.left``.
        """
        return self.father is not None and self.father.left is self
def create_prim_nodes(Data_set, Labels):
    """Create the initial leaf nodes of the Huffman tree.

    Args:
        Data_set: Sequence of weights.
        Labels: Sequence of labels, one per weight and in the same order.

    Returns:
        A list with one ``Node(label, weight)`` per position.

    Raises:
        Exception: If the two sequences differ in length.
    """
    if len(Labels) != len(Data_set):
        raise Exception('数据和标签不匹配!')
    return [Node(label, weight) for label, weight in zip(Labels, Data_set)]
def create_HF_tree(nodes):
    """Build a Huffman tree from leaf nodes and return its root.

    The two lightest nodes are merged repeatedly under a new unnamed parent
    whose weight is their sum; the lighter one becomes the left child. Ties
    are resolved by age (leaves in input order, then merged nodes in creation
    order), which is exactly the order a stable re-sort of the working list
    would give. The input list itself is not modified, but the leaf objects
    are linked into the tree through their ``father`` attribute.

    Args:
        nodes: List of leaf nodes exposing ``weight`` and ``father``.

    Returns:
        The root node, or ``None`` when ``nodes`` is empty. With a single leaf
        that leaf is the root.
    """
    import heapq

    if not nodes:
        return None

    # (weight, age, node): ``age`` is unique, so nodes themselves are never
    # compared and equal weights pop in insertion order.
    heap = [(node.weight, age, node) for age, node in enumerate(nodes)]
    heapq.heapify(heap)
    next_age = len(heap)

    while len(heap) > 1:
        left_weight, _, new_left = heapq.heappop(heap)
        right_weight, _, new_right = heapq.heappop(heap)
        new_node = Node(None, left_weight + right_weight)
        new_node.left = new_left
        new_node.right = new_right
        new_left.father = new_right.father = new_node
        heapq.heappush(heap, (new_node.weight, next_age, new_node))
        next_age += 1

    root = heap[0][2]
    root.father = None  # the root has no parent, even if a stale link existed
    return root
def get_huffman_code(nodes):
    """Return the Huffman code of every given node.

    Each node is followed upwards through ``father`` until a node without a
    parent is reached; a left branch contributes '0' and a right branch '1'.

    Args:
        nodes: Iterable of nodes that are already linked into a tree.

    Returns:
        Dict mapping ``node.name`` to its code string, read from the root
        down to the node.
    """
    codes = {}
    for leaf in nodes:
        bits = []
        current = leaf
        while current.father is not None:
            bits.append('0' if current.is_left_child() else '1')
            current = current.father
        # Bits were collected bottom-up; the code reads top-down.
        codes[leaf.name] = ''.join(reversed(bits))
    return codes
if __name__ == '__main__':
    # Symbols and their weights, position by position.
    labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    data_set = [19, 12, 6, 3, 5, 15, 34, 110]

    nodes = create_prim_nodes(data_set, labels)  # leaf nodes
    root = create_HF_tree(nodes)  # links the leaves into a tree
    codes = get_huffman_code(nodes)  # code of each leaf

    # Print one "symbol : code" line per leaf.
    for key, code in codes.items():
        print(key, ': ', code)
三、行程编码
1.图像处理(一):基于行程编码的图像压缩python实现
2.行程编码压缩算法
# 行程编码示例
import cv2 as cv
import numpy as np
# Load the picture as a single-channel grayscale image (flag 0).
grayimg = cv.imread('D:\\DIP_Photo\\SHZU.jpg', 0)
if grayimg is None:
    # cv.imread signals an unreadable file by returning None.
    raise FileNotFoundError('cannot read image: D:\\DIP_Photo\\SHZU.jpg')
rows, cols = grayimg.shape
image1 = grayimg.flatten()  # 2-D image -> 1-D pixel sequence (a copy)
print(len(image1))

# Binarize: pixels >= 127 become 255, everything else 0.
image1 = np.where(image1 >= 127, 255, 0).astype(grayimg.dtype)

# Run-length encoding.
# A run starts at index 0 and wherever a pixel differs from its predecessor.
# image3 holds the value of each run and data its length, so a trailing run
# of a single pixel is recorded like any other run.
run_starts = np.concatenate(([0], np.flatnonzero(image1[1:] != image1[:-1]) + 1))
image3 = image1[run_starts]
data = np.diff(np.append(run_starts, image1.size))

# Compression ratio in percent.
# NOTE(review): only the run values are counted, not the run lengths in data.
ys_rate = len(image3) / len(image1) * 100
print('压缩率为' + str(ys_rate) + '%')

# Run-length decoding: repeat every run value by its length.
rec_image = np.repeat(image3, data).reshape(rows, cols)
# cv.imwrite('output_SHZU.jpg', rec_image)
cv.imshow('rec_image', rec_image)  # show the reconstructed binary image
cv.waitKey(0)
四、算术编码
1.算术编码代码matlab_算术编码的python实现
2.压缩算法之算术编码
# 算术编码的实现
def get_dict_from_singal():
    """Return the fixed symbol table of the arithmetic-coding example.

    Returns:
        Dict mapping each symbol to its ``(low, high)`` sub-interval of
        the unit interval.
    """
    return dict(
        A=(0, 0.1),
        B=(0.1, 0.5),
        C=(0.5, 0.7),
        D=(0.7, 1),
    )
def encoder(singal, singal_dict):
    """Arithmetic-encode a symbol sequence into one number in [0, 1).

    The current interval starts as [0, 1) and is narrowed once per symbol to
    that symbol's share of it.

    Args:
        singal: Iterable of symbols, each a key of ``singal_dict``.
        singal_dict: Maps a symbol to its ``(low, high)`` sub-interval.

    Returns:
        The midpoint of the final interval. The lower bound would also
        identify the message, but it lies exactly on the edge of the last
        symbol's interval, so rounding while decoding could turn the last
        symbol into its neighbour; the midpoint keeps a safety margin.

    Raises:
        KeyError: If a symbol is missing from ``singal_dict``.
    """
    low, high = 0, 1
    for symbol in singal:
        width = high - low
        sym_low, sym_high = singal_dict[symbol][0], singal_dict[symbol][1]
        low, high = low + width * sym_low, low + width * sym_high
    return (low + high) / 2
def decoder(encoded_number, singal_dict, singal_length):
    """Decode an arithmetic-coded number back into its symbols.

    For each position the symbol whose interval contains the number is
    emitted, then the number is rescaled to that interval.

    Args:
        encoded_number: Code value, expected to lie in [0, 1).
        singal_dict: Maps a symbol to its ``(low, high)`` sub-interval.
        singal_length: Number of symbols to decode. Zero or a negative value
            decodes nothing.

    Returns:
        List of decoded symbols. A position whose number falls into no
        interval is skipped, so the list can be shorter than requested.
    """
    singal = []
    for _ in range(int(singal_length)):
        for symbol, (low, high) in singal_dict.items():
            if low <= encoded_number < high:
                singal.append(symbol)
                # Map [low, high) back onto [0, 1) for the next symbol.
                encoded_number = (encoded_number - low) / (high - low)
                break
    return singal
def main():
    """Encode a sample message, print the code, decode it and print the result."""
    table = get_dict_from_singal()
    message = 'CADACDB'
    code = encoder(message, table)
    print(code)
    print(decoder(code, table, len(message)))


if __name__ == '__main__':
    main()
五、JPEG图像压缩
1.JPEG图片压缩的Python实现
2.JPEG系列三 JPEG图像压缩
3.JPEG图像压缩解压算法——C++实现
import numpy as np
import os
from PIL import Image
# JPEG-style compression / decompression
class KJPEG:
    """Simplified JPEG-like codec.

    Pipeline: RGB -> YUV, zero padding, 8x8 DCT, quantization, zig-zag scan,
    run-length coding and variable-length integers (VLI). There is no Huffman
    stage. ``Compress`` writes a ``.gpj`` file next to the input image;
    ``Decompress`` reads such a file, saves ``result.bmp`` and shows it.
    """

    def __init__(self):
        # 8x8 DCT basis matrix A: row 0 is scaled by sqrt(1/8), the other
        # rows by sqrt(2/8).
        # https://blog.csdn.net/ahafg/article/details/48808443
        self.__dctA = np.zeros(shape=(8, 8))
        for i in range(8):
            c = np.sqrt(1 / 8) if i == 0 else np.sqrt(2 / 8)
            for j in range(8):
                self.__dctA[i, j] = c * np.cos(np.pi * i * (2 * j + 1) / (2 * 8))
        # Luminance quantization table (8x8, row-major).
        self.__lq = np.array([
            16, 11, 10, 16, 24, 40, 51, 61,
            12, 12, 14, 19, 26, 58, 60, 55,
            14, 13, 16, 24, 40, 57, 69, 56,
            14, 17, 22, 29, 51, 87, 80, 62,
            18, 22, 37, 56, 68, 109, 103, 77,
            24, 35, 55, 64, 81, 104, 113, 92,
            49, 64, 78, 87, 103, 121, 120, 101,
            72, 92, 95, 98, 112, 100, 103, 99,
        ])
        # Chrominance quantization table (8x8, row-major).
        self.__cq = np.array([
            17, 18, 24, 47, 99, 99, 99, 99,
            18, 21, 26, 66, 99, 99, 99, 99,
            24, 26, 56, 99, 99, 99, 99, 99,
            47, 66, 99, 99, 99, 99, 99, 99,
            99, 99, 99, 99, 99, 99, 99, 99,
            99, 99, 99, 99, 99, 99, 99, 99,
            99, 99, 99, 99, 99, 99, 99, 99,
            99, 99, 99, 99, 99, 99, 99, 99,
        ])
        # Tags selecting the quantization table: lt = luminance, ct = chrominance.
        self.__lt = 0
        self.__ct = 1
        # https://my.oschina.net/tigerBin/blog/1083549
        # Zig table: entry i is the block position read at scan index i.
        self.__zig = np.array([
            0, 1, 8, 16, 9, 2, 3, 10,
            17, 24, 32, 25, 18, 11, 4, 5,
            12, 19, 26, 33, 40, 48, 41, 34,
            27, 20, 13, 6, 7, 14, 21, 28,
            35, 42, 49, 56, 57, 50, 43, 36,
            29, 22, 15, 23, 30, 37, 44, 51,
            58, 59, 52, 45, 38, 31, 39, 46,
            53, 60, 61, 54, 47, 55, 62, 63
        ])
        # Zag table: entry p is the scan index stored at block position p.
        # It is the inverse permutation of the zig table, so it is derived
        # from it instead of being typed out a second time.
        self.__zag = np.argsort(self.__zig)

    def __Rgb2Yuv(self, r, g, b):
        """Convert R, G, B planes to Y, U, V planes (U and V offset by 128)."""
        y = 0.299 * r + 0.587 * g + 0.114 * b
        u = -0.1687 * r - 0.3313 * g + 0.5 * b + 128
        v = 0.5 * r - 0.419 * g - 0.081 * b + 128
        return y, u, v

    def __Fill(self, matrix):
        """Zero-pad ``matrix`` at the bottom/right.

        The padding amounts are derived from ``self.height`` and
        ``self.width`` so that both become multiples of 16.
        """
        fh, fw = 0, 0
        if self.height % 16 != 0:
            fh = 16 - self.height % 16
        if self.width % 16 != 0:
            fw = 16 - self.width % 16
        return np.pad(matrix, ((0, fh), (0, fw)), 'constant',
                      constant_values=(0, 0))

    def __Encode(self, matrix, tag):
        """Pad a plane, cut it into 8x8 blocks, then DCT and quantize each.

        Returns:
            List of 64-element arrays, one per block, in row-major block order.
        """
        matrix = self.__Fill(matrix)
        height, width = matrix.shape
        # View the plane as a (block rows, block cols, 8, 8) grid without
        # hand-computed strides.
        blocks = matrix.reshape(height // 8, 8, width // 8, 8).swapaxes(1, 2)
        res = []
        for i in range(height // 8):
            for j in range(width // 8):
                res.append(self.__Quantize(self.__Dct(blocks[i, j]).reshape(64), tag))
        return res

    def __Dct(self, block):
        """Forward 2-D DCT of an 8x8 block: A * block * A^T."""
        res = np.dot(self.__dctA, block)
        return np.dot(res, np.transpose(self.__dctA))

    def __Quantize(self, block, tag):
        """Divide 64 coefficients by the table selected by ``tag`` and round.

        An unknown tag returns the block unchanged.
        """
        if tag == self.__lt:
            return np.round(block / self.__lq)
        if tag == self.__ct:
            return np.round(block / self.__cq)
        return block

    def __Zig(self, blocks):
        """Reorder every 64-element block into scan order; return one flat list."""
        coeffs = np.asarray(blocks, dtype=float)
        return coeffs[:, self.__zig].reshape(-1).tolist()

    def __Rle(self, blist):
        """Run-length encode a flat coefficient list.

        Returns:
            Flat list of ``zero_run, value`` pairs. A run is cut after 15
            zeros, in which case the pair is ``15, 0``.
        """
        res = []
        cnt = 0
        for value in blist:
            if value != 0 or cnt == 15:
                res.append(cnt)
                res.append(int(value))
                cnt = 0
            else:
                cnt += 1
        # Trailing zeros: the final 0 is stored as the value of the pair.
        if cnt != 0:
            res.append(cnt - 1)
            res.append(0)
        return res

    def Compress(self, filename):
        """Compress the image at ``filename`` into a ``.gpj`` file.

        The output path is ``filename`` with its extension replaced by
        ``.gpj``; an existing file is overwritten. ``self.width`` and
        ``self.height`` are set from the image.
        """
        image = Image.open(filename)
        self.width, self.height = image.size
        image = np.asarray(image.convert('RGB'))
        r = image[:, :, 0]
        g = image[:, :, 1]
        b = image[:, :, 2]
        y, u, v = self.__Rgb2Yuv(r, g, b)
        # DCT + quantization per plane.
        y_blocks = self.__Encode(y, self.__lt)
        u_blocks = self.__Encode(u, self.__ct)
        v_blocks = self.__Encode(v, self.__ct)
        # Zig-zag scan followed by run-length coding.
        y_code = self.__Rle(self.__Zig(y_blocks))
        u_code = self.__Rle(self.__Zig(u_blocks))
        v_code = self.__Rle(self.__Zig(v_blocks))
        # Header: height and width (2 bytes each), then the three code
        # lengths (4 bytes each), all big-endian. The VLI payload follows;
        # the Huffman stage is not implemented.
        # https://www.cnblogs.com/Arvin-JIN/p/9133745.html
        tfile = os.path.splitext(filename)[0] + ".gpj"
        with open(tfile, 'wb') as o:
            o.write(self.height.to_bytes(2, byteorder='big'))
            o.write(self.width.to_bytes(2, byteorder='big'))
            o.write((len(y_code)).to_bytes(4, byteorder='big'))
            o.write((len(u_code)).to_bytes(4, byteorder='big'))
            o.write((len(v_code)).to_bytes(4, byteorder='big'))
        self.__Write2File(tfile, y_code, u_code, v_code)

    # https://blog.csdn.net/weixin_43690347/article/details/84146979
    def __Write2File(self, filename, y_code, u_code, v_code):
        """Append the bit-packed codes to ``filename``.

        Even positions (zero runs) are written as 4 bits. Odd positions
        (values) are written as a 4-bit VLI length followed by the VLI bits.
        The last byte is padded with zero bits.
        """
        pieces = []
        for index, item in enumerate(y_code + u_code + v_code):
            if index % 2 == 0:
                pieces.append(format(item & 0x0F, '04b'))
            else:
                vtl, vts = self.__VLI(item)
                pieces.append(format(vtl & 0x0F, '04b'))
                pieces.append(vts)
        bits = ''.join(pieces)
        bits += '0' * (-len(bits) % 8)
        payload = bytes(int(bits[i:i + 8], 2) for i in range(0, len(bits), 8))
        with open(filename, "ab") as o:
            o.write(payload)

    def __IDct(self, block):
        """Inverse 2-D DCT of an 8x8 block: A^T * block * A."""
        res = np.dot(np.transpose(self.__dctA), block)
        return np.dot(res, self.__dctA)

    def __IQuantize(self, block, tag):
        """Multiply 64 coefficients by the table selected by ``tag``.

        The input is left untouched; an unknown tag returns it unchanged.
        """
        if tag == self.__lt:
            return block * self.__lq
        if tag == self.__ct:
            return block * self.__cq
        return block

    def __IFill(self, matrix):
        """Crop the padding off: keep the top-left ``height`` x ``width`` part."""
        return matrix[:self.height, :self.width]

    def __Decode(self, blocks, tag):
        """Dequantize and inverse-DCT all blocks, then reassemble the plane."""
        tlist = [
            self.__IDct(self.__IQuantize(np.array(b), tag).reshape(8, 8))
            for b in blocks
        ]
        # Same padded size as produced on the encoding side.
        height_fill = self.height + (-self.height) % 16
        width_fill = self.width + (-self.width) % 16
        per_row = width_fill // 8
        rlist = [
            np.hstack(tuple(tlist[row * per_row:(row + 1) * per_row]))
            for row in range(height_fill // 8)
        ]
        return self.__IFill(np.vstack(tuple(rlist)))

    def __ReadFile(self, filename):
        """Read a ``.gpj`` file.

        Sets ``self.height`` and ``self.width`` from the header.

        Returns:
            Tuple ``(y_dcode, u_dcode, v_dcode)`` of flat ``run, value`` lists.
            Reading stops early if the payload ends before all announced
            entries were found.
        """
        with open(filename, "rb") as o:
            self.height = int.from_bytes(o.read(2), byteorder='big')
            self.width = int.from_bytes(o.read(2), byteorder='big')
            ylen = int.from_bytes(o.read(4), byteorder='big')
            ulen = int.from_bytes(o.read(4), byteorder='big')
            vlen = int.from_bytes(o.read(4), byteorder='big')
            payload = o.read()
        bits = ''.join(format(byte, '08b') for byte in payload)
        total = ylen + ulen + vlen
        rlist = []
        pos = 0
        while len(rlist) < total and pos + 8 <= len(bits):
            run = int(bits[pos:pos + 4], 2)
            vtl = int(bits[pos + 4:pos + 8], 2)
            pos += 8
            # A value of 0 has VLI length 0 but still occupies one bit.
            nbits = max(vtl, 1)
            if pos + nbits > len(bits):
                break
            rlist.append(run)
            rlist.append(self.__IVLI(vtl, bits[pos:pos + nbits]))
            pos += nbits
        y_dcode = rlist[:ylen]
        u_dcode = rlist[ylen:ylen + ulen]
        v_dcode = rlist[ylen + ulen:ylen + ulen + vlen]
        return y_dcode, u_dcode, v_dcode

    def __Zag(self, dcode):
        """Undo the zig-zag scan; return a list of 64-element block lists."""
        coeffs = np.asarray(dcode, dtype=float).reshape((len(dcode) // 64, 64))
        return coeffs[:, self.__zag].tolist()

    def __IRle(self, dcode):
        """Expand ``run, value`` pairs into the flat coefficient list."""
        rlist = []
        for index, item in enumerate(dcode):
            if index % 2 == 0:
                rlist += [0] * item
            else:
                rlist.append(item)
        return rlist

    def Decompress(self, filename):
        """Decode a ``.gpj`` file, save the picture as ``result.bmp`` and show it."""
        y_dcode, u_dcode, v_dcode = self.__ReadFile(filename)
        y_blocks = self.__Zag(self.__IRle(y_dcode))
        u_blocks = self.__Zag(self.__IRle(u_dcode))
        v_blocks = self.__Zag(self.__IRle(v_dcode))
        y = self.__Decode(y_blocks, self.__lt)
        u = self.__Decode(u_blocks, self.__ct)
        v = self.__Decode(v_blocks, self.__ct)
        # YUV -> RGB
        r = (y + 1.402 * (v - 128))
        g = (y - 0.34414 * (u - 128) - 0.71414 * (v - 128))
        b = (y + 1.772 * (u - 128))
        r = Image.fromarray(r).convert('L')
        g = Image.fromarray(g).convert('L')
        b = Image.fromarray(b).convert('L')
        image = Image.merge("RGB", (r, g, b))
        image.save("result.bmp", "bmp")
        image.show()

    def __VLI(self, n):
        """Return ``(length, bits)``, the variable-length integer code of ``n``.

        Positive numbers use their binary digits, negative numbers the
        bit-inverted digits of ``-n``, and zero is ``(0, '0')``.
        """
        if n > 0:
            ts = bin(n)[2:]
            return (len(ts), ts)
        if n < 0:
            tl = (-n).bit_length()
            return (tl, format((-n) ^ ((1 << tl) - 1), '0{}b'.format(tl)))
        return (0, '0')

    def __IVLI(self, tl, ts):
        """Return the integer encoded by VLI length ``tl`` and bit string ``ts``."""
        if tl == 0:
            return 0
        n = int(ts, 2)
        if ts[0] == '0':
            # A leading 0 marks a negative number stored with inverted bits.
            n = -(n ^ ((1 << tl) - 1))
        return n
if __name__ == '__main__':
    # Compress the sample bitmap, then decode the .gpj file written next to it.
    kjpeg = KJPEG()
    for step, path in (
        (kjpeg.Compress, "D:\\DIP_Photo\\RENZhengfei.bmp"),
        (kjpeg.Decompress, "D:\\DIP_Photo\\RENZhengfei.gpj"),
    ):
        step(path)
六、PCA用于图像压缩
1.PCA - 主成分分析(Python实现图像压缩)
2.机器学习实战——PCA实现图像压缩
3.OpenCV—python 图像压缩
import numpy as np
import cv2
# PCA image compression
def comp_2d(image_2d, rate):
    """Reconstruct a 2-D image from its leading principal components.

    Rows are treated as variables: each row is centred on its own mean, the
    row covariance matrix is eigen-decomposed, and the image is projected
    onto the ``rate`` eigenvectors with the largest eigenvalues and back.

    Args:
        image_2d: 2-D array (height x width).
        rate: Number of principal components to keep. It is limited to the
            range 0..height; 0 or less yields the row means only, height or
            more uses every component.

    Returns:
        uint8 array of the same shape, rounded and clipped to 0..255.
    """
    row_mean = np.mean(image_2d, axis=1)[:, np.newaxis]
    centered = image_2d.astype(np.float64) - row_mean

    # atleast_2d: np.cov returns a 0-d value for a single-row image.
    eig_val, eig_vec = np.linalg.eigh(np.atleast_2d(np.cov(centered)))
    order = np.argsort(eig_val)[::-1]  # largest eigenvalue first
    eig_vec = eig_vec[:, order]

    p = np.size(eig_vec, axis=1)
    numpc = max(0, min(rate, p))
    eig_vec = eig_vec[:, :numpc]

    score = np.dot(eig_vec.T, centered)
    recon = np.dot(eig_vec, score) + row_mean
    # Round and clip instead of casting directly: a plain uint8 cast wraps
    # values above 255 and truncates towards zero.
    return np.clip(np.rint(recon), 0, 255).astype(np.uint8)
if __name__ == '__main__':
    data = cv2.imread("D:\\DIP_Photo\\JinChen01.jpg")
    height, width = data.shape[:2]
    # The three colour planes, in the order they are stored in the array.
    a_g, a_b, a_r = (data[:, :, channel] for channel in range(3))
    rates = [30, 60, 90]  # number of principal components kept per run
    for rate in rates:
        g_recon, b_recon, r_recon = (
            comp_2d(plane, rate) for plane in (a_g, a_b, a_r)
        )
        result = cv2.merge([g_recon, b_recon, r_recon])
        cv2.imshow(f'result_{rate}', result)
    # Keep all result windows open until a key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
七、SVD用于图像压缩
1.SVD在图像压缩上的应用(基于matlab和python)
2.利用SVD进行图像压缩(附Python代码)
3.OpenCV—python 图像压缩
import cv2
import numpy as np
# SVD image compression
def rebuild_img(u, sigma, v, p):
    """Rebuild an image plane from its leading singular components.

    Components are added, largest first, as long as the sum of the singular
    values already used does not exceed ``p`` times the integer part of the
    total; the count is capped at ``len(sigma)``.

    Args:
        u: Left singular vectors, one per column.
        sigma: 1-D array of singular values in descending order.
        v: Right singular vectors, one per row.
        p: Fraction of the singular-value sum to keep.

    Returns:
        uint8 array of shape ``(u.shape[0], v.shape[1])``, rounded and
        clipped to 0..255.
    """
    print('sigma.shape', sigma.shape)
    total = np.sum(sigma)
    print('sum(sigma)', total)

    threshold = int(total) * p
    cur_sum = 0
    k = 0
    # The bound on k stops the loop once every component has been used.
    while k < len(sigma) and cur_sum <= threshold:
        cur_sum += sigma[k]
        k += 1
    print('==k===:', k)

    # Sum of the first k rank-1 terms sigma[i] * u[:, i] * v[i, :].
    a = np.dot(u[:, :k] * sigma[:k], v[:k, :])
    return np.clip(np.rint(a), 0, 255).astype("uint8")
if __name__ == '__main__':
    img = cv2.imread('D:\\DIP_Photo\\JinChen01.jpg', 1)
    # The decomposition does not depend on p, so it is computed once per
    # channel instead of once per channel and p.
    factors = [np.linalg.svd(img[:, :, channel]) for channel in range(3)]
    for p in np.arange(0.1, 1, 0.2):
        # Channels stay in the order cv2.imread stored them, so the rebuilt
        # image can be shown directly.
        planes = [rebuild_img(u, sigma, v, p) for u, sigma, v in factors]
        I = np.stack(planes, 2)
        cv2.imshow("svd_" + str(p * 100), I)
        # cv2.imwrite(".\\svd_" + str(p * 100) + ".jpg", I)
    cv2.imshow("img", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
代码示例仅供学生学习参考,如有引用未标注请提醒。