返回:OpenCV系列文章目录(持续更新中......)
上一篇:如何利用OpenCV4.9 更改图像的对比度和亮度(31)
下一篇:OpenCV 如何使用 XML 和 YAML 文件的文件输入和输出(33)
目标
我们将寻求以下问题的答案:
- 什么是傅里叶变换,为什么要使用它?
- 如何在 OpenCV 中做到这一点?
- 使用以下函数: copyMakeBorder() , merge() , dft() , getOptimalDFTSize() , log()和normalize() .
源代码
你可以在官方网站下载相关源代码。
以下 dft() 的用法示例:
C++
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
static void help(char ** argv)
{
cout << endl
<< "This program demonstrated the use of the discrete Fourier transform (DFT). " << endl
<< "The dft of an image is taken and it's power spectrum is displayed." << endl << endl
<< "Usage:" << endl
<< argv[0] << " [image_name -- default lena.jpg]" << endl << endl;
}
int main(int argc, char ** argv)
{
help(argv);
const char* filename = argc >=2 ? argv[1] : "lena.jpg";
Mat I = imread( samples::findFile( filename ), IMREAD_GRAYSCALE);
if( I.empty()){
cout << "Error opening image" << endl;
return EXIT_FAILURE;
}
Mat padded; //expand input image to optimal size
int m = getOptimalDFTSize( I.rows );
int n = getOptimalDFTSize( I.cols ); // on the border add zero values
copyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0));
Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
Mat complexI;
merge(planes, 2, complexI); // Add to the expanded another plane with zeros
dft(complexI, complexI); // this way the result may fit in the source matrix
// compute the magnitude and switch to logarithmic scale
// => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))
split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitude
Mat magI = planes[0];
magI += Scalar::all(1); // switch to logarithmic scale
log(magI, magI);
// crop the spectrum, if it has an odd number of rows or columns
magI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2));
// rearrange the quadrants of Fourier image so that the origin is at the image center
int cx = magI.cols/2;
int cy = magI.rows/2;
Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
Mat q1(magI, Rect(cx, 0, cx, cy)); // Top-Right
Mat q2(magI, Rect(0, cy, cx, cy)); // Bottom-Left
Mat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right
Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
q2.copyTo(q1);
tmp.copyTo(q2);
normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a
// viewable image form (float between values 0 and 1).
imshow("Input Image" , I ); // Show the result
imshow("spectrum magnitude", magI);
waitKey();
return EXIT_SUCCESS;
}
Java
import org.opencv.core.*;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import java.util.List;
import java.util.*;
class DiscreteFourierTransformRun{
private void help() {
System.out.println("" +
"This program demonstrated the use of the discrete Fourier transform (DFT). \n" +
"The dft of an image is taken and it's power spectrum is displayed.\n" +
"Usage:\n" +
"./DiscreteFourierTransform [image_name -- default ../data/lena.jpg]");
}
public void run(String[] args){
help();
String filename = ((args.length > 0) ? args[0] : "../data/lena.jpg");
Mat I = Imgcodecs.imread(filename, Imgcodecs.IMREAD_GRAYSCALE);
if( I.empty() ) {
System.out.println("Error opening image");
System.exit(-1);
}
Mat padded = new Mat(); //expand input image to optimal size
int m = Core.getOptimalDFTSize( I.rows() );
int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero values
Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));
List<Mat> planes = new ArrayList<Mat>();
padded.convertTo(padded, CvType.CV_32F);
planes.add(padded);
planes.add(Mat.zeros(padded.size(), CvType.CV_32F));
Mat complexI = new Mat();
Core.merge(planes, complexI); // Add to the expanded another plane with zeros
Core.dft(complexI, complexI); // this way the result may fit in the source matrix
// compute the magnitude and switch to logarithmic scale
// => log(1 + sqrt(Re(DFT(I))^2 + Im(DFT(I))^2))
Core.split(complexI, planes); // planes.get(0) = Re(DFT(I)
// planes.get(1) = Im(DFT(I))
Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitude
Mat magI = planes.get(0);
Mat matOfOnes = Mat.ones(magI.size(), magI.type());
Core.add(matOfOnes, magI, magI); // switch to logarithmic scale
Core.log(magI, magI);
// crop the spectrum, if it has an odd number of rows or columns
magI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));
// rearrange the quadrants of Fourier image so that the origin is at the image center
int cx = magI.cols()/2;
int cy = magI.rows()/2;
Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
Mat q1 = new Mat(magI, new Rect(cx, 0, cx, cy)); // Top-Right
Mat q2 = new Mat(magI, new Rect(0, cy, cx, cy)); // Bottom-Left
Mat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right
Mat tmp = new Mat(); // swap quadrants (Top-Left with Bottom-Right)
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
q2.copyTo(q1);
tmp.copyTo(q2);
magI.convertTo(magI, CvType.CV_8UC1);
Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1); // Transform the matrix with float values
// into a viewable image form (float between
// values 0 and 255).
HighGui.imshow("Input Image" , I ); // Show the result
HighGui.imshow("Spectrum Magnitude", magI);
HighGui.waitKey();
System.exit(0);
}
}
public class DiscreteFourierTransform {
public static void main(String[] args) {
// Load the native library.
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
new DiscreteFourierTransformRun().run(args);
}
}
Python
from __future__ import print_function
import sys
import cv2 as cv
import numpy as np
def print_help():
print('''
This program demonstrated the use of the discrete Fourier transform (DFT).
The dft of an image is taken and it's power spectrum is displayed.
Usage:
discrete_fourier_transform.py [image_name -- default lena.jpg]''')
def main(argv):
print_help()
filename = argv[0] if len(argv) > 0 else 'lena.jpg'
I = cv.imread(cv.samples.findFile(filename), cv.IMREAD_GRAYSCALE)
if I is None:
print('Error opening image')
return -1
rows, cols = I.shape
m = cv.getOptimalDFTSize( rows )
n = cv.getOptimalDFTSize( cols )
padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
planes = [np.float32(padded), np.zeros(padded.shape, np.float32)]
complexI = cv.merge(planes) # Add to the expanded another plane with zeros
cv.dft(complexI, complexI) # this way the result may fit in the source matrix
cv.split(complexI, planes) # planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
cv.magnitude(planes[0], planes[1], planes[0])# planes[0] = magnitude
magI = planes[0]
matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
cv.add(matOfOnes, magI, magI) # switch to logarithmic scale
cv.log(magI, magI)
magI_rows, magI_cols = magI.shape
# crop the spectrum, if it has an odd number of rows or columns
magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
cx = int(magI_rows/2)
cy = int(magI_cols/2)
q0 = magI[0:cx, 0:cy] # Top-Left - Create a ROI per quadrant
q1 = magI[cx:cx+cx, 0:cy] # Top-Right
q2 = magI[0:cx, cy:cy+cy] # Bottom-Left
q3 = magI[cx:cx+cx, cy:cy+cy] # Bottom-Right
tmp = np.copy(q0) # swap quadrants (Top-Left with Bottom-Right)
magI[0:cx, 0:cy] = q3
magI[cx:cx + cx, cy:cy + cy] = tmp
tmp = np.copy(q1) # swap quadrant (Top-Right with Bottom-Left)
magI[cx:cx + cx, 0:cy] = q2
magI[0:cx, cy:cy + cy] = tmp
cv.normalize(magI, magI, 0, 1, cv.NORM_MINMAX) # Transform the matrix with float values into a
cv.imshow("Input Image" , I ) # Show the result
cv.imshow("spectrum magnitude", magI)
cv.waitKey()
if __name__ == "__main__":
main(sys.argv[1:])
解释:
傅里叶变换会将图像分解为正弦和余弦分量。换句话说,它将图像从其空间域转换为其频域。这个想法是,任何函数都可以精确地近似于无限正弦和余弦函数的总和。傅里叶变换是一种如何做到这一点的方法。从数学上讲,二维图像傅里叶变换是:
这里 f 是其空间域中的图像值,F 是其频域中的图像值。变换的结果是复数。可以通过真实图像和复数图像或通过幅度和相位图像来显示这一点。然而,在整个图像处理算法中,只有幅度图像是有趣的,因为它包含了我们需要的有关图像几何结构的所有信息。但是,如果您打算以这些形式对图像进行一些修改,然后需要重新转换它,则需要保留这两种形式。
在此示例中,我将演示如何计算和显示傅里叶变换的幅度图像。在数字图像的情况下是离散的。这意味着它们可能会从给定的域值中获取一个值。例如,在基本灰度中,图像值通常介于 0 到 255 之间。因此,傅里叶变换也需要是离散类型的,从而产生离散傅里叶变换 (DFT)。每当您需要从几何角度确定图像的结构时,您都需要使用它。以下是要遵循的步骤(如果是灰度输入图像 I):
将图像扩展到最佳大小
DFT 的性能取决于图像大小。对于数字 2、3 和 5 的倍数的图像大小,它往往是最快的。因此,为了实现最佳性能,通常最好将边框值填充到图像上,以获得具有此类特征的大小。getOptimalDFTSize()返回这个最佳大小,我们可以使用 copyMakeBorder() 函数来扩展图像的边框(附加的像素以零初始化):
c++:
Mat padded; //expand input image to optimal size
int m = getOptimalDFTSize( I.rows );
int n = getOptimalDFTSize( I.cols ); // on the border add zero values
copyMakeBorder(I, padded, 0, m - I.rows, 0, n - I.cols, BORDER_CONSTANT, Scalar::all(0));
Java:
Mat padded = new Mat(); //expand input image to optimal size
int m = Core.getOptimalDFTSize( I.rows() );
int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero values
Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));
python:
rows, cols = I.shape
m = cv.getOptimalDFTSize( rows )
n = cv.getOptimalDFTSize( cols )
padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
为复杂价值和真实价值腾出空间
傅里叶变换的结果很复杂。这意味着对于每个图像值,结果是两个图像值(每个组件一个)。此外,频域范围比空间对应物大得多。因此,我们通常至少以浮点格式存储这些内容。因此,我们将输入图像转换为此类型,并使用另一个通道对其进行扩展以保存复数值:
c++:
Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};
Mat complexI;
merge(planes, 2, complexI); // Add to the expanded another plane with zeros
Java:
Mat padded = new Mat(); //expand input image to optimal size
int m = Core.getOptimalDFTSize( I.rows() );
int n = Core.getOptimalDFTSize( I.cols() ); // on the border add zero values
Core.copyMakeBorder(I, padded, 0, m - I.rows(), 0, n - I.cols(), Core.BORDER_CONSTANT, Scalar.all(0));
Python:
rows, cols = I.shape
m = cv.getOptimalDFTSize( rows )
n = cv.getOptimalDFTSize( cols )
padded = cv.copyMakeBorder(I, 0, m - rows, 0, n - cols, cv.BORDER_CONSTANT, value=[0, 0, 0])
进行离散傅里叶变换
可以就地计算(输入与输出相同):
C++:
dft(complexI, complexI); // this way the result may fit in the source matrix
Java:
Core.dft(complexI, complexI); // this way the result may fit in the source matrix
Python:
cv.dft(complexI, complexI) # this way the result may fit in the source matrix
将实数值和复数值转换为量级
复数有一个实数(Re)和一个复数(虚数-Im)部分。DFT 的结果是复数。DFT 的量级为:
翻译成 OpenCV 代码:
C++:
split(complexI, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
magnitude(planes[0], planes[1], planes[0]);// planes[0] = magnitude
Mat magI = planes[0];
Java:
Core.split(complexI, planes); // planes.get(0) = Re(DFT(I)
// planes.get(1) = Im(DFT(I))
Core.magnitude(planes.get(0), planes.get(1), planes.get(0));// planes.get(0) = magnitude
Mat magI = planes.get(0);
Python:
cv.split(complexI, planes) # planes[0] = Re(DFT(I), planes[1] = Im(DFT(I))
cv.magnitude(planes[0], planes[1], planes[0])# planes[0] = magnitude
magI = planes[0]
切换到对数刻度
事实证明,傅里叶系数的动态范围太大,无法显示在屏幕上。我们有一些小的和一些高的变化值,我们无法像这样观察到。因此,高值将全部变成白点,而小值则变成黑点。为了使用灰度值进行可视化,我们可以将线性刻度转换为对数刻度:
翻译成 OpenCV 代码:
C++:
magI += Scalar::all(1); // switch to logarithmic scale
log(magI, magI);
Java:
Mat matOfOnes = Mat.ones(magI.size(), magI.type());
Core.add(matOfOnes, magI, magI); // switch to logarithmic scale
Core.log(magI, magI);
Python :
matOfOnes = np.ones(magI.shape, dtype=magI.dtype)
cv.add(matOfOnes, magI, magI) # switch to logarithmic scale
cv.log(magI, magI)
裁剪和重新排列
还记得,在第一步,我们扩展了图像吗?好吧,是时候扔掉新引入的价值观了。出于可视化目的,我们还可以重新排列结果的象限,使原点(零,零)与图像中心相对应。
C++:
// crop the spectrum, if it has an odd number of rows or columns
magI = magI(Rect(0, 0, magI.cols & -2, magI.rows & -2));
// rearrange the quadrants of Fourier image so that the origin is at the image center
int cx = magI.cols/2;
int cy = magI.rows/2;
Mat q0(magI, Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
Mat q1(magI, Rect(cx, 0, cx, cy)); // Top-Right
Mat q2(magI, Rect(0, cy, cx, cy)); // Bottom-Left
Mat q3(magI, Rect(cx, cy, cx, cy)); // Bottom-Right
Mat tmp; // swap quadrants (Top-Left with Bottom-Right)
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
q2.copyTo(q1);
tmp.copyTo(q2);
Java:
// crop the spectrum, if it has an odd number of rows or columns
magI = magI.submat(new Rect(0, 0, magI.cols() & -2, magI.rows() & -2));
// rearrange the quadrants of Fourier image so that the origin is at the image center
int cx = magI.cols()/2;
int cy = magI.rows()/2;
Mat q0 = new Mat(magI, new Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant
Mat q1 = new Mat(magI, new Rect(cx, 0, cx, cy)); // Top-Right
Mat q2 = new Mat(magI, new Rect(0, cy, cx, cy)); // Bottom-Left
Mat q3 = new Mat(magI, new Rect(cx, cy, cx, cy)); // Bottom-Right
Mat tmp = new Mat(); // swap quadrants (Top-Left with Bottom-Right)
q0.copyTo(tmp);
q3.copyTo(q0);
tmp.copyTo(q3);
q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left)
q2.copyTo(q1);
tmp.copyTo(q2);
Python:
magI_rows, magI_cols = magI.shape
# crop the spectrum, if it has an odd number of rows or columns
magI = magI[0:(magI_rows & -2), 0:(magI_cols & -2)]
cx = int(magI_rows/2)
cy = int(magI_cols/2)
q0 = magI[0:cx, 0:cy] # Top-Left - Create a ROI per quadrant
q1 = magI[cx:cx+cx, 0:cy] # Top-Right
q2 = magI[0:cx, cy:cy+cy] # Bottom-Left
q3 = magI[cx:cx+cx, cy:cy+cy] # Bottom-Right
tmp = np.copy(q0) # swap quadrants (Top-Left with Bottom-Right)
magI[0:cx, 0:cy] = q3
magI[cx:cx + cx, cy:cy + cy] = tmp
tmp = np.copy(q1) # swap quadrant (Top-Right with Bottom-Left)
magI[cx:cx + cx, 0:cy] = q2
magI[0:cx, cy:cy + cy] = tmp
正常化
出于可视化目的,再次执行此操作。我们现在有了星等,但这仍然超出了我们的图像显示范围 0 到 1。我们使用 cv::normalize() 函数将我们的值规范化到这个范围。
C++:
normalize(magI, magI, 0, 1, NORM_MINMAX); // Transform the matrix with float values into a
// viewable image form (float between values 0 and 1).
Java:
Core.normalize(magI, magI, 0, 255, Core.NORM_MINMAX, CvType.CV_8UC1); // Transform the matrix with float values
// into a viewable image form (float between
// values 0 and 255).
python:
cv.normalize(magI, magI, 0, 1, cv.NORM_MINMAX) # Transform the matrix with float values into a
结果
一个应用思路是确定图像中存在的几何方向。例如,让我们找出文本是否是水平的?看一些文本,你会注意到文本线条也形成了水平线,字母形成了垂直线条。在傅里叶变换的情况下,也可以看到文本片段的这两个主要组成部分。让我们使用这个水平和这个旋转的图像来描述一个文本。
如果是横向文本:
如果文本旋转:
您可以看到,频域中最有影响力的分量(幅度图像上最亮的点)遵循图像上物体的几何旋转。由此我们可以计算偏移量并执行图像旋转以校正最终的未对齐。
参考文献:
1、《Discrete Fourier Transform》-----Bernát Gábor