从编码的图像和视频中提取DCT系数
有没有一种方法可以很容易地从编码的图像和视频中提取DCT系数(和量化参数)? 任何解码器软件都必须使用它们来解码采用块DCT编码的图像和视频。 所以我很确定解码器知道这些系数是什么。 有没有办法把它们暴露给使用解码器的人?
我正在实现一些直接在DCT域中工作的视频质量评估算法。 目前,我的大部分代码都使用OpenCV,如果有人知道使用该框架的解决方案,那将是非常棒的。 我不介意使用其他库(可能是libjpeg,但它似乎只用于静止图像),但我主要关心的是尽可能少做与特定格式相关的工作(我不想重新发明轮子,自己写解码器)。 我希望能够打开OpenCV可以打开的任何视频/图像(H.264,MPEG,JPEG等),并且如果它是块DCT编码的,就能获得DCT系数。
在最坏的情况下,我知道我可以编写我自己的块DCT代码,把解压缩后的帧/图像送进去运行,这样就回到了DCT域。 这不是一个优雅的解决方案,我希望我能做得更好。
目前,我使用相当常见的OpenCV样板打开图像:
// Load the image named by `filename` through the legacy OpenCV C API; the
// quality assessment metric is then run on the returned IplImage.
IplImage *image = cvLoadImage(filename); // Run quality assessment metric
我用于视频的代码同样简单:
CvCapture *capture = cvCaptureFromAVI(filename); while (cvGrabFrame(capture)) { IplImage *frame = cvRetrieveFrame(capture); // Run quality assessment metric on frame } cvReleaseCapture(&capture);
在这两种情况下,我得到的都是一个BGR格式的3通道IplImage。 有什么办法可以得到DCT系数吗?
那么,我做了一些阅读,我原来的问题似乎是一厢情愿的例子。
基本上,从H.264视频帧中获得DCT系数是不可能的,原因很简单:H.264不使用DCT。 它使用的是另一种变换(整数变换)。 其次,该变换的系数不一定是逐帧变化的——H.264更聪明,因为它会把帧分割成片(slice)。 通过一个特殊的解码器应该可以获得这些系数,但我怀疑OpenCV并没有把它们暴露给用户。
对于JPEG,情况要乐观一些。 正如我所猜测的,libjpeg会把DCT系数暴露给您。 我写了一个小应用程序来证明这是可行的(源代码在最后)。 它使用每个块的DC项创建一幅新的图像。 由于DC项等于块的平均值(经过适当缩放之后),所以DC图像就是输入JPEG图像的下采样版本。
编辑:修正了源代码中的缩放
原始图像(512 x 512):
DC图像(64×64):亮度Cr Cb RGB
源代码(C++):
#include <stdio.h> #include <assert.h> #include <cv.h> #include <highgui.h> extern "C" { #include "jpeglib.h" #include <setjmp.h> } #define DEBUG 0 #define OUTPUT_IMAGES 1 /* * Extract the DC terms from the specified component. */ IplImage * extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci) { jpeg_component_info *ci_ptr = &cinfo->comp_info[ci]; CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks); IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1); assert(dc != NULL); JQUANT_TBL *tbl = ci_ptr->quant_table; UINT16 dc_quant = tbl->quantval[0]; #if DEBUG printf("DCT method: %x\n", cinfo->dct_method); printf ( "component: %d (%dx %d blocks) sampling: (%dx %d)\n", ci, ci_ptr->width_in_blocks, ci_ptr->height_in_blocks, ci_ptr->h_samp_factor, ci_ptr->v_samp_factor ); printf("quantization table: %d\n", ci); for (int i = 0; i < DCTSIZE2; ++i) { printf("% 4d ", (int)(tbl->quantval[i])); if ((i + 1) % 8 == 0) printf("\n"); } printf("raw DC coefficients:\n"); #endif JBLOCKARRAY buf = (cinfo->mem->access_virt_barray) ( (j_common_ptr)cinfo, coeffs[ci], 0, ci_ptr->v_samp_factor, FALSE ); for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf) { for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b) { int intensity = 0; intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128; intensity = MAX(0, intensity); intensity = MIN(255, intensity); cvSet2D(dc, sf, (int)b, cvScalar(intensity)); #if DEBUG printf("% 2d ", buf[sf][b][0]); #endif } #if DEBUG printf("\n"); #endif } return dc; } IplImage *upscale_chroma(IplImage *quarter, CvSize full_size) { IplImage *full = cvCreateImage(full_size, IPL_DEPTH_8U, 1); cvResize(quarter, full, CV_INTER_NN); return full; } GLOBAL(int) read_JPEG_file (char * filename, IplImage **dc) { /* This struct contains the JPEG decompression parameters and pointers to * working space (which is allocated as needed by the JPEG library). 
*/ struct jpeg_decompress_struct cinfo; struct jpeg_error_mgr jerr; /* More stuff */ FILE * infile; /* source file */ /* In this example we want to open the input file before doing anything else, * so that the setjmp() error recovery below can assume the file is open. * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that * requires it in order to read binary files. */ if ((infile = fopen(filename, "rb")) == NULL) { fprintf(stderr, "can't open %s\n", filename); return 0; } /* Step 1: allocate and initialize JPEG decompression object */ cinfo.err = jpeg_std_error(&jerr); /* Now we can initialize the JPEG decompression object. */ jpeg_create_decompress(&cinfo); /* Step 2: specify data source (eg, a file) */ jpeg_stdio_src(&cinfo, infile); /* Step 3: read file parameters with jpeg_read_header() */ (void) jpeg_read_header(&cinfo, TRUE); /* We can ignore the return value from jpeg_read_header since * (a) suspension is not possible with the stdio data source, and * (b) we passed TRUE to reject a tables-only JPEG file as an error. * See libjpeg.txt for more info. */ /* Step 4: set parameters for decompression */ /* In this example, we don't need to change any of the defaults set by * jpeg_read_header(), so we do nothing here. 
*/ jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo); IplImage *y = extract_dc(&cinfo, coeffs, 0); IplImage *cb_q = extract_dc(&cinfo, coeffs, 1); IplImage *cr_q = extract_dc(&cinfo, coeffs, 2); IplImage *cb = upscale_chroma(cb_q, cvGetSize(y)); IplImage *cr = upscale_chroma(cr_q, cvGetSize(y)); cvReleaseImage(&cb_q); cvReleaseImage(&cr_q); #if OUTPUT_IMAGES cvSaveImage("y.png", y); cvSaveImage("cb.png", cb); cvSaveImage("cr.png", cr); #endif *dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3); assert(dc != NULL); cvMerge(y, cr, cb, NULL, *dc); cvReleaseImage(&y); cvReleaseImage(&cb); cvReleaseImage(&cr); /* Step 7: Finish decompression */ (void) jpeg_finish_decompress(&cinfo); /* We can ignore the return value since suspension is not possible * with the stdio data source. */ /* Step 8: Release JPEG decompression object */ /* This is an important step since it will release a good deal of memory. */ jpeg_destroy_decompress(&cinfo); fclose(infile); return 1; } int main(int argc, char **argv) { int ret = 0; if (argc != 2) { fprintf(stderr, "usage: %s filename.jpg\n", argv[0]); return 1; } IplImage *dc = NULL; ret = read_JPEG_file(argv[1], &dc); assert(dc != NULL); IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3); cvCvtColor(dc, rgb, CV_YCrCb2RGB); #if OUTPUT_IMAGES cvSaveImage("rgb.png", rgb); #else cvNamedWindow("DC", CV_WINDOW_AUTOSIZE); cvShowImage("DC", rgb); cvWaitKey(0); #endif cvReleaseImage(&dc); cvReleaseImage(&rgb); return 0; }
你可以使用libjpeg来提取JPEG文件的DCT数据,但对于H.264视频文件,我找不到任何能直接给出这些数据(实际的整数数据)的开源代码。 不过您可以使用H.264的开源软件,如JM、JSVM或x264。 在这些软件的源文件中,您必须找到调用DCT函数的具体函数,并把它改成您需要的形式,以输出所需的数据。
对于图像:使用下面的代码,在调用read_jpeg_file( infilename, v, quant_tbl )之后,v和quant_tbl将分别包含您的JPEG图像的DCT数据和量化表。
我使用QVector来存储输出数据,您可以把它换成自己喜欢的C++数组或列表容器。
#include <iostream> #include <stdio.h> #include <jpeglib.h> #include <stdlib.h> #include <setjmp.h> #include <fstream> #include <QVector> int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl) { struct jpeg_decompress_struct cinfo; struct jpeg_error_mgr jerr; FILE * infile; if ((infile = fopen(filename, "rb")) == NULL) { fprintf(stderr, "can't open %s\n", filename); return 0; } cinfo.err = jpeg_std_error(&jerr); jpeg_create_decompress(&cinfo); jpeg_stdio_src(&cinfo, infile); (void) jpeg_read_header(&cinfo, TRUE); jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo); for (int ci = 0; ci < 1; ci++) { JBLOCKARRAY buffer_one; JCOEFPTR blockptr_one; jpeg_component_info* compptr_one; compptr_one = cinfo.comp_info + ci; for (int by = 0; by < compptr_one->height_in_blocks; by++) { buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE); for (int bx = 0; bx < compptr_one->width_in_blocks; bx++) { blockptr_one = buffer_one[0][bx]; QVector<int> tmp; for (int bi = 0; bi < 64; bi++) { tmp.append(blockptr_one[bi]); } dct_coeff.push_back(tmp); } } } // coantization table j_decompress_ptr dec_cinfo = (j_decompress_ptr) &cinfo; jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0]; JQUANT_TBL *tbl = ci_ptr->quant_table; for(int ci =0 ; ci < 64; ci++){ quant_tbl.append(tbl->quantval[ci]); } return 1; } int main() { QVector<QVector<int> > v; QVector<unsigned short> quant_tbl; char *infilename = "your_image.jpg"; std::ofstream out; out.open("out_dct.txt"); if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){ for(int j = 0; j < v.size(); j++ ){ for (int i = 0; i < v[0].size(); ++i){ out << v[j][i] << "\t"; } out << "---------------" << std::endl; } out << "\n\n\n" << std::string(10,'-') << std::endl; out << "\nQauntization Table:" << std::endl; for(int i = 0; i < quant_tbl.size(); i++ ){ out << quant_tbl[i] << "\t"; } } else{ std::cout << "Can not read, Returned With 
Error"; return -1; } out.close(); return 0; }