问题描述
- x64环境下,把内嵌汇编的汇编单独放在.asm文件中
- void GDIRender::YUV_TO_RGB24(unsigned char *puc_y int stride_y
unsigned char *puc_u unsigned char *puc_v int stride_uv
unsigned char *puc_out int width_y int height_yint stride_out) {int y horiz_count;unsigned char *puc_out_remembered;if (height_y < 0) { /* we are flipping our output upside-down */ height_y = -height_y; puc_y += (height_y - 1) * stride_y ; puc_u += (height_y/2 - 1) * stride_uv; puc_v += (height_y/2 - 1) * stride_uv; stride_y = -stride_y; stride_uv = -stride_uv;}horiz_count = -(width_y >> 3);for (y=0; y<height_y; y++) { if (y == height_y-1) { /* this is the last output line - we need to be careful not to overrun the end of this line */ unsigned char temp_buff[3*MAXIMUM_Y_WIDTH+1]; puc_out_remembered = puc_out; puc_out = temp_buff; /* write the RGB to a temporary store */ } _asm { push eax push ebx push ecx push edx push edi mov eax puc_out mov ebx puc_y mov ecx puc_u mov edx puc_v mov edi horiz_count horiz_loop: movd mm2 [ecx] pxor mm7 mm7 movd mm3 [edx] punpcklbw mm2 mm7 ; mm2 = __u3__u2__u1__u0 movq mm0 [ebx] ; mm0 = y7y6y5y4y3y2y1y0 punpcklbw mm3 mm7 ; mm3 = __v3__v2__v1__v0 movq mm1 mmw_0x00ff ; mm1 = 00ff00ff00ff00ff psubusb mm0 mmb_0x10 ; mm0 -= 16 psubw mm2 mmw_0x0080 ; mm2 -= 128 pand mm1 mm0 ; mm1 = __y6__y4__y2__y0 psubw mm3 mmw_0x0080 ; mm3 -= 128 psllw mm1 3 ; mm1 *= 8 psrlw mm0 8 ; mm0 = __y7__y5__y3__y1 psllw mm2 3 ; mm2 *= 8 pmulhw mm1 mmw_mult_Y ; mm1 *= luma coeff psllw mm0 3 ; mm0 *= 8 psllw mm3 3 ; mm3 *= 8 movq mm5 mm3 ; mm5 = mm3 = v pmulhw mm5 mmw_mult_V_R ; mm5 = red chroma movq mm4 mm2 ; mm4 = mm2 = u pmulhw mm0 mmw_mult_Y ; mm0 *= luma coeff movq mm7 mm1 ; even luma part pmulhw mm2 mmw_mult_U_G ; mm2 *= u green coeff paddsw mm7 mm5 ; mm7 = luma + chroma __r6__r4__r2__r0 pmulhw mm3 mmw_mult_V_G ; mm3 *= v green coeff packuswb mm7 mm7 ; mm7 = r6r4r2r0r6r4r2r0 pmulhw mm4 mmw_mult_U_B ; mm4 = blue chroma paddsw mm5 mm0 ; mm5 = luma + chroma __r7__r5__r3__r1 packuswb mm5 mm5 ; mm6 = r7r5r3r1r7r5r3r1 paddsw mm2 mm3 ; mm2 = green chroma movq mm3 mm1 ; mm3 = __y6__y4__y2__y0 movq mm6 mm1 ; mm6 = __y6__y4__y2__y0 paddsw mm3 mm4 ; mm3 = luma + chroma 
__b6__b4__b2__b0 paddsw mm6 mm2 ; mm6 = luma + chroma __g6__g4__g2__g0 punpcklbw mm7 mm5 ; mm7 = r7r6r5r4r3r2r1r0 paddsw mm2 mm0 ; odd luma part plus chroma part __g7__g5__g3__g1 packuswb mm6 mm6 ; mm2 = g6g4g2g0g6g4g2g0 packuswb mm2 mm2 ; mm2 = g7g5g3g1g7g5g3g1 packuswb mm3 mm3 ; mm3 = b6b4b2b0b6b4b2b0 paddsw mm4 mm0 ; odd luma part plus chroma part __b7__b5__b3__b1 packuswb mm4 mm4 ; mm4 = b7b5b3b1b7b5b3b1 punpcklbw mm6 mm2 ; mm6 = g7g6g5g4g3g2g1g0 punpcklbw mm3 mm4 ; mm3 = b7b6b5b4b3b2b1b0 /* 32-bit shuffle.... */ pxor mm0 mm0 ; is this needed? movq mm1 mm6 ; mm1 = g7g6g5g4g3g2g1g0 punpcklbw mm1 mm0 ; mm1 = __g3__g2__g1__g0 movq mm0 mm3 ; mm0 = b7b6b5b4b3b2b1b0 punpcklbw mm0 mm7 ; mm0 = r3b3r2b2r1b1r0b0 movq mm2 mm0 ; mm2 = r3b3r2b2r1b1r0b0 punpcklbw mm0 mm1 ; mm0 = __r1g1b1__r0g0b0 punpckhbw mm2 mm1 ; mm2 = __r3g3b3__r2g2b2 /* 24-bit shuffle and save... */ movd [eax] mm0 ; eax[0] = __r0g0b0 psrlq mm0 32 ; mm0 = __r1g1b1 movd 3[eax] mm0 ; eax[3] = __r1g1b1 movd 6[eax] mm2 ; eax[6] = __r2g2b2 psrlq mm2 32 ; mm2 = __r3g3b3 movd 9[eax] mm2 ; eax[9] = __r3g3b3 /* 32-bit shuffle.... */ pxor mm0 mm0 ; is this needed? movq mm1 mm6 ; mm1 = g7g6g5g4g3g2g1g0 punpckhbw mm1 mm0 ; mm1 = __g7__g6__g5__g4 movq mm0 mm3 ; mm0 = b7b6b5b4b3b2b1b0 punpckhbw mm0 mm7 ; mm0 = r7b7r6b6r5b5r4b4 movq mm2 mm0 ; mm2 = r7b7r6b6r5b5r4b4 punpcklbw mm0 mm1 ; mm0 = __r5g5b5__r4g4b4 punpckhbw mm2 mm1 ; mm2 = __r7g7b7__r6g6b6 /* 24-bit shuffle and save... */ movd 12[eax] mm0 ; eax[12] = __r4g4b4 psrlq mm0 32 ; mm0 = __r5g5b5 movd 15[eax] mm0 ; eax[15] = __r5g5b5 add ebx 8 ; puc_y += 8; movd 18[eax] mm2 ; eax[18] = __r6g6b6 psrlq mm2 32 ; mm2 = __r7g7b7 add ecx 4 ; puc_u += 4; add edx 4 ; puc_v += 4; movd 21[eax] mm2 ; eax[21] = __r7g7b7 add eax 24 ; puc_out += 24 inc edi jne horiz_loop pop edi pop edx pop ecx pop ebx pop eax emms } if (y == height_y-1) { /* last line of output - we have used the temp_buff and need to copy... 
*/ int x = 3 * width_y; /* interation counter */ unsigned char *ps = puc_out; /* source pointer (temporary line store) */ unsigned char *pd = puc_out_remembered; /* dest pointer */ while (x--) *(pd++) = *(ps++); /* copy the line */ } puc_y += stride_y; if (y%2) { puc_u += stride_uv; puc_v += stride_uv; } puc_out += stride_out; }
}
上面是视频解码中做视频格式转换的代码。由于 x64 不支持内嵌汇编,我在网上也找了关于移植的文章,但我没有任何汇编基础,还请会的朋友帮我转一下汇编:就是把内嵌汇编这段单独放在一个文件(.asm)中,然后能编译通过。我用的环境是 VS2010 的 x64。请知道的朋友帮下忙,谢谢。
解决方案
http://bbs.pediy.com/archive/index.php?t-182669.html
时间: 2022-12-20