00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <stddef.h>
00031 #include <inttypes.h>
00032
/* Provide __WORDSIZE from the project's MP_WORDSIZE when nothing has
 * defined it yet. */
#ifndef __WORDSIZE

#define __WORDSIZE MP_WORDSIZE
#endif

/* Drop any earlier definitions of the helper macros so the #defines below
 * can set them again according to the HAVE_* feature macros in effect. */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

/* MMREG_SIZE is 16 when HAVE_SSE2 is defined and 8 otherwise. */
#ifdef HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/* Instruction mnemonics pasted into the asm templates below.
 * HAVE_3DNOW takes precedence over HAVE_MMX2. When neither is defined,
 * PREFETCH/PREFETCHW expand to text starting with '#' (presumably an
 * assembler comment, so the prefetch line assembles to nothing), and PAVGB
 * is left undefined. */
#ifdef HAVE_3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB "pavgusb"
#elif defined (HAVE_MMX2)
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB "pavgb"
#else
#ifdef __APPLE__
#define PREFETCH "#"
#define PREFETCHW "#"
#else
#define PREFETCH " # nop"
#define PREFETCHW " # nop"
#endif
#endif

/* Instruction used to leave MMX state: "femms" with HAVE_3DNOW, otherwise
 * "emms". */
#ifdef HAVE_3DNOW

#define EMMS "femms"
#else
#define EMMS "emms"
#endif

/* With HAVE_MMX2 the stores use "movntq" followed by "sfence"; without it
 * MOVNTQ is a plain "movq" and SFENCE expands to " # nop". */
#ifdef HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
00084
/**
 * Expand packed 3-byte pixels to 4-byte pixels by adding a zero pad byte.
 *
 * Without WORDS_BIGENDIAN each output pixel is the three source bytes in
 * order followed by 0; with WORDS_BIGENDIAN it is 0 followed by the three
 * source bytes in reverse order.
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer; 4 bytes are written per source pixel
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 3-byte pixel are ignored
 */
static inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* Each iteration consumes 24 bytes, but the last 4-byte load starts at
     * offset 21 and therefore touches byte 24. Require 25 readable bytes so
     * that load never reaches past end. */
    mm_end = end - 24;
    __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "punpckldq 3%1, %%mm0 \n\t"
            "movd 6%1, %%mm1 \n\t"
            "punpckldq 9%1, %%mm1 \n\t"
            "movd 12%1, %%mm2 \n\t"
            "punpckldq 15%1, %%mm2 \n\t"
            "movd 18%1, %%mm3 \n\t"
            "punpckldq 21%1, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm2, 16%0 \n\t"
            MOVNTQ" %%mm3, 24%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 32;
        s += 24;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop: handles everything without MMX, or the remainder after
     * the MMX loop. Only whole pixels are converted. */
    while (end - s >= 3)
    {
#ifdef WORDS_BIGENDIAN

        *dest++ = 0;
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s += 3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 0;
#endif
    }
}
00144
/**
 * Pack 4-byte pixels into 3-byte pixels by dropping one byte per pixel.
 *
 * Without WORDS_BIGENDIAN the first three bytes of each pixel are copied
 * and the fourth is skipped; with WORDS_BIGENDIAN the first byte is skipped
 * and the remaining three are written in reverse order.
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer; 3 bytes are written per source pixel
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 4-byte pixel are ignored
 */
static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* Each iteration reads 32 source bytes and stores 24 output bytes. */
    mm_end = end - 31;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 16%1, %%mm4 \n\t"
            "movq 24%1, %%mm5 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"
            :"=m"(*dest)
            :"m"(*s),"m"(mask24l),
             "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
        dest += 24;
        s += 32;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 4-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 4)
    {
#ifdef WORDS_BIGENDIAN

        s++;
        dest[2] = *s++;
        dest[1] = *s++;
        dest[0] = *s++;
        dest += 3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
#endif
    }
}
00230
00231
00232
00233
00234
00235
00236
/**
 * Convert 16-bit words from a 15-bit layout to a 16-bit layout.
 *
 * For every 16-bit word x the result is (x & 0x7FFF) + (x & 0x7FE0): the
 * low five bits stay in place and bits 5..14 are moved up by one position.
 *
 * @param src      source buffer, accessed as native-endian 16/32-bit words
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes; a trailing odd byte is ignored
 */
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
    mm_end = end - 15;
    while (s<mm_end)
    {
        /* The asm stores 16 bytes although only *d is listed as an output,
         * so a "memory" clobber tells the compiler about the other bytes. */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "pand %%mm4, %%mm0 \n\t"
            "pand %%mm4, %%mm2 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            :"memory"
            );
        d+=16;
        s+=16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two words at a time. The bound is tested as a byte count so that no
     * pointer before the start of a short buffer is ever formed. */
    while (end - s >= 4)
    {
        register unsigned x= *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
    }
    /* At most one whole word is left; a single stray byte is not touched. */
    if (end - s >= 2)
    {
        register unsigned short x= *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
00285
/**
 * Convert 16-bit words from a 16-bit layout to a 15-bit layout.
 *
 * For every 16-bit word x the result is ((x >> 1) & 0x7FE0) | (x & 0x001F):
 * the low five bits stay in place and bits 6..15 are moved down by one
 * position, discarding bit 5.
 *
 * @param src      source buffer, accessed as native-endian 16/32-bit words
 * @param dst      destination buffer, same size as the source
 * @param src_size number of source bytes; a trailing odd byte is ignored
 */
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s));
    __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
    mm_end = end - 15;
    while (s<mm_end)
    {
        /* The asm stores 16 bytes although only *d is listed as an output,
         * so a "memory" clobber tells the compiler about the other bytes. */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $1, %%mm0 \n\t"
            "psrlq $1, %%mm2 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm3 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
            :"memory"
            );
        d+=16;
        s+=16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Two words at a time. The bound is tested as a byte count so that no
     * pointer before the start of a short buffer is ever formed. */
    while (end - s >= 4)
    {
        register uint32_t x= *((const uint32_t *)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
    }
    /* At most one whole word is left; a single stray byte is not touched. */
    if (end - s >= 2)
    {
        register uint16_t x= *((const uint16_t *)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}
00341
/**
 * Reduce 32-bit pixels to 16-bit 5-6-5 words.
 *
 * Each source pixel is read as a native-endian 32-bit word; the output word
 * is ((w & 0xFF) >> 3) + ((w & 0xFC00) >> 5) + ((w & 0xF80000) >> 8).
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 4-byte pixel are ignored
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* The loop lives entirely inside the asm and addresses memory through
     * the d/s registers, so the "memory" clobber is what tells the compiler
     * that the buffers are read and written here. */
    asm volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 4-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 4)
    {
        const uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
00439
/**
 * Reduce 32-bit pixels to 16-bit 5-6-5 words with the outer two channels
 * swapped relative to rgb32to16.
 *
 * Each source pixel is read as a native-endian 32-bit word; the output word
 * is ((w & 0xF8) << 8) + ((w & 0xFC00) >> 5) + ((w & 0xF80000) >> 19).
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 4-byte pixel are ignored
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Each iteration converts four pixels (16 source bytes). */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 4-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 4)
    {
        const uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
}
00500
/**
 * Reduce 32-bit pixels to 16-bit words holding a 5-5-5 layout.
 *
 * Each source pixel is read as a native-endian 32-bit word; the output word
 * is ((w & 0xFF) >> 3) + ((w & 0xF800) >> 6) + ((w & 0xF80000) >> 9).
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 4-byte pixel are ignored
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* The loop lives entirely inside the asm and addresses memory through
     * the d/s registers, so the "memory" clobber is what tells the compiler
     * that the buffers are read and written here. */
    asm volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $6, %%mm0 \n\t"
        "pslld $10, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
        : "memory"
    );
#else
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 4-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 4)
    {
        const uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
}
00598
/**
 * Reduce 32-bit pixels to 16-bit words holding a 5-5-5 layout with the
 * outer two channels swapped relative to rgb32to15.
 *
 * Each source pixel is read as a native-endian 32-bit word; the output word
 * is ((w & 0xF8) << 7) + ((w & 0xF800) >> 6) + ((w & 0xF80000) >> 19).
 *
 * @param src      source buffer of 4-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 4-byte pixel are ignored
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* Each iteration converts four pixels (16 source bytes). */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 4-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 4)
    {
        const uint32_t rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
    }
}
00659
/**
 * Reduce packed 3-byte pixels to 16-bit 5-6-5 words.
 *
 * The three bytes of a pixel are taken as b, g, r in memory order and the
 * output word is (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8).
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 3-byte pixel are ignored
 */
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Each iteration consumes 12 bytes, but the last 4-byte load starts at
     * offset 9 and therefore touches byte 12. Require 13 readable bytes so
     * that load never reaches past end. */
    mm_end = end - 12;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 3-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 3)
    {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00722
/**
 * Reduce packed 3-byte pixels to 16-bit 5-6-5 words, reading the channels
 * in the opposite order to rgb24to16.
 *
 * The three bytes of a pixel are taken as r, g, b in memory order and the
 * output word is (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8).
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 3-byte pixel are ignored
 */
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* Each iteration consumes 12 bytes and loads up to byte 12; the bound
     * of 15 leaves more than enough readable bytes for that. */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 3-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 3)
    {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00785
/**
 * Reduce packed 3-byte pixels to 16-bit words holding a 5-5-5 layout.
 *
 * The three bytes of a pixel are taken as b, g, r in memory order and the
 * output word is (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7).
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 3-byte pixel are ignored
 */
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* Each iteration consumes 12 bytes, but the last 4-byte load starts at
     * offset 9 and therefore touches byte 12. Require 13 readable bytes so
     * that load never reaches past end. */
    mm_end = end - 12;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 3-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 3)
    {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00848
/**
 * Reduce packed 3-byte pixels to 16-bit words holding a 5-5-5 layout,
 * reading the channels in the opposite order to rgb24to15.
 *
 * The three bytes of a pixel are taken as r, g, b in memory order and the
 * output word is (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7).
 *
 * @param src      source buffer of 3-byte pixels
 * @param dst      destination buffer, written as 16-bit words
 * @param src_size number of source bytes; trailing bytes that do not form a
 *                 whole 3-byte pixel are ignored
 */
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#ifdef HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm __volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* Each iteration consumes 12 bytes and loads up to byte 12; the bound
     * of 15 leaves more than enough readable bytes for that. */
    mm_end = end - 15;
    while (s < mm_end)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). Only
     * whole 3-byte pixels are consumed, so nothing past end is read. */
    while (end - s >= 3)
    {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
/**
 * Expand 16-bit words holding a 5-5-5 layout to packed 3-byte pixels.
 *
 * For each source word w the three output bytes are, in order,
 * (w & 0x1F) << 3, (w & 0x3E0) >> 2 and (w & 0x7C00) >> 7.
 *
 * @param src      source buffer, read as native-endian 16-bit words
 * @param dst      destination buffer; 3 bytes are written per source word
 * @param src_size number of source bytes; src_size/2 words are converted
 */
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#ifdef HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = dst;
    /* Keep the const qualifier when reinterpreting the source as words. */
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* Each iteration converts 8 source words into 24 output bytes. */
    mm_end = end - 7;
    while (s < mm_end)
    {
        /* First statement: unpack both groups of four words. The second
         * statement below relies on the mm0/mm3/mm6/mm7 contents left here. */
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            "movq %%mm0, %%mm6 \n\t"
            "movq %%mm3, %%mm7 \n\t"

            "movq 8%1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
            :"memory");

        /* Second statement: squeeze the unpacked pixels down to 24 bytes
         * and store them. */
        __asm __volatile(
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "movq %%mm6, %%mm0 \n\t"
            "movq %%mm7, %%mm1 \n\t"

            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"

            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm3 \n\t"
            "psrlq $8, %%mm6 \n\t"
            "psrlq $8, %%mm7 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm1 \n\t"
            "pand %2, %%mm4 \n\t"
            "pand %2, %%mm5 \n\t"
            "pand %3, %%mm2 \n\t"
            "pand %3, %%mm3 \n\t"
            "pand %3, %%mm6 \n\t"
            "pand %3, %%mm7 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "por %%mm6, %%mm4 \n\t"
            "por %%mm7, %%mm5 \n\t"

            "movq %%mm1, %%mm2 \n\t"
            "movq %%mm4, %%mm3 \n\t"
            "psllq $48, %%mm2 \n\t"
            "psllq $32, %%mm3 \n\t"
            "pand %4, %%mm2 \n\t"
            "pand %5, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psrlq $16, %%mm1 \n\t"
            "psrlq $32, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm3, %%mm1 \n\t"
            "pand %6, %%mm5 \n\t"
            "por %%mm5, %%mm4 \n\t"

            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm4, 16%0"

            :"=m"(*d)
            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
            :"memory");
        d += 24;
        s += 8;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar loop for the remainder (or for everything without MMX). */
    while (s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
    }
}
01074
/**
 * Expand 16-bit 5-6-5 words to packed 3-byte pixels.
 *
 * For each source word w the three output bytes are, in order,
 * (w & 0x1F) << 3, (w & 0x7E0) >> 3 and (w & 0xF800) >> 8.
 *
 * @param src      source buffer, read as native-endian 16-bit words
 * @param dst      destination buffer; 3 bytes are written per source word
 * @param src_size number of source bytes; src_size/2 words are converted
 */
static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *in_end = in + src_size/2;
    uint8_t *out = (uint8_t *)dst;
#ifdef HAVE_MMX
    /* Each MMX iteration converts 8 source words into 24 output bytes. */
    const uint16_t *simd_end = in_end - 7;

    __asm __volatile(PREFETCH" %0"::"m"(*in):"memory");
    for (; in < simd_end; out += 24, in += 8)
    {
        /* Stage 1: unpack both groups of four words. Stage 2 below relies
         * on the mm0/mm3/mm6/mm7 contents left behind here. */
        __asm __volatile(
                PREFETCH" 32%1 \n\t"
                "movq %1, %%mm0 \n\t"
                "movq %1, %%mm1 \n\t"
                "movq %1, %%mm2 \n\t"
                "pand %2, %%mm0 \n\t"
                "pand %3, %%mm1 \n\t"
                "pand %4, %%mm2 \n\t"
                "psllq $3, %%mm0 \n\t"
                "psrlq $3, %%mm1 \n\t"
                "psrlq $8, %%mm2 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "movq %%mm1, %%mm4 \n\t"
                "movq %%mm2, %%mm5 \n\t"
                "punpcklwd %5, %%mm0 \n\t"
                "punpcklwd %5, %%mm1 \n\t"
                "punpcklwd %5, %%mm2 \n\t"
                "punpckhwd %5, %%mm3 \n\t"
                "punpckhwd %5, %%mm4 \n\t"
                "punpckhwd %5, %%mm5 \n\t"
                "psllq $8, %%mm1 \n\t"
                "psllq $16, %%mm2 \n\t"
                "por %%mm1, %%mm0 \n\t"
                "por %%mm2, %%mm0 \n\t"
                "psllq $8, %%mm4 \n\t"
                "psllq $16, %%mm5 \n\t"
                "por %%mm4, %%mm3 \n\t"
                "por %%mm5, %%mm3 \n\t"

                "movq %%mm0, %%mm6 \n\t"
                "movq %%mm3, %%mm7 \n\t"

                "movq 8%1, %%mm0 \n\t"
                "movq 8%1, %%mm1 \n\t"
                "movq 8%1, %%mm2 \n\t"
                "pand %2, %%mm0 \n\t"
                "pand %3, %%mm1 \n\t"
                "pand %4, %%mm2 \n\t"
                "psllq $3, %%mm0 \n\t"
                "psrlq $3, %%mm1 \n\t"
                "psrlq $8, %%mm2 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "movq %%mm1, %%mm4 \n\t"
                "movq %%mm2, %%mm5 \n\t"
                "punpcklwd %5, %%mm0 \n\t"
                "punpcklwd %5, %%mm1 \n\t"
                "punpcklwd %5, %%mm2 \n\t"
                "punpckhwd %5, %%mm3 \n\t"
                "punpckhwd %5, %%mm4 \n\t"
                "punpckhwd %5, %%mm5 \n\t"
                "psllq $8, %%mm1 \n\t"
                "psllq $16, %%mm2 \n\t"
                "por %%mm1, %%mm0 \n\t"
                "por %%mm2, %%mm0 \n\t"
                "psllq $8, %%mm4 \n\t"
                "psllq $16, %%mm5 \n\t"
                "por %%mm4, %%mm3 \n\t"
                "por %%mm5, %%mm3 \n\t"
                :"=m"(*out)
                :"m"(*in),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
                :"memory");

        /* Stage 2: squeeze the unpacked pixels down to 24 bytes and store
         * them. */
        __asm __volatile(
                "movq %%mm0, %%mm4 \n\t"
                "movq %%mm3, %%mm5 \n\t"
                "movq %%mm6, %%mm0 \n\t"
                "movq %%mm7, %%mm1 \n\t"

                "movq %%mm4, %%mm6 \n\t"
                "movq %%mm5, %%mm7 \n\t"
                "movq %%mm0, %%mm2 \n\t"
                "movq %%mm1, %%mm3 \n\t"

                "psrlq $8, %%mm2 \n\t"
                "psrlq $8, %%mm3 \n\t"
                "psrlq $8, %%mm6 \n\t"
                "psrlq $8, %%mm7 \n\t"
                "pand %2, %%mm0 \n\t"
                "pand %2, %%mm1 \n\t"
                "pand %2, %%mm4 \n\t"
                "pand %2, %%mm5 \n\t"
                "pand %3, %%mm2 \n\t"
                "pand %3, %%mm3 \n\t"
                "pand %3, %%mm6 \n\t"
                "pand %3, %%mm7 \n\t"
                "por %%mm2, %%mm0 \n\t"
                "por %%mm3, %%mm1 \n\t"
                "por %%mm6, %%mm4 \n\t"
                "por %%mm7, %%mm5 \n\t"

                "movq %%mm1, %%mm2 \n\t"
                "movq %%mm4, %%mm3 \n\t"
                "psllq $48, %%mm2 \n\t"
                "psllq $32, %%mm3 \n\t"
                "pand %4, %%mm2 \n\t"
                "pand %5, %%mm3 \n\t"
                "por %%mm2, %%mm0 \n\t"
                "psrlq $16, %%mm1 \n\t"
                "psrlq $32, %%mm4 \n\t"
                "psllq $16, %%mm5 \n\t"
                "por %%mm3, %%mm1 \n\t"
                "pand %6, %%mm5 \n\t"
                "por %%mm5, %%mm4 \n\t"

                MOVNTQ" %%mm0, %0 \n\t"
                MOVNTQ" %%mm1, 8%0 \n\t"
                MOVNTQ" %%mm4, 16%0"

                :"=m"(*out)
                :"m"(*in),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
                :"memory");
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: one word in, three bytes out. */
    for (; in < in_end; in++, out += 3)
    {
        const uint16_t word = *in;

        out[0] = (word & 0x1F) << 3;
        out[1] = (word & 0x7E0) >> 3;
        out[2] = (word & 0xF800) >> 8;
    }
}
01215
01216 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
01217 {
01218 const uint16_t *end;
01219 #ifdef HAVE_MMX
01220 const uint16_t *mm_end;
01221 #endif
01222 uint8_t *d = (uint8_t *)dst;
01223 const uint16_t *s = (const uint16_t *)src;
01224 end = s + src_size/2;
01225 #ifdef HAVE_MMX
01226 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
01227 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01228 mm_end = end - 3;
01229 while (s < mm_end)
01230 {
01231 __asm __volatile(
01232 PREFETCH" 32%1 \n\t"
01233 "movq %1, %%mm0 \n\t"
01234 "movq %1, %%mm1 \n\t"
01235 "movq %1, %%mm2 \n\t"
01236 "pand %2, %%mm0 \n\t"
01237 "pand %3, %%mm1 \n\t"
01238 "pand %4, %%mm2 \n\t"
01239 "psllq $3, %%mm0 \n\t"
01240 "psrlq $2, %%mm1 \n\t"
01241 "psrlq $7, %%mm2 \n\t"
01242 "movq %%mm0, %%mm3 \n\t"
01243 "movq %%mm1, %%mm4 \n\t"
01244 "movq %%mm2, %%mm5 \n\t"
01245 "punpcklwd %%mm7, %%mm0 \n\t"
01246 "punpcklwd %%mm7, %%mm1 \n\t"
01247 "punpcklwd %%mm7, %%mm2 \n\t"
01248 "punpckhwd %%mm7, %%mm3 \n\t"
01249 "punpckhwd %%mm7, %%mm4 \n\t"
01250 "punpckhwd %%mm7, %%mm5 \n\t"
01251 "psllq $8, %%mm1 \n\t"
01252 "psllq $16, %%mm2 \n\t"
01253 "por %%mm1, %%mm0 \n\t"
01254 "por %%mm2, %%mm0 \n\t"
01255 "psllq $8, %%mm4 \n\t"
01256 "psllq $16, %%mm5 \n\t"
01257 "por %%mm4, %%mm3 \n\t"
01258 "por %%mm5, %%mm3 \n\t"
01259 MOVNTQ" %%mm0, %0 \n\t"
01260 MOVNTQ" %%mm3, 8%0 \n\t"
01261 :"=m"(*d)
01262 :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01263 :"memory");
01264 d += 16;
01265 s += 4;
01266 }
01267 __asm __volatile(SFENCE:::"memory");
01268 __asm __volatile(EMMS:::"memory");
01269 #endif
01270 while (s < end)
01271 {
01272 #if 0 //slightly slower on Athlon
01273 int bgr= *s++;
01274 *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
01275 #else
01276 register uint16_t bgr;
01277 bgr = *s++;
01278 #ifdef WORDS_BIGENDIAN
01279 *d++ = 0;
01280 *d++ = (bgr&0x7C00)>>7;
01281 *d++ = (bgr&0x3E0)>>2;
01282 *d++ = (bgr&0x1F)<<3;
01283 #else
01284 *d++ = (bgr&0x1F)<<3;
01285 *d++ = (bgr&0x3E0)>>2;
01286 *d++ = (bgr&0x7C00)>>7;
01287 *d++ = 0;
01288 #endif
01289
01290 #endif
01291 }
01292 }
01293
01294 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
01295 {
01296 const uint16_t *end;
01297 #ifdef HAVE_MMX
01298 const uint16_t *mm_end;
01299 #endif
01300 uint8_t *d = (uint8_t *)dst;
01301 const uint16_t *s = (uint16_t *)src;
01302 end = s + src_size/2;
01303 #ifdef HAVE_MMX
01304 __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
01305 __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
01306 mm_end = end - 3;
01307 while (s < mm_end)
01308 {
01309 __asm __volatile(
01310 PREFETCH" 32%1 \n\t"
01311 "movq %1, %%mm0 \n\t"
01312 "movq %1, %%mm1 \n\t"
01313 "movq %1, %%mm2 \n\t"
01314 "pand %2, %%mm0 \n\t"
01315 "pand %3, %%mm1 \n\t"
01316 "pand %4, %%mm2 \n\t"
01317 "psllq $3, %%mm0 \n\t"
01318 "psrlq $3, %%mm1 \n\t"
01319 "psrlq $8, %%mm2 \n\t"
01320 "movq %%mm0, %%mm3 \n\t"
01321 "movq %%mm1, %%mm4 \n\t"
01322 "movq %%mm2, %%mm5 \n\t"
01323 "punpcklwd %%mm7, %%mm0 \n\t"
01324 "punpcklwd %%mm7, %%mm1 \n\t"
01325 "punpcklwd %%mm7, %%mm2 \n\t"
01326 "punpckhwd %%mm7, %%mm3 \n\t"
01327 "punpckhwd %%mm7, %%mm4 \n\t"
01328 "punpckhwd %%mm7, %%mm5 \n\t"
01329 "psllq $8, %%mm1 \n\t"
01330 "psllq $16, %%mm2 \n\t"
01331 "por %%mm1, %%mm0 \n\t"
01332 "por %%mm2, %%mm0 \n\t"
01333 "psllq $8, %%mm4 \n\t"
01334 "psllq $16, %%mm5 \n\t"
01335 "por %%mm4, %%mm3 \n\t"
01336 "por %%mm5, %%mm3 \n\t"
01337 MOVNTQ" %%mm0, %0 \n\t"
01338 MOVNTQ" %%mm3, 8%0 \n\t"
01339 :"=m"(*d)
01340 :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01341 :"memory");
01342 d += 16;
01343 s += 4;
01344 }
01345 __asm __volatile(SFENCE:::"memory");
01346 __asm __volatile(EMMS:::"memory");
01347 #endif
01348 while (s < end)
01349 {
01350 register uint16_t bgr;
01351 bgr = *s++;
01352 #ifdef WORDS_BIGENDIAN
01353 *d++ = 0;
01354 *d++ = (bgr&0xF800)>>8;
01355 *d++ = (bgr&0x7E0)>>3;
01356 *d++ = (bgr&0x1F)<<3;
01357 #else
01358 *d++ = (bgr&0x1F)<<3;
01359 *d++ = (bgr&0x7E0)>>3;
01360 *d++ = (bgr&0xF800)>>8;
01361 *d++ = 0;
01362 #endif
01363 }
01364 }
01365
01366 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
01367 {
01368 long idx = 15 - src_size;
01369 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx;
01370 #ifdef HAVE_MMX
01371 __asm __volatile(
01372 "test %0, %0 \n\t"
01373 "jns 2f \n\t"
01374 PREFETCH" (%1, %0) \n\t"
01375 "movq %3, %%mm7 \n\t"
01376 "pxor %4, %%mm7 \n\t"
01377 "movq %%mm7, %%mm6 \n\t"
01378 "pxor %5, %%mm7 \n\t"
01379 ASMALIGN(4)
01380 "1: \n\t"
01381 PREFETCH" 32(%1, %0) \n\t"
01382 "movq (%1, %0), %%mm0 \n\t"
01383 "movq 8(%1, %0), %%mm1 \n\t"
01384 # ifdef HAVE_MMX2
01385 "pshufw $177, %%mm0, %%mm3 \n\t"
01386 "pshufw $177, %%mm1, %%mm5 \n\t"
01387 "pand %%mm7, %%mm0 \n\t"
01388 "pand %%mm6, %%mm3 \n\t"
01389 "pand %%mm7, %%mm1 \n\t"
01390 "pand %%mm6, %%mm5 \n\t"
01391 "por %%mm3, %%mm0 \n\t"
01392 "por %%mm5, %%mm1 \n\t"
01393 # else
01394 "movq %%mm0, %%mm2 \n\t"
01395 "movq %%mm1, %%mm4 \n\t"
01396 "pand %%mm7, %%mm0 \n\t"
01397 "pand %%mm6, %%mm2 \n\t"
01398 "pand %%mm7, %%mm1 \n\t"
01399 "pand %%mm6, %%mm4 \n\t"
01400 "movq %%mm2, %%mm3 \n\t"
01401 "movq %%mm4, %%mm5 \n\t"
01402 "pslld $16, %%mm2 \n\t"
01403 "psrld $16, %%mm3 \n\t"
01404 "pslld $16, %%mm4 \n\t"
01405 "psrld $16, %%mm5 \n\t"
01406 "por %%mm2, %%mm0 \n\t"
01407 "por %%mm4, %%mm1 \n\t"
01408 "por %%mm3, %%mm0 \n\t"
01409 "por %%mm5, %%mm1 \n\t"
01410 # endif
01411 MOVNTQ" %%mm0, (%2, %0) \n\t"
01412 MOVNTQ" %%mm1, 8(%2, %0) \n\t"
01413 "add $16, %0 \n\t"
01414 "js 1b \n\t"
01415 SFENCE" \n\t"
01416 EMMS" \n\t"
01417 "2: \n\t"
01418 : "+&r"(idx)
01419 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01420 : "memory");
01421 #endif
01422 for (; idx<15; idx+=4) {
01423 register int v = *(uint32_t *)&s[idx], g = v & 0xff00ff00;
01424 v &= 0xff00ff;
01425 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01426 }
01427 }
01428
01429 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01430 {
01431 unsigned i;
01432 #ifdef HAVE_MMX
01433 long mmx_size= 23 - src_size;
01434 asm volatile (
01435 "test %%"REG_a", %%"REG_a" \n\t"
01436 "jns 2f \n\t"
01437 "movq "MANGLE(mask24r)", %%mm5 \n\t"
01438 "movq "MANGLE(mask24g)", %%mm6 \n\t"
01439 "movq "MANGLE(mask24b)", %%mm7 \n\t"
01440 ASMALIGN(4)
01441 "1: \n\t"
01442 PREFETCH" 32(%1, %%"REG_a") \n\t"
01443 "movq (%1, %%"REG_a"), %%mm0 \n\t"
01444 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01445 "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
01446 "psllq $16, %%mm0 \n\t"
01447 "pand %%mm5, %%mm0 \n\t"
01448 "pand %%mm6, %%mm1 \n\t"
01449 "pand %%mm7, %%mm2 \n\t"
01450 "por %%mm0, %%mm1 \n\t"
01451 "por %%mm2, %%mm1 \n\t"
01452 "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
01453 MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
01454 "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
01455 "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
01456 "pand %%mm7, %%mm0 \n\t"
01457 "pand %%mm5, %%mm1 \n\t"
01458 "pand %%mm6, %%mm2 \n\t"
01459 "por %%mm0, %%mm1 \n\t"
01460 "por %%mm2, %%mm1 \n\t"
01461 "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
01462 MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
01463 "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
01464 "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
01465 "pand %%mm6, %%mm0 \n\t"
01466 "pand %%mm7, %%mm1 \n\t"
01467 "pand %%mm5, %%mm2 \n\t"
01468 "por %%mm0, %%mm1 \n\t"
01469 "por %%mm2, %%mm1 \n\t"
01470 MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
01471 "add $24, %%"REG_a" \n\t"
01472 " js 1b \n\t"
01473 "2: \n\t"
01474 : "+a" (mmx_size)
01475 : "r" (src-mmx_size), "r"(dst-mmx_size)
01476 );
01477
01478 __asm __volatile(SFENCE:::"memory");
01479 __asm __volatile(EMMS:::"memory");
01480
01481 if (mmx_size==23) return;
01482
01483 src+= src_size;
01484 dst+= src_size;
01485 src_size= 23-mmx_size;
01486 src-= src_size;
01487 dst-= src_size;
01488 #endif
01489 for (i=0; i<src_size; i+=3)
01490 {
01491 register uint8_t x;
01492 x = src[i + 2];
01493 dst[i + 1] = src[i + 1];
01494 dst[i + 2] = src[i + 0];
01495 dst[i + 0] = x;
01496 }
01497 }
01498
01499 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01500 long width, long height,
01501 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01502 {
01503 long y;
01504 const long chromWidth= width>>1;
01505 for (y=0; y<height; y++)
01506 {
01507 #ifdef HAVE_MMX
01508
01509 asm volatile(
01510 "xor %%"REG_a", %%"REG_a" \n\t"
01511 ASMALIGN(4)
01512 "1: \n\t"
01513 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01514 PREFETCH" 32(%2, %%"REG_a") \n\t"
01515 PREFETCH" 32(%3, %%"REG_a") \n\t"
01516 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01517 "movq %%mm0, %%mm2 \n\t"
01518 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01519 "punpcklbw %%mm1, %%mm0 \n\t"
01520 "punpckhbw %%mm1, %%mm2 \n\t"
01521
01522 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01523 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01524 "movq %%mm3, %%mm4 \n\t"
01525 "movq %%mm5, %%mm6 \n\t"
01526 "punpcklbw %%mm0, %%mm3 \n\t"
01527 "punpckhbw %%mm0, %%mm4 \n\t"
01528 "punpcklbw %%mm2, %%mm5 \n\t"
01529 "punpckhbw %%mm2, %%mm6 \n\t"
01530
01531 MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
01532 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01533 MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
01534 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01535
01536 "add $8, %%"REG_a" \n\t"
01537 "cmp %4, %%"REG_a" \n\t"
01538 " jb 1b \n\t"
01539 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01540 : "%"REG_a
01541 );
01542 #else
01543
01544 #if defined ARCH_ALPHA && defined HAVE_MVI
01545 #define pl2yuy2(n) \
01546 y1 = yc[n]; \
01547 y2 = yc2[n]; \
01548 u = uc[n]; \
01549 v = vc[n]; \
01550 asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
01551 asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
01552 asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
01553 asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
01554 yuv1 = (u << 8) + (v << 24); \
01555 yuv2 = yuv1 + y2; \
01556 yuv1 += y1; \
01557 qdst[n] = yuv1; \
01558 qdst2[n] = yuv2;
01559
01560 int i;
01561 uint64_t *qdst = (uint64_t *) dst;
01562 uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
01563 const uint32_t *yc = (uint32_t *) ysrc;
01564 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
01565 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
01566 for (i = 0; i < chromWidth; i += 8){
01567 uint64_t y1, y2, yuv1, yuv2;
01568 uint64_t u, v;
01569
01570 asm("ldq $31,64(%0)" :: "r"(yc));
01571 asm("ldq $31,64(%0)" :: "r"(yc2));
01572 asm("ldq $31,64(%0)" :: "r"(uc));
01573 asm("ldq $31,64(%0)" :: "r"(vc));
01574
01575 pl2yuy2(0);
01576 pl2yuy2(1);
01577 pl2yuy2(2);
01578 pl2yuy2(3);
01579
01580 yc += 4;
01581 yc2 += 4;
01582 uc += 4;
01583 vc += 4;
01584 qdst += 4;
01585 qdst2 += 4;
01586 }
01587 y++;
01588 ysrc += lumStride;
01589 dst += dstStride;
01590
01591 #elif __WORDSIZE >= 64
01592 int i;
01593 uint64_t *ldst = (uint64_t *) dst;
01594 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01595 for (i = 0; i < chromWidth; i += 2){
01596 uint64_t k, l;
01597 k = yc[0] + (uc[0] << 8) +
01598 (yc[1] << 16) + (vc[0] << 24);
01599 l = yc[2] + (uc[1] << 8) +
01600 (yc[3] << 16) + (vc[1] << 24);
01601 *ldst++ = k + (l << 32);
01602 yc += 4;
01603 uc += 2;
01604 vc += 2;
01605 }
01606
01607 #else
01608 int i, *idst = (int32_t *) dst;
01609 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01610 for (i = 0; i < chromWidth; i++){
01611 #ifdef WORDS_BIGENDIAN
01612 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
01613 (yc[1] << 8) + (vc[0] << 0);
01614 #else
01615 *idst++ = yc[0] + (uc[0] << 8) +
01616 (yc[1] << 16) + (vc[0] << 24);
01617 #endif
01618 yc += 2;
01619 uc++;
01620 vc++;
01621 }
01622 #endif
01623 #endif
01624 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01625 {
01626 usrc += chromStride;
01627 vsrc += chromStride;
01628 }
01629 ysrc += lumStride;
01630 dst += dstStride;
01631 }
01632 #ifdef HAVE_MMX
01633 asm( EMMS" \n\t"
01634 SFENCE" \n\t"
01635 :::"memory");
01636 #endif
01637 }
01638
01643 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01644 long width, long height,
01645 long lumStride, long chromStride, long dstStride)
01646 {
01647
01648 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01649 }
01650
01651 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01652 long width, long height,
01653 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01654 {
01655 long y;
01656 const long chromWidth= width>>1;
01657 for (y=0; y<height; y++)
01658 {
01659 #ifdef HAVE_MMX
01660
01661 asm volatile(
01662 "xor %%"REG_a", %%"REG_a" \n\t"
01663 ASMALIGN(4)
01664 "1: \n\t"
01665 PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
01666 PREFETCH" 32(%2, %%"REG_a") \n\t"
01667 PREFETCH" 32(%3, %%"REG_a") \n\t"
01668 "movq (%2, %%"REG_a"), %%mm0 \n\t"
01669 "movq %%mm0, %%mm2 \n\t"
01670 "movq (%3, %%"REG_a"), %%mm1 \n\t"
01671 "punpcklbw %%mm1, %%mm0 \n\t"
01672 "punpckhbw %%mm1, %%mm2 \n\t"
01673
01674 "movq (%1, %%"REG_a",2), %%mm3 \n\t"
01675 "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
01676 "movq %%mm0, %%mm4 \n\t"
01677 "movq %%mm2, %%mm6 \n\t"
01678 "punpcklbw %%mm3, %%mm0 \n\t"
01679 "punpckhbw %%mm3, %%mm4 \n\t"
01680 "punpcklbw %%mm5, %%mm2 \n\t"
01681 "punpckhbw %%mm5, %%mm6 \n\t"
01682
01683 MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
01684 MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
01685 MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
01686 MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
01687
01688 "add $8, %%"REG_a" \n\t"
01689 "cmp %4, %%"REG_a" \n\t"
01690 " jb 1b \n\t"
01691 ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01692 : "%"REG_a
01693 );
01694 #else
01695
01696
01697 #if __WORDSIZE >= 64
01698 int i;
01699 uint64_t *ldst = (uint64_t *) dst;
01700 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01701 for (i = 0; i < chromWidth; i += 2){
01702 uint64_t k, l;
01703 k = uc[0] + (yc[0] << 8) +
01704 (vc[0] << 16) + (yc[1] << 24);
01705 l = uc[1] + (yc[2] << 8) +
01706 (vc[1] << 16) + (yc[3] << 24);
01707 *ldst++ = k + (l << 32);
01708 yc += 4;
01709 uc += 2;
01710 vc += 2;
01711 }
01712
01713 #else
01714 int i, *idst = (int32_t *) dst;
01715 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01716 for (i = 0; i < chromWidth; i++){
01717 #ifdef WORDS_BIGENDIAN
01718 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
01719 (vc[0] << 8) + (yc[1] << 0);
01720 #else
01721 *idst++ = uc[0] + (yc[0] << 8) +
01722 (vc[0] << 16) + (yc[1] << 24);
01723 #endif
01724 yc += 2;
01725 uc++;
01726 vc++;
01727 }
01728 #endif
01729 #endif
01730 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01731 {
01732 usrc += chromStride;
01733 vsrc += chromStride;
01734 }
01735 ysrc += lumStride;
01736 dst += dstStride;
01737 }
01738 #ifdef HAVE_MMX
01739 asm( EMMS" \n\t"
01740 SFENCE" \n\t"
01741 :::"memory");
01742 #endif
01743 }
01744
01749 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01750 long width, long height,
01751 long lumStride, long chromStride, long dstStride)
01752 {
01753
01754 RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01755 }
01756
01760 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01761 long width, long height,
01762 long lumStride, long chromStride, long dstStride)
01763 {
01764 RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01765 }
01766
01771 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01772 long width, long height,
01773 long lumStride, long chromStride, long srcStride)
01774 {
01775 long y;
01776 const long chromWidth= width>>1;
01777 for (y=0; y<height; y+=2)
01778 {
01779 #ifdef HAVE_MMX
01780 asm volatile(
01781 "xor %%"REG_a", %%"REG_a" \n\t"
01782 "pcmpeqw %%mm7, %%mm7 \n\t"
01783 "psrlw $8, %%mm7 \n\t"
01784 ASMALIGN(4)
01785 "1: \n\t"
01786 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01787 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01788 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01789 "movq %%mm0, %%mm2 \n\t"
01790 "movq %%mm1, %%mm3 \n\t"
01791 "psrlw $8, %%mm0 \n\t"
01792 "psrlw $8, %%mm1 \n\t"
01793 "pand %%mm7, %%mm2 \n\t"
01794 "pand %%mm7, %%mm3 \n\t"
01795 "packuswb %%mm1, %%mm0 \n\t"
01796 "packuswb %%mm3, %%mm2 \n\t"
01797
01798 MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
01799
01800 "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
01801 "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
01802 "movq %%mm1, %%mm3 \n\t"
01803 "movq %%mm2, %%mm4 \n\t"
01804 "psrlw $8, %%mm1 \n\t"
01805 "psrlw $8, %%mm2 \n\t"
01806 "pand %%mm7, %%mm3 \n\t"
01807 "pand %%mm7, %%mm4 \n\t"
01808 "packuswb %%mm2, %%mm1 \n\t"
01809 "packuswb %%mm4, %%mm3 \n\t"
01810
01811 MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01812
01813 "movq %%mm0, %%mm2 \n\t"
01814 "movq %%mm1, %%mm3 \n\t"
01815 "psrlw $8, %%mm0 \n\t"
01816 "psrlw $8, %%mm1 \n\t"
01817 "pand %%mm7, %%mm2 \n\t"
01818 "pand %%mm7, %%mm3 \n\t"
01819 "packuswb %%mm1, %%mm0 \n\t"
01820 "packuswb %%mm3, %%mm2 \n\t"
01821
01822 MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
01823 MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
01824
01825 "add $8, %%"REG_a" \n\t"
01826 "cmp %4, %%"REG_a" \n\t"
01827 " jb 1b \n\t"
01828 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01829 : "memory", "%"REG_a
01830 );
01831
01832 ydst += lumStride;
01833 src += srcStride;
01834
01835 asm volatile(
01836 "xor %%"REG_a", %%"REG_a" \n\t"
01837 ASMALIGN(4)
01838 "1: \n\t"
01839 PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
01840 "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
01841 "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
01842 "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
01843 "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
01844 "pand %%mm7, %%mm0 \n\t"
01845 "pand %%mm7, %%mm1 \n\t"
01846 "pand %%mm7, %%mm2 \n\t"
01847 "pand %%mm7, %%mm3 \n\t"
01848 "packuswb %%mm1, %%mm0 \n\t"
01849 "packuswb %%mm3, %%mm2 \n\t"
01850
01851 MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
01852 MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01853
01854 "add $8, %%"REG_a" \n\t"
01855 "cmp %4, %%"REG_a" \n\t"
01856 " jb 1b \n\t"
01857
01858 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01859 : "memory", "%"REG_a
01860 );
01861 #else
01862 long i;
01863 for (i=0; i<chromWidth; i++)
01864 {
01865 ydst[2*i+0] = src[4*i+0];
01866 udst[i] = src[4*i+1];
01867 ydst[2*i+1] = src[4*i+2];
01868 vdst[i] = src[4*i+3];
01869 }
01870 ydst += lumStride;
01871 src += srcStride;
01872
01873 for (i=0; i<chromWidth; i++)
01874 {
01875 ydst[2*i+0] = src[4*i+0];
01876 ydst[2*i+1] = src[4*i+2];
01877 }
01878 #endif
01879 udst += chromStride;
01880 vdst += chromStride;
01881 ydst += lumStride;
01882 src += srcStride;
01883 }
01884 #ifdef HAVE_MMX
01885 asm volatile( EMMS" \n\t"
01886 SFENCE" \n\t"
01887 :::"memory");
01888 #endif
01889 }
01890
01891 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
01892 uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01893 long width, long height, long lumStride, long chromStride)
01894 {
01895
01896 memcpy(ydst, ysrc, width*height);
01897
01898
01899 }
01900
01901 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
01902 {
01903 long x,y;
01904
01905 dst[0]= src[0];
01906
01907
01908 for (x=0; x<srcWidth-1; x++){
01909 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01910 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01911 }
01912 dst[2*srcWidth-1]= src[srcWidth-1];
01913
01914 dst+= dstStride;
01915
01916 for (y=1; y<srcHeight; y++){
01917 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01918 const long mmxSize= srcWidth&~15;
01919 asm volatile(
01920 "mov %4, %%"REG_a" \n\t"
01921 "1: \n\t"
01922 "movq (%0, %%"REG_a"), %%mm0 \n\t"
01923 "movq (%1, %%"REG_a"), %%mm1 \n\t"
01924 "movq 1(%0, %%"REG_a"), %%mm2 \n\t"
01925 "movq 1(%1, %%"REG_a"), %%mm3 \n\t"
01926 "movq -1(%0, %%"REG_a"), %%mm4 \n\t"
01927 "movq -1(%1, %%"REG_a"), %%mm5 \n\t"
01928 PAVGB" %%mm0, %%mm5 \n\t"
01929 PAVGB" %%mm0, %%mm3 \n\t"
01930 PAVGB" %%mm0, %%mm5 \n\t"
01931 PAVGB" %%mm0, %%mm3 \n\t"
01932 PAVGB" %%mm1, %%mm4 \n\t"
01933 PAVGB" %%mm1, %%mm2 \n\t"
01934 PAVGB" %%mm1, %%mm4 \n\t"
01935 PAVGB" %%mm1, %%mm2 \n\t"
01936 "movq %%mm5, %%mm7 \n\t"
01937 "movq %%mm4, %%mm6 \n\t"
01938 "punpcklbw %%mm3, %%mm5 \n\t"
01939 "punpckhbw %%mm3, %%mm7 \n\t"
01940 "punpcklbw %%mm2, %%mm4 \n\t"
01941 "punpckhbw %%mm2, %%mm6 \n\t"
01942 #if 1
01943 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
01944 MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01945 MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
01946 MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01947 #else
01948 "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
01949 "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
01950 "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
01951 "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
01952 #endif
01953 "add $8, %%"REG_a" \n\t"
01954 " js 1b \n\t"
01955 :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
01956 "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01957 "g" (-mmxSize)
01958 : "%"REG_a
01959
01960 );
01961 #else
01962 const long mmxSize=1;
01963 #endif
01964 dst[0 ]= (3*src[0] + src[srcStride])>>2;
01965 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
01966
01967 for (x=mmxSize-1; x<srcWidth-1; x++){
01968 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
01969 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
01970 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
01971 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
01972 }
01973 dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
01974 dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01975
01976 dst+=dstStride*2;
01977 src+=srcStride;
01978 }
01979
01980
01981 #if 1
01982 dst[0]= src[0];
01983
01984 for (x=0; x<srcWidth-1; x++){
01985 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
01986 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
01987 }
01988 dst[2*srcWidth-1]= src[srcWidth-1];
01989 #else
01990 for (x=0; x<srcWidth; x++){
01991 dst[2*x+0]=
01992 dst[2*x+1]= src[x];
01993 }
01994 #endif
01995
01996 #ifdef HAVE_MMX
01997 asm volatile( EMMS" \n\t"
01998 SFENCE" \n\t"
01999 :::"memory");
02000 #endif
02001 }
02002
02009 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02010 long width, long height,
02011 long lumStride, long chromStride, long srcStride)
02012 {
02013 long y;
02014 const long chromWidth= width>>1;
02015 for (y=0; y<height; y+=2)
02016 {
02017 #ifdef HAVE_MMX
02018 asm volatile(
02019 "xorl %%eax, %%eax \n\t"
02020 "pcmpeqw %%mm7, %%mm7 \n\t"
02021 "psrlw $8, %%mm7 \n\t"
02022 ASMALIGN(4)
02023 "1: \n\t"
02024 PREFETCH" 64(%0, %%eax, 4) \n\t"
02025 "movq (%0, %%eax, 4), %%mm0 \n\t"
02026 "movq 8(%0, %%eax, 4), %%mm1 \n\t"
02027 "movq %%mm0, %%mm2 \n\t"
02028 "movq %%mm1, %%mm3 \n\t"
02029 "pand %%mm7, %%mm0 \n\t"
02030 "pand %%mm7, %%mm1 \n\t"
02031 "psrlw $8, %%mm2 \n\t"
02032 "psrlw $8, %%mm3 \n\t"
02033 "packuswb %%mm1, %%mm0 \n\t"
02034 "packuswb %%mm3, %%mm2 \n\t"
02035
02036 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"
02037
02038 "movq 16(%0, %%eax, 4), %%mm1 \n\t"
02039 "movq 24(%0, %%eax, 4), %%mm2 \n\t"
02040 "movq %%mm1, %%mm3 \n\t"
02041 "movq %%mm2, %%mm4 \n\t"
02042 "pand %%mm7, %%mm1 \n\t"
02043 "pand %%mm7, %%mm2 \n\t"
02044 "psrlw $8, %%mm3 \n\t"
02045 "psrlw $8, %%mm4 \n\t"
02046 "packuswb %%mm2, %%mm1 \n\t"
02047 "packuswb %%mm4, %%mm3 \n\t"
02048
02049 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"
02050
02051 "movq %%mm0, %%mm2 \n\t"
02052 "movq %%mm1, %%mm3 \n\t"
02053 "psrlw $8, %%mm0 \n\t"
02054 "psrlw $8, %%mm1 \n\t"
02055 "pand %%mm7, %%mm2 \n\t"
02056 "pand %%mm7, %%mm3 \n\t"
02057 "packuswb %%mm1, %%mm0 \n\t"
02058 "packuswb %%mm3, %%mm2 \n\t"
02059
02060 MOVNTQ" %%mm0, (%3, %%eax) \n\t"
02061 MOVNTQ" %%mm2, (%2, %%eax) \n\t"
02062
02063 "addl $8, %%eax \n\t"
02064 "cmpl %4, %%eax \n\t"
02065 " jb 1b \n\t"
02066 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02067 : "memory", "%eax"
02068 );
02069
02070 ydst += lumStride;
02071 src += srcStride;
02072
02073 asm volatile(
02074 "xorl %%eax, %%eax \n\t"
02075 ASMALIGN(4)
02076 "1: \n\t"
02077 PREFETCH" 64(%0, %%eax, 4) \n\t"
02078 "movq (%0, %%eax, 4), %%mm0 \n\t"
02079 "movq 8(%0, %%eax, 4), %%mm1 \n\t"
02080 "movq 16(%0, %%eax, 4), %%mm2 \n\t"
02081 "movq 24(%0, %%eax, 4), %%mm3 \n\t"
02082 "psrlw $8, %%mm0 \n\t"
02083 "psrlw $8, %%mm1 \n\t"
02084 "psrlw $8, %%mm2 \n\t"
02085 "psrlw $8, %%mm3 \n\t"
02086 "packuswb %%mm1, %%mm0 \n\t"
02087 "packuswb %%mm3, %%mm2 \n\t"
02088
02089 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
02090 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"
02091
02092 "addl $8, %%eax \n\t"
02093 "cmpl %4, %%eax \n\t"
02094 " jb 1b \n\t"
02095
02096 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02097 : "memory", "%eax"
02098 );
02099 #else
02100 long i;
02101 for (i=0; i<chromWidth; i++)
02102 {
02103 udst[i] = src[4*i+0];
02104 ydst[2*i+0] = src[4*i+1];
02105 vdst[i] = src[4*i+2];
02106 ydst[2*i+1] = src[4*i+3];
02107 }
02108 ydst += lumStride;
02109 src += srcStride;
02110
02111 for (i=0; i<chromWidth; i++)
02112 {
02113 ydst[2*i+0] = src[4*i+1];
02114 ydst[2*i+1] = src[4*i+3];
02115 }
02116 #endif
02117 udst += chromStride;
02118 vdst += chromStride;
02119 ydst += lumStride;
02120 src += srcStride;
02121 }
02122 #ifdef HAVE_MMX
02123 asm volatile( EMMS" \n\t"
02124 SFENCE" \n\t"
02125 :::"memory");
02126 #endif
02127 }
02128
02136 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02137 long width, long height,
02138 long lumStride, long chromStride, long srcStride)
02139 {
02140 long y;
02141 const long chromWidth= width>>1;
02142 #ifdef HAVE_MMX
02143 for (y=0; y<height-2; y+=2)
02144 {
02145 long i;
02146 for (i=0; i<2; i++)
02147 {
02148 asm volatile(
02149 "mov %2, %%"REG_a" \n\t"
02150 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
02151 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02152 "pxor %%mm7, %%mm7 \n\t"
02153 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02154 ASMALIGN(4)
02155 "1: \n\t"
02156 PREFETCH" 64(%0, %%"REG_d") \n\t"
02157 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02158 "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
02159 "punpcklbw %%mm7, %%mm0 \n\t"
02160 "punpcklbw %%mm7, %%mm1 \n\t"
02161 "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
02162 "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
02163 "punpcklbw %%mm7, %%mm2 \n\t"
02164 "punpcklbw %%mm7, %%mm3 \n\t"
02165 "pmaddwd %%mm6, %%mm0 \n\t"
02166 "pmaddwd %%mm6, %%mm1 \n\t"
02167 "pmaddwd %%mm6, %%mm2 \n\t"
02168 "pmaddwd %%mm6, %%mm3 \n\t"
02169 #ifndef FAST_BGR2YV12
02170 "psrad $8, %%mm0 \n\t"
02171 "psrad $8, %%mm1 \n\t"
02172 "psrad $8, %%mm2 \n\t"
02173 "psrad $8, %%mm3 \n\t"
02174 #endif
02175 "packssdw %%mm1, %%mm0 \n\t"
02176 "packssdw %%mm3, %%mm2 \n\t"
02177 "pmaddwd %%mm5, %%mm0 \n\t"
02178 "pmaddwd %%mm5, %%mm2 \n\t"
02179 "packssdw %%mm2, %%mm0 \n\t"
02180 "psraw $7, %%mm0 \n\t"
02181
02182 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02183 "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
02184 "punpcklbw %%mm7, %%mm4 \n\t"
02185 "punpcklbw %%mm7, %%mm1 \n\t"
02186 "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
02187 "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
02188 "punpcklbw %%mm7, %%mm2 \n\t"
02189 "punpcklbw %%mm7, %%mm3 \n\t"
02190 "pmaddwd %%mm6, %%mm4 \n\t"
02191 "pmaddwd %%mm6, %%mm1 \n\t"
02192 "pmaddwd %%mm6, %%mm2 \n\t"
02193 "pmaddwd %%mm6, %%mm3 \n\t"
02194 #ifndef FAST_BGR2YV12
02195 "psrad $8, %%mm4 \n\t"
02196 "psrad $8, %%mm1 \n\t"
02197 "psrad $8, %%mm2 \n\t"
02198 "psrad $8, %%mm3 \n\t"
02199 #endif
02200 "packssdw %%mm1, %%mm4 \n\t"
02201 "packssdw %%mm3, %%mm2 \n\t"
02202 "pmaddwd %%mm5, %%mm4 \n\t"
02203 "pmaddwd %%mm5, %%mm2 \n\t"
02204 "add $24, %%"REG_d" \n\t"
02205 "packssdw %%mm2, %%mm4 \n\t"
02206 "psraw $7, %%mm4 \n\t"
02207
02208 "packuswb %%mm4, %%mm0 \n\t"
02209 "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
02210
02211 MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
02212 "add $8, %%"REG_a" \n\t"
02213 " js 1b \n\t"
02214 : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
02215 : "%"REG_a, "%"REG_d
02216 );
02217 ydst += lumStride;
02218 src += srcStride;
02219 }
02220 src -= srcStride*2;
02221 asm volatile(
02222 "mov %4, %%"REG_a" \n\t"
02223 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02224 "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
02225 "pxor %%mm7, %%mm7 \n\t"
02226 "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
02227 "add %%"REG_d", %%"REG_d" \n\t"
02228 ASMALIGN(4)
02229 "1: \n\t"
02230 PREFETCH" 64(%0, %%"REG_d") \n\t"
02231 PREFETCH" 64(%1, %%"REG_d") \n\t"
02232 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02233 "movq (%0, %%"REG_d"), %%mm0 \n\t"
02234 "movq (%1, %%"REG_d"), %%mm1 \n\t"
02235 "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
02236 "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
02237 PAVGB" %%mm1, %%mm0 \n\t"
02238 PAVGB" %%mm3, %%mm2 \n\t"
02239 "movq %%mm0, %%mm1 \n\t"
02240 "movq %%mm2, %%mm3 \n\t"
02241 "psrlq $24, %%mm0 \n\t"
02242 "psrlq $24, %%mm2 \n\t"
02243 PAVGB" %%mm1, %%mm0 \n\t"
02244 PAVGB" %%mm3, %%mm2 \n\t"
02245 "punpcklbw %%mm7, %%mm0 \n\t"
02246 "punpcklbw %%mm7, %%mm2 \n\t"
02247 #else
02248 "movd (%0, %%"REG_d"), %%mm0 \n\t"
02249 "movd (%1, %%"REG_d"), %%mm1 \n\t"
02250 "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
02251 "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
02252 "punpcklbw %%mm7, %%mm0 \n\t"
02253 "punpcklbw %%mm7, %%mm1 \n\t"
02254 "punpcklbw %%mm7, %%mm2 \n\t"
02255 "punpcklbw %%mm7, %%mm3 \n\t"
02256 "paddw %%mm1, %%mm0 \n\t"
02257 "paddw %%mm3, %%mm2 \n\t"
02258 "paddw %%mm2, %%mm0 \n\t"
02259 "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
02260 "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
02261 "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
02262 "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
02263 "punpcklbw %%mm7, %%mm4 \n\t"
02264 "punpcklbw %%mm7, %%mm1 \n\t"
02265 "punpcklbw %%mm7, %%mm2 \n\t"
02266 "punpcklbw %%mm7, %%mm3 \n\t"
02267 "paddw %%mm1, %%mm4 \n\t"
02268 "paddw %%mm3, %%mm2 \n\t"
02269 "paddw %%mm4, %%mm2 \n\t"
02270 "psrlw $2, %%mm0 \n\t"
02271 "psrlw $2, %%mm2 \n\t"
02272 #endif
02273 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
02274 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
02275
02276 "pmaddwd %%mm0, %%mm1 \n\t"
02277 "pmaddwd %%mm2, %%mm3 \n\t"
02278 "pmaddwd %%mm6, %%mm0 \n\t"
02279 "pmaddwd %%mm6, %%mm2 \n\t"
02280 #ifndef FAST_BGR2YV12
02281 "psrad $8, %%mm0 \n\t"
02282 "psrad $8, %%mm1 \n\t"
02283 "psrad $8, %%mm2 \n\t"
02284 "psrad $8, %%mm3 \n\t"
02285 #endif
02286 "packssdw %%mm2, %%mm0 \n\t"
02287 "packssdw %%mm3, %%mm1 \n\t"
02288 "pmaddwd %%mm5, %%mm0 \n\t"
02289 "pmaddwd %%mm5, %%mm1 \n\t"
02290 "packssdw %%mm1, %%mm0 \n\t"
02291 "psraw $7, %%mm0 \n\t"
02292
02293 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02294 "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
02295 "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
02296 "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
02297 "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
02298 PAVGB" %%mm1, %%mm4 \n\t"
02299 PAVGB" %%mm3, %%mm2 \n\t"
02300 "movq %%mm4, %%mm1 \n\t"
02301 "movq %%mm2, %%mm3 \n\t"
02302 "psrlq $24, %%mm4 \n\t"
02303 "psrlq $24, %%mm2 \n\t"
02304 PAVGB" %%mm1, %%mm4 \n\t"
02305 PAVGB" %%mm3, %%mm2 \n\t"
02306 "punpcklbw %%mm7, %%mm4 \n\t"
02307 "punpcklbw %%mm7, %%mm2 \n\t"
02308 #else
02309 "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
02310 "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
02311 "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
02312 "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
02313 "punpcklbw %%mm7, %%mm4 \n\t"
02314 "punpcklbw %%mm7, %%mm1 \n\t"
02315 "punpcklbw %%mm7, %%mm2 \n\t"
02316 "punpcklbw %%mm7, %%mm3 \n\t"
02317 "paddw %%mm1, %%mm4 \n\t"
02318 "paddw %%mm3, %%mm2 \n\t"
02319 "paddw %%mm2, %%mm4 \n\t"
02320 "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
02321 "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
02322 "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
02323 "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
02324 "punpcklbw %%mm7, %%mm5 \n\t"
02325 "punpcklbw %%mm7, %%mm1 \n\t"
02326 "punpcklbw %%mm7, %%mm2 \n\t"
02327 "punpcklbw %%mm7, %%mm3 \n\t"
02328 "paddw %%mm1, %%mm5 \n\t"
02329 "paddw %%mm3, %%mm2 \n\t"
02330 "paddw %%mm5, %%mm2 \n\t"
02331 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
02332 "psrlw $2, %%mm4 \n\t"
02333 "psrlw $2, %%mm2 \n\t"
02334 #endif
02335 "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
02336 "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
02337
02338 "pmaddwd %%mm4, %%mm1 \n\t"
02339 "pmaddwd %%mm2, %%mm3 \n\t"
02340 "pmaddwd %%mm6, %%mm4 \n\t"
02341 "pmaddwd %%mm6, %%mm2 \n\t"
02342 #ifndef FAST_BGR2YV12
02343 "psrad $8, %%mm4 \n\t"
02344 "psrad $8, %%mm1 \n\t"
02345 "psrad $8, %%mm2 \n\t"
02346 "psrad $8, %%mm3 \n\t"
02347 #endif
02348 "packssdw %%mm2, %%mm4 \n\t"
02349 "packssdw %%mm3, %%mm1 \n\t"
02350 "pmaddwd %%mm5, %%mm4 \n\t"
02351 "pmaddwd %%mm5, %%mm1 \n\t"
02352 "add $24, %%"REG_d" \n\t"
02353 "packssdw %%mm1, %%mm4 \n\t"
02354 "psraw $7, %%mm4 \n\t"
02355
02356 "movq %%mm0, %%mm1 \n\t"
02357 "punpckldq %%mm4, %%mm0 \n\t"
02358 "punpckhdq %%mm4, %%mm1 \n\t"
02359 "packsswb %%mm1, %%mm0 \n\t"
02360 "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
02361 "movd %%mm0, (%2, %%"REG_a") \n\t"
02362 "punpckhdq %%mm0, %%mm0 \n\t"
02363 "movd %%mm0, (%3, %%"REG_a") \n\t"
02364 "add $4, %%"REG_a" \n\t"
02365 " js 1b \n\t"
02366 : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
02367 : "%"REG_a, "%"REG_d
02368 );
02369
02370 udst += chromStride;
02371 vdst += chromStride;
02372 src += srcStride*2;
02373 }
02374
02375 asm volatile( EMMS" \n\t"
02376 SFENCE" \n\t"
02377 :::"memory");
02378 #else
02379 y=0;
02380 #endif
02381 for (; y<height; y+=2)
02382 {
02383 long i;
02384 for (i=0; i<chromWidth; i++)
02385 {
02386 unsigned int b = src[6*i+0];
02387 unsigned int g = src[6*i+1];
02388 unsigned int r = src[6*i+2];
02389
02390 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02391 unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
02392 unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
02393
02394 udst[i] = U;
02395 vdst[i] = V;
02396 ydst[2*i] = Y;
02397
02398 b = src[6*i+3];
02399 g = src[6*i+4];
02400 r = src[6*i+5];
02401
02402 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02403 ydst[2*i+1] = Y;
02404 }
02405 ydst += lumStride;
02406 src += srcStride;
02407
02408 for (i=0; i<chromWidth; i++)
02409 {
02410 unsigned int b = src[6*i+0];
02411 unsigned int g = src[6*i+1];
02412 unsigned int r = src[6*i+2];
02413
02414 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02415
02416 ydst[2*i] = Y;
02417
02418 b = src[6*i+3];
02419 g = src[6*i+4];
02420 r = src[6*i+5];
02421
02422 Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02423 ydst[2*i+1] = Y;
02424 }
02425 udst += chromStride;
02426 vdst += chromStride;
02427 ydst += lumStride;
02428 src += srcStride;
02429 }
02430 }
02431
/**
 * Interleave two byte planes into one.
 *
 * For each of the `height` rows and every w in [0, width):
 *     dest[2*w]   = src1[w]
 *     dest[2*w+1] = src2[w]
 * After each row the three pointers are advanced by their own stride.
 *
 * @param src1        plane supplying the even output bytes
 * @param src2        plane supplying the odd output bytes
 * @param dest        output plane; 2*width bytes are written per row
 * @param width       number of bytes taken from each source per row
 * @param height      number of rows to process
 * @param src1Stride  byte distance between consecutive rows of src1
 * @param src2Stride  byte distance between consecutive rows of src2
 * @param dstStride   byte distance between consecutive rows of dest
 */
void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
                             long width, long height, long src1Stride,
                             long src2Stride, long dstStride){
    long h;

    for (h=0; h < height; h++)
    {
        long w = 0;

#ifdef HAVE_MMX
        /*
         * Both SIMD loops below execute one full 16-byte step before they
         * test the exit condition, and that test is an unsigned "jb" against
         * width-15.  Rows narrower than 16 bytes must therefore never enter
         * them: they would read/write past the row, and for width < 15 the
         * negative bound would compare as a huge unsigned value.
         */
        if (width >= 16)
        {
#ifdef HAVE_SSE2
            /*
             * movdqa and movntdq require 16-byte aligned memory operands.
             * Use the SSE2 loop only when all three row pointers are aligned;
             * otherwise fall through to the MMX loop, which has no such
             * requirement.
             */
            if (!(((uintptr_t)src1 | (uintptr_t)src2 | (uintptr_t)dest) & 15))
            {
                asm(
                    "xor %%"REG_a", %%"REG_a" \n\t"
                    "1: \n\t"
                    PREFETCH" 64(%1, %%"REG_a") \n\t"
                    PREFETCH" 64(%2, %%"REG_a") \n\t"
                    "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
                    "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
                    "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
                    "punpcklbw %%xmm2, %%xmm0 \n\t"
                    "punpckhbw %%xmm2, %%xmm1 \n\t"
                    "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
                    "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
                    "add $16, %%"REG_a" \n\t"
                    "cmp %3, %%"REG_a" \n\t"
                    " jb 1b \n\t"
                    ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                    : "memory", "%"REG_a
                );
            }
            else
#endif
            {
                asm(
                    "xor %%"REG_a", %%"REG_a" \n\t"
                    "1: \n\t"
                    PREFETCH" 64(%1, %%"REG_a") \n\t"
                    PREFETCH" 64(%2, %%"REG_a") \n\t"
                    "movq (%1, %%"REG_a"), %%mm0 \n\t"
                    "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
                    "movq %%mm0, %%mm1 \n\t"
                    "movq %%mm2, %%mm3 \n\t"
                    "movq (%2, %%"REG_a"), %%mm4 \n\t"
                    "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
                    "punpcklbw %%mm4, %%mm0 \n\t"
                    "punpckhbw %%mm4, %%mm1 \n\t"
                    "punpcklbw %%mm5, %%mm2 \n\t"
                    "punpckhbw %%mm5, %%mm3 \n\t"
                    MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
                    MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
                    MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
                    MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
                    "add $16, %%"REG_a" \n\t"
                    "cmp %3, %%"REG_a" \n\t"
                    " jb 1b \n\t"
                    ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                    : "memory", "%"REG_a
                );
            }
            /* The SIMD loop consumed every complete group of 16 bytes. */
            w = width & ~15L;
        }
#endif
        /* Scalar path: the whole row without MMX, otherwise the remainder. */
        for (; w < width; w++)
        {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
#ifdef HAVE_MMX
    /* Leave MMX state and order the non-temporal stores (macros may expand to no-ops). */
    asm(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02512
/*
 * Helper for vu9_to_vu12: stretch one plane by two in both directions.
 *
 * Output row `row` is built from input row `row >> 1`, and every input
 * byte in [0, cols) is stored twice in a row (out[2*c] and out[2*c+1]).
 * With MMX, 32 input bytes (64 output bytes) are handled per asm step;
 * the scalar loop finishes whatever is left.
 */
static inline void RENAME(vu9_stretch_plane)(const uint8_t *src, uint8_t *dst,
                                             long cols, long rows,
                                             long srcStride, long dstStride)
{
    long row;

    for (row = 0; row < rows; row++) {
        const uint8_t *in = src + srcStride*(row>>1);
        uint8_t *out = dst + dstStride*row;
        long col = 0;

#ifdef HAVE_MMX
        while (col < cols-31) {
            asm volatile(
                PREFETCH" 32%1 \n\t"
                "movq %1, %%mm0 \n\t"
                "movq 8%1, %%mm2 \n\t"
                "movq 16%1, %%mm4 \n\t"
                "movq 24%1, %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, %0 \n\t"
                MOVNTQ" %%mm1, 8%0 \n\t"
                MOVNTQ" %%mm2, 16%0 \n\t"
                MOVNTQ" %%mm3, 24%0 \n\t"
                MOVNTQ" %%mm4, 32%0 \n\t"
                MOVNTQ" %%mm5, 40%0 \n\t"
                MOVNTQ" %%mm6, 48%0 \n\t"
                MOVNTQ" %%mm7, 56%0"
                :"=m"(out[2*col])
                :"m"(in[col])
                :"memory");
            col += 32;
        }
#endif
        while (col < cols) {
            out[2*col] = out[2*col+1] = in[col];
            col++;
        }
    }
}

/*
 * Upscale two planes (src1 -> dst1, src2 -> dst2) by a factor of two
 * horizontally and vertically.
 *
 * Only width/2 input columns and height/2 output rows are processed per
 * plane; output row y takes its data from input row y>>1.  The first plane
 * is converted completely before the second one is started.
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       long width, long height,
                                       long srcStride1, long srcStride2,
                                       long dstStride1, long dstStride2)
{
    const long w = width/2;
    const long h = height/2;

#ifdef HAVE_MMX
    /* Warm the cache with the second row of each source plane. */
    asm volatile(
        PREFETCH" %0 \n\t"
        PREFETCH" %1 \n\t"
        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif

    RENAME(vu9_stretch_plane)(src1, dst1, w, h, srcStride1, dstStride1);
    RENAME(vu9_stretch_plane)(src2, dst2, w, h, srcStride2, dstStride2);

#ifdef HAVE_MMX
    asm(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02615
/*
 * Pack three planes into one interleaved output plane.
 *
 * For every output row y in [0, height) and every group index g in
 * [0, width/2), eight bytes are written at dst row y, offset 8*g:
 *     src1[4g], src2[g], src1[4g+1], src3[g],
 *     src1[4g+2], src2[g], src1[4g+3], src3[g]
 * src1 is read at row y, src2 and src3 at row y>>2.
 * (Judging by the function name src1/src2/src3 are presumably the Y, U and
 * V planes -- confirm against the callers.)
 *
 * With MMX, eight groups are handled per asm step; the scalar loop handles
 * the rest.
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        long width, long height,
                                        long srcStride1, long srcStride2,
                                        long srcStride3, long dstStride)
{
    const long groups = width/2;
    long row;

    for (row = 0; row < height; row++) {
        const uint8_t *yRow = src1 + srcStride1*row;
        const uint8_t *uRow = src2 + srcStride2*(row>>2);
        const uint8_t *vRow = src3 + srcStride3*(row>>2);
        uint8_t *outRow = dst + dstStride*row;
        long g = 0;

#ifdef HAVE_MMX
        while (g < groups-7) {
            asm volatile(
                PREFETCH" 32(%1, %0) \n\t"
                PREFETCH" 32(%2, %0) \n\t"
                PREFETCH" 32(%3, %0) \n\t"
                "movq (%1, %0, 4), %%mm0 \n\t"
                "movq (%2, %0), %%mm1 \n\t"
                "movq (%3, %0), %%mm2 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "movq %%mm1, %%mm4 \n\t"
                "movq %%mm2, %%mm5 \n\t"
                "punpcklbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"

                "movq %%mm1, %%mm6 \n\t"
                "punpcklbw %%mm2, %%mm1 \n\t"
                "punpcklbw %%mm1, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm3 \n\t"
                MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"

                "punpckhbw %%mm2, %%mm6 \n\t"
                "movq 8(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t"
                "punpckhbw %%mm6, %%mm3 \n\t"
                MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"

                "movq %%mm4, %%mm6 \n\t"
                "movq 16(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm5, %%mm4 \n\t"
                "punpcklbw %%mm4, %%mm0 \n\t"
                "punpckhbw %%mm4, %%mm3 \n\t"
                MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"

                "punpckhbw %%mm5, %%mm6 \n\t"
                "movq 24(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t"
                "punpckhbw %%mm6, %%mm3 \n\t"
                MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"

                : "+r" (g)
                : "r"(yRow), "r" (uRow), "r"(vRow), "r"(outRow)
                :"memory");
            g += 8;
        }
#endif
        while (g < groups) {
            const uint8_t *y4 = yRow + (g<<2);  /* four src1 bytes of this group */
            uint8_t *o = outRow + 8*g;          /* eight output bytes of this group */

            o[0] = y4[0];
            o[1] = uRow[g];
            o[2] = y4[1];
            o[3] = vRow[g];
            o[4] = y4[2];
            o[5] = uRow[g];
            o[6] = y4[3];
            o[7] = vRow[g];
            g++;
        }
    }
#ifdef HAVE_MMX
    asm(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02706
02707 static inline void RENAME(rgb2rgb_init)(void){
02708 rgb15to16 = RENAME(rgb15to16);
02709 rgb15to24 = RENAME(rgb15to24);
02710 rgb15to32 = RENAME(rgb15to32);
02711 rgb16to24 = RENAME(rgb16to24);
02712 rgb16to32 = RENAME(rgb16to32);
02713 rgb16to15 = RENAME(rgb16to15);
02714 rgb24to16 = RENAME(rgb24to16);
02715 rgb24to15 = RENAME(rgb24to15);
02716 rgb24to32 = RENAME(rgb24to32);
02717 rgb32to16 = RENAME(rgb32to16);
02718 rgb32to15 = RENAME(rgb32to15);
02719 rgb32to24 = RENAME(rgb32to24);
02720 rgb24tobgr15 = RENAME(rgb24tobgr15);
02721 rgb24tobgr16 = RENAME(rgb24tobgr16);
02722 rgb24tobgr24 = RENAME(rgb24tobgr24);
02723 rgb32tobgr32 = RENAME(rgb32tobgr32);
02724 rgb32tobgr16 = RENAME(rgb32tobgr16);
02725 rgb32tobgr15 = RENAME(rgb32tobgr15);
02726 yv12toyuy2 = RENAME(yv12toyuy2);
02727 yv12touyvy = RENAME(yv12touyvy);
02728 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02729 yuy2toyv12 = RENAME(yuy2toyv12);
02730
02731
02732 planar2x = RENAME(planar2x);
02733 rgb24toyv12 = RENAME(rgb24toyv12);
02734 interleaveBytes = RENAME(interleaveBytes);
02735 vu9_to_vu12 = RENAME(vu9_to_vu12);
02736 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02737 }