00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
/*
 * Register-name helpers for the inline assembly below.
 *
 * REG_x / REG_SP are string literals that get pasted between the pieces of
 * an asm template ("lea (%0, %1), %%"REG_a" \n\t").
 * REGx are the same names as bare tokens, for use inside macro arguments
 * that are turned into strings by the macro itself (e.g. PAVGB((%%REGa), ...)).
 * ALIGN_MASK is an immediate operand whose low three bits are clear, i.e.
 * and-ing an address with it rounds the address down to a multiple of 8.
 */
#if defined(ARCH_X86_64)
/* string forms */
#  define REG_a      "rax"
#  define REG_c      "rcx"
#  define REG_d      "rdx"
#  define REG_SP     "rsp"
/* bare-token forms */
#  define REGa       rax
#  define REGc       rcx
#  define REGd       rdx
#  define ALIGN_MASK "$0xFFFFFFFFFFFFFFF8"
#else
/* string forms */
#  define REG_a      "eax"
#  define REG_c      "ecx"
#  define REG_d      "edx"
#  define REG_SP     "esp"
/* bare-token forms */
#  define REGa       eax
#  define REGc       ecx
#  define REGd       edx
#  define ALIGN_MASK "$0xFFFFFFF8"
#endif
00046
00047
/*
 * Instruction-selection macros for the inline assembly below.
 *
 * Any earlier definitions are dropped first. REAL_PAVGB is included in that
 * list: it is redefined below with a body that depends on HAVE_MMX2 /
 * HAVE_3DNOW, so leaving an older definition in place would be an
 * incompatible macro redefinition.
 */
#undef REAL_PAVGB
#undef PAVGB
#undef PMINUB
#undef PMAXUB

/*
 * PAVGB(a,b): byte-wise average instruction, "pavgb" with MMX2 and
 * "pavgusb" with 3DNow!. The extra REAL_PAVGB level lets macro arguments
 * such as REGa be expanded before they are stringified.
 * Without HAVE_MMX2 or HAVE_3DNOW, REAL_PAVGB stays undefined and PAVGB
 * must not be used.
 */
#ifdef HAVE_MMX2
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#define PAVGB(a,b) REAL_PAVGB(a,b)

/*
 * PMINUB(x,y,t): y = min(x, y) for unsigned bytes; t is a scratch register
 * that is only touched by the plain-MMX emulation
 * (t = y; t = saturate(t - x); y -= t).
 */
#ifdef HAVE_MMX2
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
#elif defined (HAVE_MMX)
#define PMINUB(b,a,t) \
    "movq " #a ", " #t " \n\t"\
    "psubusb " #b ", " #t " \n\t"\
    "psubb " #t ", " #a " \n\t"
#endif

/*
 * PMAXUB(x,y): y = max(x, y) for unsigned bytes; the plain-MMX emulation is
 * y = saturate(y - x) + x.
 */
#ifdef HAVE_MMX2
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
#elif defined (HAVE_MMX)
#define PMAXUB(a,b) \
    "psubusb " #a ", " #b " \n\t"\
    "paddb " #a ", " #b " \n\t"
#endif
00075
00076
#ifdef HAVE_MMX

/**
 * Classify the 8 lines (8 bytes wide) that start 4 lines below src.
 *
 * The assembly walks the 7 pairs of vertically adjacent lines. For every
 * byte it tests (difference + mmxDcOffset) > mmxDcThreshold and counts the
 * bytes that pass; at the same time it tracks the per-column minimum and
 * maximum over the 8 lines.
 *
 * @param src    pointer 4 lines above the first examined line
 * @param stride distance in bytes between consecutive lines
 * @param c      context; reads mmxDcOffset[], mmxDcThreshold[], nonBQP,
 *               pQPb and ppMode.flatnessThreshold
 * @return 2 when the count is not above ppMode.flatnessThreshold;
 *         otherwise 0 when at least one column has (max - min) larger than
 *         twice its pQPb byte, and 1 when no column does
 */
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
    int eqCount = 0;
    int spreadFlag;

    src += 4*stride;

    /* mm7 = offset bytes, mm6 = threshold bytes; used by the statement below */
    asm volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
        );

    asm volatile(
        "lea (%2, %3), %%"REG_a" \n\t"

        /* lines 0 and 1; mm3 = running minimum, mm4 = running maximum */
        "movq (%2), %%mm0 \n\t"
        "movq (%%"REG_a"), %%mm1 \n\t"
        "movq %%mm0, %%mm3 \n\t"
        "movq %%mm0, %%mm4 \n\t"
        PMAXUB(%%mm1, %%mm4)
        PMINUB(%%mm1, %%mm3, %%mm5)
        "psubb %%mm1, %%mm0 \n\t"
        "paddb %%mm7, %%mm0 \n\t"
        "pcmpgtb %%mm6, %%mm0 \n\t"

        /* line 2; every passing byte adds 0xFF (-1) to mm0 */
        "movq (%%"REG_a",%3), %%mm2 \n\t"
        PMAXUB(%%mm2, %%mm4)
        PMINUB(%%mm2, %%mm3, %%mm5)
        "psubb %%mm2, %%mm1 \n\t"
        "paddb %%mm7, %%mm1 \n\t"
        "pcmpgtb %%mm6, %%mm1 \n\t"
        "paddb %%mm1, %%mm0 \n\t"

        /* line 3 */
        "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
        PMAXUB(%%mm1, %%mm4)
        PMINUB(%%mm1, %%mm3, %%mm5)
        "psubb %%mm1, %%mm2 \n\t"
        "paddb %%mm7, %%mm2 \n\t"
        "pcmpgtb %%mm6, %%mm2 \n\t"
        "paddb %%mm2, %%mm0 \n\t"

        "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"

        /* line 4 */
        "movq (%2, %3, 4), %%mm2 \n\t"
        PMAXUB(%%mm2, %%mm4)
        PMINUB(%%mm2, %%mm3, %%mm5)
        "psubb %%mm2, %%mm1 \n\t"
        "paddb %%mm7, %%mm1 \n\t"
        "pcmpgtb %%mm6, %%mm1 \n\t"
        "paddb %%mm1, %%mm0 \n\t"

        /* line 5 */
        "movq (%%"REG_a"), %%mm1 \n\t"
        PMAXUB(%%mm1, %%mm4)
        PMINUB(%%mm1, %%mm3, %%mm5)
        "psubb %%mm1, %%mm2 \n\t"
        "paddb %%mm7, %%mm2 \n\t"
        "pcmpgtb %%mm6, %%mm2 \n\t"
        "paddb %%mm2, %%mm0 \n\t"

        /* line 6 */
        "movq (%%"REG_a", %3), %%mm2 \n\t"
        PMAXUB(%%mm2, %%mm4)
        PMINUB(%%mm2, %%mm3, %%mm5)
        "psubb %%mm2, %%mm1 \n\t"
        "paddb %%mm7, %%mm1 \n\t"
        "pcmpgtb %%mm6, %%mm1 \n\t"
        "paddb %%mm1, %%mm0 \n\t"

        /* line 7, then mm4 = max - min per column */
        "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
        PMAXUB(%%mm1, %%mm4)
        PMINUB(%%mm1, %%mm3, %%mm5)
        "psubb %%mm1, %%mm2 \n\t"
        "paddb %%mm7, %%mm2 \n\t"
        "pcmpgtb %%mm6, %%mm2 \n\t"
        "paddb %%mm2, %%mm0 \n\t"
        "psubusb %%mm3, %%mm4 \n\t"

        " \n\t"
        /* fold the 8 per-column counters of mm0 into its low byte */
#ifdef HAVE_MMX2
        "pxor %%mm7, %%mm7 \n\t"
        "psadbw %%mm7, %%mm0 \n\t"
#else
        "movq %%mm0, %%mm1 \n\t"
        "psrlw $8, %%mm0 \n\t"
        "paddb %%mm1, %%mm0 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $16, %%mm0 \n\t"
        "paddb %%mm1, %%mm0 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $32, %%mm0 \n\t"
        "paddb %%mm1, %%mm0 \n\t"
#endif
        /* mm4 = saturate((max - min) - 2*pQPb): non-zero only where the spread is larger */
        "movq %4, %%mm7 \n\t"
        "paddusb %%mm7, %%mm7 \n\t"
        "psubusb %%mm7, %%mm4 \n\t"
        "packssdw %%mm4, %%mm4 \n\t"
        "movd %%mm0, %0 \n\t"
        "movd %%mm4, %1 \n\t"

        : "=r" (eqCount), "=r" (spreadFlag)
        : "r" (src), "r" ((long)stride), "m" (c->pQPb)
        : "%"REG_a
        );

    /* the counters were built from -1 per hit, so negate; only the low byte is meaningful */
    eqCount = (-eqCount) & 0xFF;

    if (eqCount <= c->ppMode.flatnessThreshold)
        return 2;

    return spreadFlag ? 0 : 1;
}
#endif //HAVE_MMX
00192
00197 #ifndef HAVE_ALTIVEC
00198 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00199 {
00200 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00201 src+= stride*3;
00202 asm volatile(
00203 "movq %2, %%mm0 \n\t"
00204 "pxor %%mm4, %%mm4 \n\t"
00205
00206 "movq (%0), %%mm6 \n\t"
00207 "movq (%0, %1), %%mm5 \n\t"
00208 "movq %%mm5, %%mm1 \n\t"
00209 "movq %%mm6, %%mm2 \n\t"
00210 "psubusb %%mm6, %%mm5 \n\t"
00211 "psubusb %%mm1, %%mm2 \n\t"
00212 "por %%mm5, %%mm2 \n\t"
00213 "psubusb %%mm0, %%mm2 \n\t"
00214 "pcmpeqb %%mm4, %%mm2 \n\t"
00215
00216 "pand %%mm2, %%mm6 \n\t"
00217 "pandn %%mm1, %%mm2 \n\t"
00218 "por %%mm2, %%mm6 \n\t"
00219
00220 "movq (%0, %1, 8), %%mm5 \n\t"
00221 "lea (%0, %1, 4), %%"REG_a" \n\t"
00222 "lea (%0, %1, 8), %%"REG_c" \n\t"
00223 "sub %1, %%"REG_c" \n\t"
00224 "add %1, %0 \n\t"
00225 "movq (%0, %1, 8), %%mm7 \n\t"
00226 "movq %%mm5, %%mm1 \n\t"
00227 "movq %%mm7, %%mm2 \n\t"
00228 "psubusb %%mm7, %%mm5 \n\t"
00229 "psubusb %%mm1, %%mm2 \n\t"
00230 "por %%mm5, %%mm2 \n\t"
00231 "psubusb %%mm0, %%mm2 \n\t"
00232 "pcmpeqb %%mm4, %%mm2 \n\t"
00233
00234 "pand %%mm2, %%mm7 \n\t"
00235 "pandn %%mm1, %%mm2 \n\t"
00236 "por %%mm2, %%mm7 \n\t"
00237
00238
00239
00240
00241
00242
00243
00244
00245 "movq (%0, %1), %%mm0 \n\t"
00246 "movq %%mm0, %%mm1 \n\t"
00247 PAVGB(%%mm6, %%mm0)
00248 PAVGB(%%mm6, %%mm0)
00249
00250 "movq (%0, %1, 4), %%mm2 \n\t"
00251 "movq %%mm2, %%mm5 \n\t"
00252 PAVGB((%%REGa), %%mm2)
00253 PAVGB((%0, %1, 2), %%mm2)
00254 "movq %%mm2, %%mm3 \n\t"
00255 "movq (%0), %%mm4 \n\t"
00256 PAVGB(%%mm4, %%mm3)
00257 PAVGB(%%mm0, %%mm3)
00258 "movq %%mm3, (%0) \n\t"
00259
00260 "movq %%mm1, %%mm0 \n\t"
00261 PAVGB(%%mm6, %%mm0)
00262 "movq %%mm4, %%mm3 \n\t"
00263 PAVGB((%0,%1,2), %%mm3)
00264 PAVGB((%%REGa,%1,2), %%mm5)
00265 PAVGB((%%REGa), %%mm5)
00266 PAVGB(%%mm5, %%mm3)
00267 PAVGB(%%mm0, %%mm3)
00268 "movq %%mm3, (%0,%1) \n\t"
00269
00270 PAVGB(%%mm4, %%mm6)
00271 "movq (%%"REG_c"), %%mm0 \n\t"
00272 PAVGB((%%REGa, %1, 2), %%mm0)
00273 "movq %%mm0, %%mm3 \n\t"
00274 PAVGB(%%mm1, %%mm0)
00275 PAVGB(%%mm6, %%mm0)
00276 PAVGB(%%mm2, %%mm0)
00277 "movq (%0, %1, 2), %%mm2 \n\t"
00278 "movq %%mm0, (%0, %1, 2) \n\t"
00279
00280 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00281 PAVGB((%%REGc), %%mm0)
00282 PAVGB(%%mm0, %%mm6)
00283 PAVGB(%%mm1, %%mm4)
00284 PAVGB(%%mm2, %%mm1)
00285 PAVGB(%%mm1, %%mm6)
00286 PAVGB(%%mm5, %%mm6)
00287 "movq (%%"REG_a"), %%mm5 \n\t"
00288 "movq %%mm6, (%%"REG_a") \n\t"
00289
00290 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00291 PAVGB(%%mm7, %%mm6)
00292 PAVGB(%%mm4, %%mm6)
00293 PAVGB(%%mm3, %%mm6)
00294 PAVGB(%%mm5, %%mm2)
00295 "movq (%0, %1, 4), %%mm4 \n\t"
00296 PAVGB(%%mm4, %%mm2)
00297 PAVGB(%%mm2, %%mm6)
00298 "movq %%mm6, (%0, %1, 4) \n\t"
00299
00300 PAVGB(%%mm7, %%mm1)
00301 PAVGB(%%mm4, %%mm5)
00302 PAVGB(%%mm5, %%mm0)
00303 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00304 PAVGB(%%mm6, %%mm1)
00305 PAVGB(%%mm0, %%mm1)
00306 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
00307
00308 PAVGB((%%REGc), %%mm2)
00309 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00310 PAVGB(%%mm0, %%mm6)
00311 PAVGB(%%mm7, %%mm6)
00312 PAVGB(%%mm2, %%mm6)
00313 "movq %%mm6, (%%"REG_c") \n\t"
00314
00315 PAVGB(%%mm7, %%mm5)
00316 PAVGB(%%mm7, %%mm5)
00317
00318 PAVGB(%%mm3, %%mm0)
00319 PAVGB(%%mm0, %%mm5)
00320 "movq %%mm5, (%%"REG_a", %1, 4) \n\t"
00321 "sub %1, %0 \n\t"
00322
00323 :
00324 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
00325 : "%"REG_a, "%"REG_c
00326 );
00327 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00328 const int l1= stride;
00329 const int l2= stride + l1;
00330 const int l3= stride + l2;
00331 const int l4= stride + l3;
00332 const int l5= stride + l4;
00333 const int l6= stride + l5;
00334 const int l7= stride + l6;
00335 const int l8= stride + l7;
00336 const int l9= stride + l8;
00337 int x;
00338 src+= stride*3;
00339 for(x=0; x<BLOCK_SIZE; x++)
00340 {
00341 const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
00342 const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
00343
00344 int sums[10];
00345 sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00346 sums[1] = sums[0] - first + src[l4];
00347 sums[2] = sums[1] - first + src[l5];
00348 sums[3] = sums[2] - first + src[l6];
00349 sums[4] = sums[3] - first + src[l7];
00350 sums[5] = sums[4] - src[l1] + src[l8];
00351 sums[6] = sums[5] - src[l2] + last;
00352 sums[7] = sums[6] - src[l3] + last;
00353 sums[8] = sums[7] - src[l4] + last;
00354 sums[9] = sums[8] - src[l5] + last;
00355
00356 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00357 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00358 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00359 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00360 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00361 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00362 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00363 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00364
00365 src++;
00366 }
00367 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00368 }
00369 #endif //HAVE_ALTIVEC
00370
#if 0

/**
 * Disabled experimental vertical filter (compiled out by the surrounding
 * "#if 0").
 *
 * C version: for every column, v = line5 - line4 (lines counted from 3 lines
 * below src); when |v| < QP + QP/4 the lines 3..6 are adjusted by v>>3,
 * v>>1, -(v>>1) and -(v>>3).
 *
 * NOTE(review): the asm version loads the quantiser through MANGLE(pQPb),
 * i.e. from a global symbol, while the enabled filters in this file read
 * pQPb from their PPContext argument. Confirm that symbol still exists
 * before re-enabling this function.
 *
 * @param src    pointer 3 lines above line 0
 * @param stride distance in bytes between consecutive lines
 * @param QP     quantiser value (used by the C version only)
 */
static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
    src+= stride*3;

    asm volatile(
        "pxor %%mm7, %%mm7 \n\t"
        "movq "MANGLE(b80)", %%mm6 \n\t"
        /* suffix-less "lea": REG_a / REG_c are 64-bit registers on x86-64,
           where the former "leal" spelling does not assemble */
        "lea (%0, %1), %%"REG_a" \n\t"
        "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"

        "movq "MANGLE(pQPb)", %%mm0 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "paddusb "MANGLE(b02)", %%mm0 \n\t"
        "psrlw $2, %%mm0 \n\t"
        "pand "MANGLE(b3F)", %%mm0 \n\t"
        "paddusb %%mm1, %%mm0 \n\t"
        "movq (%0, %1, 4), %%mm2 \n\t"
        "movq (%%"REG_c"), %%mm3 \n\t"
        "movq %%mm2, %%mm4 \n\t"
        "pcmpeqb %%mm5, %%mm5 \n\t"
        "pxor %%mm2, %%mm5 \n\t"
        PAVGB(%%mm3, %%mm5)
        "paddb %%mm6, %%mm5 \n\t"
        "psubusb %%mm3, %%mm4 \n\t"
        "psubusb %%mm2, %%mm3 \n\t"
        "por %%mm3, %%mm4 \n\t"
        "psubusb %%mm0, %%mm4 \n\t"
        "pcmpeqb %%mm7, %%mm4 \n\t"
        "pand %%mm4, %%mm5 \n\t"

        "paddb %%mm5, %%mm2 \n\t"

        "movq %%mm2, (%0,%1, 4) \n\t"

        "movq (%%"REG_c"), %%mm2 \n\t"

        "psubb %%mm5, %%mm2 \n\t"

        "movq %%mm2, (%%"REG_c") \n\t"

        "paddb %%mm6, %%mm5 \n\t"
        "psrlw $2, %%mm5 \n\t"
        "pand "MANGLE(b3F)", %%mm5 \n\t"
        "psubb "MANGLE(b20)", %%mm5 \n\t"

        "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
        "paddb %%mm6, %%mm2 \n\t"
        "paddsb %%mm5, %%mm2 \n\t"
        "psubb %%mm6, %%mm2 \n\t"
        "movq %%mm2, (%%"REG_a", %1, 2) \n\t"

        "movq (%%"REG_c", %1), %%mm2 \n\t"
        "paddb %%mm6, %%mm2 \n\t"
        "psubsb %%mm5, %%mm2 \n\t"
        "psubb %%mm6, %%mm2 \n\t"
        "movq %%mm2, (%%"REG_c", %1) \n\t"

        :
        : "r" (src), "r" ((long)stride)
        : "%"REG_a, "%"REG_c
    );
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
    const int l1= stride;
    const int l2= stride + l1;
    const int l3= stride + l2;
    const int l4= stride + l3;
    const int l5= stride + l4;
    const int l6= stride + l5;
    int x;
    const int QP15= QP + (QP>>2);    /* 1.25 * QP */

    src+= stride*3;
    for(x=0; x<BLOCK_SIZE; x++)
    {
        /* step across the boundary between lines 4 and 5 */
        const int v = (src[x+l5] - src[x+l4]);
        if(FFABS(v) < QP15)
        {
            src[x+l3] +=v>>3;
            src[x+l4] +=v>>1;
            src[x+l5] -=v>>1;
            src[x+l6] -=v>>3;
        }
    }

#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
}
#endif //0
00477
00485 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00486 {
00487 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00488 src+= stride*3;
00489
00490 asm volatile(
00491 "pxor %%mm7, %%mm7 \n\t"
00492 "lea (%0, %1), %%"REG_a" \n\t"
00493 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00494
00495
00496 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00497 "movq (%0, %1, 4), %%mm1 \n\t"
00498 "movq %%mm1, %%mm2 \n\t"
00499 "psubusb %%mm0, %%mm1 \n\t"
00500 "psubusb %%mm2, %%mm0 \n\t"
00501 "por %%mm1, %%mm0 \n\t"
00502 "movq (%%"REG_c"), %%mm3 \n\t"
00503 "movq (%%"REG_c", %1), %%mm4 \n\t"
00504 "movq %%mm3, %%mm5 \n\t"
00505 "psubusb %%mm4, %%mm3 \n\t"
00506 "psubusb %%mm5, %%mm4 \n\t"
00507 "por %%mm4, %%mm3 \n\t"
00508 PAVGB(%%mm3, %%mm0)
00509 "movq %%mm2, %%mm1 \n\t"
00510 "psubusb %%mm5, %%mm2 \n\t"
00511 "movq %%mm2, %%mm4 \n\t"
00512 "pcmpeqb %%mm7, %%mm2 \n\t"
00513 "psubusb %%mm1, %%mm5 \n\t"
00514 "por %%mm5, %%mm4 \n\t"
00515 "psubusb %%mm0, %%mm4 \n\t"
00516 "movq %%mm4, %%mm3 \n\t"
00517 "movq %2, %%mm0 \n\t"
00518 "paddusb %%mm0, %%mm0 \n\t"
00519 "psubusb %%mm0, %%mm4 \n\t"
00520 "pcmpeqb %%mm7, %%mm4 \n\t"
00521 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00522 "pand %%mm4, %%mm3 \n\t"
00523
00524 PAVGB(%%mm7, %%mm3)
00525 "movq %%mm3, %%mm1 \n\t"
00526 PAVGB(%%mm7, %%mm3)
00527 PAVGB(%%mm1, %%mm3)
00528
00529 "movq (%0, %1, 4), %%mm0 \n\t"
00530 "pxor %%mm2, %%mm0 \n\t"
00531 "psubusb %%mm3, %%mm0 \n\t"
00532 "pxor %%mm2, %%mm0 \n\t"
00533 "movq %%mm0, (%0, %1, 4) \n\t"
00534
00535 "movq (%%"REG_c"), %%mm0 \n\t"
00536 "pxor %%mm2, %%mm0 \n\t"
00537 "paddusb %%mm3, %%mm0 \n\t"
00538 "pxor %%mm2, %%mm0 \n\t"
00539 "movq %%mm0, (%%"REG_c") \n\t"
00540
00541 PAVGB(%%mm7, %%mm1)
00542
00543 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00544 "pxor %%mm2, %%mm0 \n\t"
00545 "psubusb %%mm1, %%mm0 \n\t"
00546 "pxor %%mm2, %%mm0 \n\t"
00547 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00548
00549 "movq (%%"REG_c", %1), %%mm0 \n\t"
00550 "pxor %%mm2, %%mm0 \n\t"
00551 "paddusb %%mm1, %%mm0 \n\t"
00552 "pxor %%mm2, %%mm0 \n\t"
00553 "movq %%mm0, (%%"REG_c", %1) \n\t"
00554
00555 PAVGB(%%mm7, %%mm1)
00556
00557 "movq (%%"REG_a", %1), %%mm0 \n\t"
00558 "pxor %%mm2, %%mm0 \n\t"
00559 "psubusb %%mm1, %%mm0 \n\t"
00560 "pxor %%mm2, %%mm0 \n\t"
00561 "movq %%mm0, (%%"REG_a", %1) \n\t"
00562
00563 "movq (%%"REG_c", %1, 2), %%mm0 \n\t"
00564 "pxor %%mm2, %%mm0 \n\t"
00565 "paddusb %%mm1, %%mm0 \n\t"
00566 "pxor %%mm2, %%mm0 \n\t"
00567 "movq %%mm0, (%%"REG_c", %1, 2) \n\t"
00568
00569 :
00570 : "r" (src), "r" ((long)stride), "m" (co->pQPb)
00571 : "%"REG_a, "%"REG_c
00572 );
00573 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00574
00575 const int l1= stride;
00576 const int l2= stride + l1;
00577 const int l3= stride + l2;
00578 const int l4= stride + l3;
00579 const int l5= stride + l4;
00580 const int l6= stride + l5;
00581 const int l7= stride + l6;
00582
00583
00584 int x;
00585
00586 src+= stride*3;
00587 for(x=0; x<BLOCK_SIZE; x++)
00588 {
00589 int a= src[l3] - src[l4];
00590 int b= src[l4] - src[l5];
00591 int c= src[l5] - src[l6];
00592
00593 int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
00594 d= FFMAX(d, 0);
00595
00596 if(d < co->QP*2)
00597 {
00598 int v = d * FFSIGN(-b);
00599
00600 src[l2] +=v>>3;
00601 src[l3] +=v>>2;
00602 src[l4] +=(3*v)>>3;
00603 src[l5] -=(3*v)>>3;
00604 src[l6] -=v>>2;
00605 src[l7] -=v>>3;
00606
00607 }
00608 src++;
00609 }
00610 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00611 }
00612
00613 #ifndef HAVE_ALTIVEC
00614 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00615 {
00616 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631 src+= stride*4;
00632 asm volatile(
00633
00634 #if 0 //sligtly more accurate and slightly slower
00635 "pxor %%mm7, %%mm7 \n\t"
00636 "lea (%0, %1), %%"REG_a" \n\t"
00637 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00638
00639
00640
00641
00642
00643 "movq (%0, %1, 2), %%mm0 \n\t"
00644 "movq (%0), %%mm1 \n\t"
00645 "movq %%mm0, %%mm2 \n\t"
00646 PAVGB(%%mm7, %%mm0)
00647 PAVGB(%%mm1, %%mm0)
00648 PAVGB(%%mm2, %%mm0)
00649
00650 "movq (%%"REG_a"), %%mm1 \n\t"
00651 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
00652 "movq %%mm1, %%mm4 \n\t"
00653 PAVGB(%%mm7, %%mm1)
00654 PAVGB(%%mm3, %%mm1)
00655 PAVGB(%%mm4, %%mm1)
00656
00657 "movq %%mm0, %%mm4 \n\t"
00658 "psubusb %%mm1, %%mm0 \n\t"
00659 "psubusb %%mm4, %%mm1 \n\t"
00660 "por %%mm0, %%mm1 \n\t"
00661
00662
00663 "movq (%0, %1, 4), %%mm0 \n\t"
00664 "movq %%mm0, %%mm4 \n\t"
00665 PAVGB(%%mm7, %%mm0)
00666 PAVGB(%%mm2, %%mm0)
00667 PAVGB(%%mm4, %%mm0)
00668
00669 "movq (%%"REG_c"), %%mm2 \n\t"
00670 "movq %%mm3, %%mm5 \n\t"
00671 PAVGB(%%mm7, %%mm3)
00672 PAVGB(%%mm2, %%mm3)
00673 PAVGB(%%mm5, %%mm3)
00674
00675 "movq %%mm0, %%mm6 \n\t"
00676 "psubusb %%mm3, %%mm0 \n\t"
00677 "psubusb %%mm6, %%mm3 \n\t"
00678 "por %%mm0, %%mm3 \n\t"
00679 "pcmpeqb %%mm7, %%mm0 \n\t"
00680
00681
00682 "movq (%%"REG_c", %1), %%mm6 \n\t"
00683 "movq %%mm6, %%mm5 \n\t"
00684 PAVGB(%%mm7, %%mm6)
00685 PAVGB(%%mm4, %%mm6)
00686 PAVGB(%%mm5, %%mm6)
00687
00688 "movq (%%"REG_c", %1, 2), %%mm5 \n\t"
00689 "movq %%mm2, %%mm4 \n\t"
00690 PAVGB(%%mm7, %%mm2)
00691 PAVGB(%%mm5, %%mm2)
00692 PAVGB(%%mm4, %%mm2)
00693
00694 "movq %%mm6, %%mm4 \n\t"
00695 "psubusb %%mm2, %%mm6 \n\t"
00696 "psubusb %%mm4, %%mm2 \n\t"
00697 "por %%mm6, %%mm2 \n\t"
00698
00699
00700
00701 PMINUB(%%mm2, %%mm1, %%mm4)
00702 "movq %2, %%mm4 \n\t"
00703 "paddusb "MANGLE(b01)", %%mm4 \n\t"
00704 "pcmpgtb %%mm3, %%mm4 \n\t"
00705 "psubusb %%mm1, %%mm3 \n\t"
00706 "pand %%mm4, %%mm3 \n\t"
00707
00708 "movq %%mm3, %%mm1 \n\t"
00709
00710 PAVGB(%%mm7, %%mm3)
00711 PAVGB(%%mm7, %%mm3)
00712 "paddusb %%mm1, %%mm3 \n\t"
00713
00714
00715 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00716 "movq (%0, %1, 4), %%mm5 \n\t"
00717 "movq (%0, %1, 4), %%mm4 \n\t"
00718 "psubusb %%mm6, %%mm5 \n\t"
00719 "psubusb %%mm4, %%mm6 \n\t"
00720 "por %%mm6, %%mm5 \n\t"
00721 "pcmpeqb %%mm7, %%mm6 \n\t"
00722 "pxor %%mm6, %%mm0 \n\t"
00723 "pand %%mm0, %%mm3 \n\t"
00724 PMINUB(%%mm5, %%mm3, %%mm0)
00725
00726 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00727 PAVGB(%%mm7, %%mm3)
00728
00729 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00730 "movq (%0, %1, 4), %%mm2 \n\t"
00731 "pxor %%mm6, %%mm0 \n\t"
00732 "pxor %%mm6, %%mm2 \n\t"
00733 "psubb %%mm3, %%mm0 \n\t"
00734 "paddb %%mm3, %%mm2 \n\t"
00735 "pxor %%mm6, %%mm0 \n\t"
00736 "pxor %%mm6, %%mm2 \n\t"
00737 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00738 "movq %%mm2, (%0, %1, 4) \n\t"
00739 #endif //0
00740
00741 "lea (%0, %1), %%"REG_a" \n\t"
00742 "pcmpeqb %%mm6, %%mm6 \n\t"
00743
00744
00745
00746
00747
00748 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
00749 "movq (%0, %1, 4), %%mm0 \n\t"
00750 "pxor %%mm6, %%mm1 \n\t"
00751 PAVGB(%%mm1, %%mm0)
00752
00753
00754 "movq (%%"REG_a", %1, 4), %%mm2 \n\t"
00755 "movq (%%"REG_a", %1), %%mm3 \n\t"
00756 "pxor %%mm6, %%mm2 \n\t"
00757 "movq %%mm2, %%mm5 \n\t"
00758 "movq "MANGLE(b80)", %%mm4 \n\t"
00759 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00760 PAVGB(%%mm3, %%mm2)
00761 PAVGB(%%mm0, %%mm4)
00762 PAVGB(%%mm2, %%mm4)
00763 PAVGB(%%mm0, %%mm4)
00764
00765
00766 "movq (%%"REG_a"), %%mm2 \n\t"
00767 "pxor %%mm6, %%mm2 \n\t"
00768 PAVGB(%%mm3, %%mm2)
00769 PAVGB((%0), %%mm1)
00770 "movq "MANGLE(b80)", %%mm3 \n\t"
00771 PAVGB(%%mm2, %%mm3)
00772 PAVGB(%%mm1, %%mm3)
00773 PAVGB(%%mm2, %%mm3)
00774
00775
00776 PAVGB((%%REGc, %1), %%mm5)
00777 "movq (%%"REG_c", %1, 2), %%mm1 \n\t"
00778 "pxor %%mm6, %%mm1 \n\t"
00779 PAVGB((%0, %1, 4), %%mm1)
00780 "movq "MANGLE(b80)", %%mm2 \n\t"
00781 PAVGB(%%mm5, %%mm2)
00782 PAVGB(%%mm1, %%mm2)
00783 PAVGB(%%mm5, %%mm2)
00784
00785
00786 "movq "MANGLE(b00)", %%mm1 \n\t"
00787 "movq "MANGLE(b00)", %%mm5 \n\t"
00788 "psubb %%mm2, %%mm1 \n\t"
00789 "psubb %%mm3, %%mm5 \n\t"
00790 PMAXUB(%%mm1, %%mm2)
00791 PMAXUB(%%mm5, %%mm3)
00792 PMINUB(%%mm2, %%mm3, %%mm1)
00793
00794
00795
00796 "movq "MANGLE(b00)", %%mm7 \n\t"
00797 "movq %2, %%mm2 \n\t"
00798 PAVGB(%%mm6, %%mm2)
00799 "psubb %%mm6, %%mm2 \n\t"
00800
00801 "movq %%mm4, %%mm1 \n\t"
00802 "pcmpgtb %%mm7, %%mm1 \n\t"
00803 "pxor %%mm1, %%mm4 \n\t"
00804 "psubb %%mm1, %%mm4 \n\t"
00805 "pcmpgtb %%mm4, %%mm2 \n\t"
00806 "psubusb %%mm3, %%mm4 \n\t"
00807
00808
00809 "movq %%mm4, %%mm3 \n\t"
00810 "psubusb "MANGLE(b01)", %%mm4 \n\t"
00811 PAVGB(%%mm7, %%mm4)
00812 PAVGB(%%mm7, %%mm4)
00813 "paddb %%mm3, %%mm4 \n\t"
00814 "pand %%mm2, %%mm4 \n\t"
00815
00816 "movq "MANGLE(b80)", %%mm5 \n\t"
00817 "psubb %%mm0, %%mm5 \n\t"
00818 "paddsb %%mm6, %%mm5 \n\t"
00819 "pcmpgtb %%mm5, %%mm7 \n\t"
00820 "pxor %%mm7, %%mm5 \n\t"
00821
00822 PMINUB(%%mm5, %%mm4, %%mm3)
00823 "pxor %%mm1, %%mm7 \n\t"
00824
00825 "pand %%mm7, %%mm4 \n\t"
00826 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00827 "movq (%0, %1, 4), %%mm2 \n\t"
00828 "pxor %%mm1, %%mm0 \n\t"
00829 "pxor %%mm1, %%mm2 \n\t"
00830 "paddb %%mm4, %%mm0 \n\t"
00831 "psubb %%mm4, %%mm2 \n\t"
00832 "pxor %%mm1, %%mm0 \n\t"
00833 "pxor %%mm1, %%mm2 \n\t"
00834 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00835 "movq %%mm2, (%0, %1, 4) \n\t"
00836
00837 :
00838 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
00839 : "%"REG_a, "%"REG_c
00840 );
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906 #elif defined (HAVE_MMX)
00907 src+= stride*4;
00908 asm volatile(
00909 "pxor %%mm7, %%mm7 \n\t"
00910 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
00911 "and "ALIGN_MASK", %%"REG_c" \n\t"
00912
00913
00914
00915
00916 "movq (%0), %%mm0 \n\t"
00917 "movq %%mm0, %%mm1 \n\t"
00918 "punpcklbw %%mm7, %%mm0 \n\t"
00919 "punpckhbw %%mm7, %%mm1 \n\t"
00920
00921 "movq (%0, %1), %%mm2 \n\t"
00922 "lea (%0, %1, 2), %%"REG_a" \n\t"
00923 "movq %%mm2, %%mm3 \n\t"
00924 "punpcklbw %%mm7, %%mm2 \n\t"
00925 "punpckhbw %%mm7, %%mm3 \n\t"
00926
00927 "movq (%%"REG_a"), %%mm4 \n\t"
00928 "movq %%mm4, %%mm5 \n\t"
00929 "punpcklbw %%mm7, %%mm4 \n\t"
00930 "punpckhbw %%mm7, %%mm5 \n\t"
00931
00932 "paddw %%mm0, %%mm0 \n\t"
00933 "paddw %%mm1, %%mm1 \n\t"
00934 "psubw %%mm4, %%mm2 \n\t"
00935 "psubw %%mm5, %%mm3 \n\t"
00936 "psubw %%mm2, %%mm0 \n\t"
00937 "psubw %%mm3, %%mm1 \n\t"
00938
00939 "psllw $2, %%mm2 \n\t"
00940 "psllw $2, %%mm3 \n\t"
00941 "psubw %%mm2, %%mm0 \n\t"
00942 "psubw %%mm3, %%mm1 \n\t"
00943
00944 "movq (%%"REG_a", %1), %%mm2 \n\t"
00945 "movq %%mm2, %%mm3 \n\t"
00946 "punpcklbw %%mm7, %%mm2 \n\t"
00947 "punpckhbw %%mm7, %%mm3 \n\t"
00948
00949 "psubw %%mm2, %%mm0 \n\t"
00950 "psubw %%mm3, %%mm1 \n\t"
00951 "psubw %%mm2, %%mm0 \n\t"
00952 "psubw %%mm3, %%mm1 \n\t"
00953 "movq %%mm0, (%%"REG_c") \n\t"
00954 "movq %%mm1, 8(%%"REG_c") \n\t"
00955
00956 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00957 "movq %%mm0, %%mm1 \n\t"
00958 "punpcklbw %%mm7, %%mm0 \n\t"
00959 "punpckhbw %%mm7, %%mm1 \n\t"
00960
00961 "psubw %%mm0, %%mm2 \n\t"
00962 "psubw %%mm1, %%mm3 \n\t"
00963 "movq %%mm2, 16(%%"REG_c") \n\t"
00964 "movq %%mm3, 24(%%"REG_c") \n\t"
00965 "paddw %%mm4, %%mm4 \n\t"
00966 "paddw %%mm5, %%mm5 \n\t"
00967 "psubw %%mm2, %%mm4 \n\t"
00968 "psubw %%mm3, %%mm5 \n\t"
00969
00970 "lea (%%"REG_a", %1), %0 \n\t"
00971 "psllw $2, %%mm2 \n\t"
00972 "psllw $2, %%mm3 \n\t"
00973 "psubw %%mm2, %%mm4 \n\t"
00974 "psubw %%mm3, %%mm5 \n\t"
00975
00976 "movq (%0, %1, 2), %%mm2 \n\t"
00977 "movq %%mm2, %%mm3 \n\t"
00978 "punpcklbw %%mm7, %%mm2 \n\t"
00979 "punpckhbw %%mm7, %%mm3 \n\t"
00980 "psubw %%mm2, %%mm4 \n\t"
00981 "psubw %%mm3, %%mm5 \n\t"
00982 "psubw %%mm2, %%mm4 \n\t"
00983 "psubw %%mm3, %%mm5 \n\t"
00984
00985 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00986 "punpcklbw %%mm7, %%mm6 \n\t"
00987 "psubw %%mm6, %%mm2 \n\t"
00988 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00989 "punpckhbw %%mm7, %%mm6 \n\t"
00990 "psubw %%mm6, %%mm3 \n\t"
00991
00992 "paddw %%mm0, %%mm0 \n\t"
00993 "paddw %%mm1, %%mm1 \n\t"
00994 "psubw %%mm2, %%mm0 \n\t"
00995 "psubw %%mm3, %%mm1 \n\t"
00996
00997 "psllw $2, %%mm2 \n\t"
00998 "psllw $2, %%mm3 \n\t"
00999 "psubw %%mm2, %%mm0 \n\t"
01000 "psubw %%mm3, %%mm1 \n\t"
01001
01002 "movq (%0, %1, 4), %%mm2 \n\t"
01003 "movq %%mm2, %%mm3 \n\t"
01004 "punpcklbw %%mm7, %%mm2 \n\t"
01005 "punpckhbw %%mm7, %%mm3 \n\t"
01006
01007 "paddw %%mm2, %%mm2 \n\t"
01008 "paddw %%mm3, %%mm3 \n\t"
01009 "psubw %%mm2, %%mm0 \n\t"
01010 "psubw %%mm3, %%mm1 \n\t"
01011
01012 "movq (%%"REG_c"), %%mm2 \n\t"
01013 "movq 8(%%"REG_c"), %%mm3 \n\t"
01014
01015 #ifdef HAVE_MMX2
01016 "movq %%mm7, %%mm6 \n\t"
01017 "psubw %%mm0, %%mm6 \n\t"
01018 "pmaxsw %%mm6, %%mm0 \n\t"
01019 "movq %%mm7, %%mm6 \n\t"
01020 "psubw %%mm1, %%mm6 \n\t"
01021 "pmaxsw %%mm6, %%mm1 \n\t"
01022 "movq %%mm7, %%mm6 \n\t"
01023 "psubw %%mm2, %%mm6 \n\t"
01024 "pmaxsw %%mm6, %%mm2 \n\t"
01025 "movq %%mm7, %%mm6 \n\t"
01026 "psubw %%mm3, %%mm6 \n\t"
01027 "pmaxsw %%mm6, %%mm3 \n\t"
01028 #else
01029 "movq %%mm7, %%mm6 \n\t"
01030 "pcmpgtw %%mm0, %%mm6 \n\t"
01031 "pxor %%mm6, %%mm0 \n\t"
01032 "psubw %%mm6, %%mm0 \n\t"
01033 "movq %%mm7, %%mm6 \n\t"
01034 "pcmpgtw %%mm1, %%mm6 \n\t"
01035 "pxor %%mm6, %%mm1 \n\t"
01036 "psubw %%mm6, %%mm1 \n\t"
01037 "movq %%mm7, %%mm6 \n\t"
01038 "pcmpgtw %%mm2, %%mm6 \n\t"
01039 "pxor %%mm6, %%mm2 \n\t"
01040 "psubw %%mm6, %%mm2 \n\t"
01041 "movq %%mm7, %%mm6 \n\t"
01042 "pcmpgtw %%mm3, %%mm6 \n\t"
01043 "pxor %%mm6, %%mm3 \n\t"
01044 "psubw %%mm6, %%mm3 \n\t"
01045 #endif
01046
01047 #ifdef HAVE_MMX2
01048 "pminsw %%mm2, %%mm0 \n\t"
01049 "pminsw %%mm3, %%mm1 \n\t"
01050 #else
01051 "movq %%mm0, %%mm6 \n\t"
01052 "psubusw %%mm2, %%mm6 \n\t"
01053 "psubw %%mm6, %%mm0 \n\t"
01054 "movq %%mm1, %%mm6 \n\t"
01055 "psubusw %%mm3, %%mm6 \n\t"
01056 "psubw %%mm6, %%mm1 \n\t"
01057 #endif
01058
01059 "movd %2, %%mm2 \n\t"
01060 "punpcklbw %%mm7, %%mm2 \n\t"
01061
01062 "movq %%mm7, %%mm6 \n\t"
01063 "pcmpgtw %%mm4, %%mm6 \n\t"
01064 "pxor %%mm6, %%mm4 \n\t"
01065 "psubw %%mm6, %%mm4 \n\t"
01066 "pcmpgtw %%mm5, %%mm7 \n\t"
01067 "pxor %%mm7, %%mm5 \n\t"
01068 "psubw %%mm7, %%mm5 \n\t"
01069
01070 "psllw $3, %%mm2 \n\t"
01071 "movq %%mm2, %%mm3 \n\t"
01072 "pcmpgtw %%mm4, %%mm2 \n\t"
01073 "pcmpgtw %%mm5, %%mm3 \n\t"
01074 "pand %%mm2, %%mm4 \n\t"
01075 "pand %%mm3, %%mm5 \n\t"
01076
01077
01078 "psubusw %%mm0, %%mm4 \n\t"
01079 "psubusw %%mm1, %%mm5 \n\t"
01080
01081
01082 "movq "MANGLE(w05)", %%mm2 \n\t"
01083 "pmullw %%mm2, %%mm4 \n\t"
01084 "pmullw %%mm2, %%mm5 \n\t"
01085 "movq "MANGLE(w20)", %%mm2 \n\t"
01086 "paddw %%mm2, %%mm4 \n\t"
01087 "paddw %%mm2, %%mm5 \n\t"
01088 "psrlw $6, %%mm4 \n\t"
01089 "psrlw $6, %%mm5 \n\t"
01090
01091 "movq 16(%%"REG_c"), %%mm0 \n\t"
01092 "movq 24(%%"REG_c"), %%mm1 \n\t"
01093
01094 "pxor %%mm2, %%mm2 \n\t"
01095 "pxor %%mm3, %%mm3 \n\t"
01096
01097 "pcmpgtw %%mm0, %%mm2 \n\t"
01098 "pcmpgtw %%mm1, %%mm3 \n\t"
01099 "pxor %%mm2, %%mm0 \n\t"
01100 "pxor %%mm3, %%mm1 \n\t"
01101 "psubw %%mm2, %%mm0 \n\t"
01102 "psubw %%mm3, %%mm1 \n\t"
01103 "psrlw $1, %%mm0 \n\t"
01104 "psrlw $1, %%mm1 \n\t"
01105
01106 "pxor %%mm6, %%mm2 \n\t"
01107 "pxor %%mm7, %%mm3 \n\t"
01108 "pand %%mm2, %%mm4 \n\t"
01109 "pand %%mm3, %%mm5 \n\t"
01110
01111 #ifdef HAVE_MMX2
01112 "pminsw %%mm0, %%mm4 \n\t"
01113 "pminsw %%mm1, %%mm5 \n\t"
01114 #else
01115 "movq %%mm4, %%mm2 \n\t"
01116 "psubusw %%mm0, %%mm2 \n\t"
01117 "psubw %%mm2, %%mm4 \n\t"
01118 "movq %%mm5, %%mm2 \n\t"
01119 "psubusw %%mm1, %%mm2 \n\t"
01120 "psubw %%mm2, %%mm5 \n\t"
01121 #endif
01122 "pxor %%mm6, %%mm4 \n\t"
01123 "pxor %%mm7, %%mm5 \n\t"
01124 "psubw %%mm6, %%mm4 \n\t"
01125 "psubw %%mm7, %%mm5 \n\t"
01126 "packsswb %%mm5, %%mm4 \n\t"
01127 "movq (%0), %%mm0 \n\t"
01128 "paddb %%mm4, %%mm0 \n\t"
01129 "movq %%mm0, (%0) \n\t"
01130 "movq (%0, %1), %%mm0 \n\t"
01131 "psubb %%mm4, %%mm0 \n\t"
01132 "movq %%mm0, (%0, %1) \n\t"
01133
01134 : "+r" (src)
01135 : "r" ((long)stride), "m" (c->pQPb)
01136 : "%"REG_a, "%"REG_c
01137 );
01138 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01139 const int l1= stride;
01140 const int l2= stride + l1;
01141 const int l3= stride + l2;
01142 const int l4= stride + l3;
01143 const int l5= stride + l4;
01144 const int l6= stride + l5;
01145 const int l7= stride + l6;
01146 const int l8= stride + l7;
01147
01148 int x;
01149 src+= stride*3;
01150 for(x=0; x<BLOCK_SIZE; x++)
01151 {
01152 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
01153 if(FFABS(middleEnergy) < 8*c->QP)
01154 {
01155 const int q=(src[l4] - src[l5])/2;
01156 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01157 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01158
01159 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
01160 d= FFMAX(d, 0);
01161
01162 d= (5*d + 32) >> 6;
01163 d*= FFSIGN(-middleEnergy);
01164
01165 if(q>0)
01166 {
01167 d= d<0 ? 0 : d;
01168 d= d>q ? q : d;
01169 }
01170 else
01171 {
01172 d= d>0 ? 0 : d;
01173 d= d<q ? q : d;
01174 }
01175
01176 src[l4]-= d;
01177 src[l5]+= d;
01178 }
01179 src++;
01180 }
01181 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01182 }
01183 #endif //HAVE_ALTIVEC
01184
01185 #ifndef HAVE_ALTIVEC
01186 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01187 {
01188 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01189 asm volatile(
01190 "pxor %%mm6, %%mm6 \n\t"
01191 "pcmpeqb %%mm7, %%mm7 \n\t"
01192 "movq %2, %%mm0 \n\t"
01193 "punpcklbw %%mm6, %%mm0 \n\t"
01194 "psrlw $1, %%mm0 \n\t"
01195 "psubw %%mm7, %%mm0 \n\t"
01196 "packuswb %%mm0, %%mm0 \n\t"
01197 "movq %%mm0, %3 \n\t"
01198
01199 "lea (%0, %1), %%"REG_a" \n\t"
01200 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01201
01202
01203
01204
01205 #undef FIND_MIN_MAX
01206 #ifdef HAVE_MMX2
01207 #define REAL_FIND_MIN_MAX(addr)\
01208 "movq " #addr ", %%mm0 \n\t"\
01209 "pminub %%mm0, %%mm7 \n\t"\
01210 "pmaxub %%mm0, %%mm6 \n\t"
01211 #else
01212 #define REAL_FIND_MIN_MAX(addr)\
01213 "movq " #addr ", %%mm0 \n\t"\
01214 "movq %%mm7, %%mm1 \n\t"\
01215 "psubusb %%mm0, %%mm6 \n\t"\
01216 "paddb %%mm0, %%mm6 \n\t"\
01217 "psubusb %%mm0, %%mm1 \n\t"\
01218 "psubb %%mm1, %%mm7 \n\t"
01219 #endif
01220 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
01221
01222 FIND_MIN_MAX((%%REGa))
01223 FIND_MIN_MAX((%%REGa, %1))
01224 FIND_MIN_MAX((%%REGa, %1, 2))
01225 FIND_MIN_MAX((%0, %1, 4))
01226 FIND_MIN_MAX((%%REGd))
01227 FIND_MIN_MAX((%%REGd, %1))
01228 FIND_MIN_MAX((%%REGd, %1, 2))
01229 FIND_MIN_MAX((%0, %1, 8))
01230
01231 "movq %%mm7, %%mm4 \n\t"
01232 "psrlq $8, %%mm7 \n\t"
01233 #ifdef HAVE_MMX2
01234 "pminub %%mm4, %%mm7 \n\t"
01235 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
01236 "pminub %%mm4, %%mm7 \n\t"
01237 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
01238 "pminub %%mm4, %%mm7 \n\t"
01239 #else
01240 "movq %%mm7, %%mm1 \n\t"
01241 "psubusb %%mm4, %%mm1 \n\t"
01242 "psubb %%mm1, %%mm7 \n\t"
01243 "movq %%mm7, %%mm4 \n\t"
01244 "psrlq $16, %%mm7 \n\t"
01245 "movq %%mm7, %%mm1 \n\t"
01246 "psubusb %%mm4, %%mm1 \n\t"
01247 "psubb %%mm1, %%mm7 \n\t"
01248 "movq %%mm7, %%mm4 \n\t"
01249 "psrlq $32, %%mm7 \n\t"
01250 "movq %%mm7, %%mm1 \n\t"
01251 "psubusb %%mm4, %%mm1 \n\t"
01252 "psubb %%mm1, %%mm7 \n\t"
01253 #endif
01254
01255
01256 "movq %%mm6, %%mm4 \n\t"
01257 "psrlq $8, %%mm6 \n\t"
01258 #ifdef HAVE_MMX2
01259 "pmaxub %%mm4, %%mm6 \n\t"
01260 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
01261 "pmaxub %%mm4, %%mm6 \n\t"
01262 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
01263 "pmaxub %%mm4, %%mm6 \n\t"
01264 #else
01265 "psubusb %%mm4, %%mm6 \n\t"
01266 "paddb %%mm4, %%mm6 \n\t"
01267 "movq %%mm6, %%mm4 \n\t"
01268 "psrlq $16, %%mm6 \n\t"
01269 "psubusb %%mm4, %%mm6 \n\t"
01270 "paddb %%mm4, %%mm6 \n\t"
01271 "movq %%mm6, %%mm4 \n\t"
01272 "psrlq $32, %%mm6 \n\t"
01273 "psubusb %%mm4, %%mm6 \n\t"
01274 "paddb %%mm4, %%mm6 \n\t"
01275 #endif
01276 "movq %%mm6, %%mm0 \n\t"
01277 "psubb %%mm7, %%mm6 \n\t"
01278 "movd %%mm6, %%ecx \n\t"
01279 "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
01280 " jb 1f \n\t"
01281 "lea -24(%%"REG_SP"), %%"REG_c" \n\t"
01282 "and "ALIGN_MASK", %%"REG_c" \n\t"
01283 PAVGB(%%mm0, %%mm7)
01284 "punpcklbw %%mm7, %%mm7 \n\t"
01285 "punpcklbw %%mm7, %%mm7 \n\t"
01286 "punpcklbw %%mm7, %%mm7 \n\t"
01287 "movq %%mm7, (%%"REG_c") \n\t"
01288
01289 "movq (%0), %%mm0 \n\t"
01290 "movq %%mm0, %%mm1 \n\t"
01291 "movq %%mm0, %%mm2 \n\t"
01292 "psllq $8, %%mm1 \n\t"
01293 "psrlq $8, %%mm2 \n\t"
01294 "movd -4(%0), %%mm3 \n\t"
01295 "movd 8(%0), %%mm4 \n\t"
01296 "psrlq $24, %%mm3 \n\t"
01297 "psllq $56, %%mm4 \n\t"
01298 "por %%mm3, %%mm1 \n\t"
01299 "por %%mm4, %%mm2 \n\t"
01300 "movq %%mm1, %%mm3 \n\t"
01301 PAVGB(%%mm2, %%mm1)
01302 PAVGB(%%mm0, %%mm1)
01303 "psubusb %%mm7, %%mm0 \n\t"
01304 "psubusb %%mm7, %%mm2 \n\t"
01305 "psubusb %%mm7, %%mm3 \n\t"
01306 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t"
01307 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01308 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t"
01309 "paddb %%mm2, %%mm0 \n\t"
01310 "paddb %%mm3, %%mm0 \n\t"
01311
01312 "movq (%%"REG_a"), %%mm2 \n\t"
01313 "movq %%mm2, %%mm3 \n\t"
01314 "movq %%mm2, %%mm4 \n\t"
01315 "psllq $8, %%mm3 \n\t"
01316 "psrlq $8, %%mm4 \n\t"
01317 "movd -4(%%"REG_a"), %%mm5 \n\t"
01318 "movd 8(%%"REG_a"), %%mm6 \n\t"
01319 "psrlq $24, %%mm5 \n\t"
01320 "psllq $56, %%mm6 \n\t"
01321 "por %%mm5, %%mm3 \n\t"
01322 "por %%mm6, %%mm4 \n\t"
01323 "movq %%mm3, %%mm5 \n\t"
01324 PAVGB(%%mm4, %%mm3)
01325 PAVGB(%%mm2, %%mm3)
01326 "psubusb %%mm7, %%mm2 \n\t"
01327 "psubusb %%mm7, %%mm4 \n\t"
01328 "psubusb %%mm7, %%mm5 \n\t"
01329 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01330 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t"
01331 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t"
01332 "paddb %%mm4, %%mm2 \n\t"
01333 "paddb %%mm5, %%mm2 \n\t"
01334
01335 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01336 "movq " #src ", " #sx " \n\t" \
01337 "movq " #sx ", " #lx " \n\t" \
01338 "movq " #sx ", " #t0 " \n\t" \
01339 "psllq $8, " #lx " \n\t"\
01340 "psrlq $8, " #t0 " \n\t"\
01341 "movd -4" #src ", " #t1 " \n\t"\
01342 "psrlq $24, " #t1 " \n\t"\
01343 "por " #t1 ", " #lx " \n\t" \
01344 "movd 8" #src ", " #t1 " \n\t"\
01345 "psllq $56, " #t1 " \n\t"\
01346 "por " #t1 ", " #t0 " \n\t" \
01347 "movq " #lx ", " #t1 " \n\t" \
01348 PAVGB(t0, lx) \
01349 PAVGB(sx, lx) \
01350 PAVGB(lx, pplx) \
01351 "movq " #lx ", 8(%%"REG_c") \n\t"\
01352 "movq (%%"REG_c"), " #lx " \n\t"\
01353 "psubusb " #lx ", " #t1 " \n\t"\
01354 "psubusb " #lx ", " #t0 " \n\t"\
01355 "psubusb " #lx ", " #sx " \n\t"\
01356 "movq "MANGLE(b00)", " #lx " \n\t"\
01357 "pcmpeqb " #lx ", " #t1 " \n\t" \
01358 "pcmpeqb " #lx ", " #t0 " \n\t" \
01359 "pcmpeqb " #lx ", " #sx " \n\t" \
01360 "paddb " #t1 ", " #t0 " \n\t"\
01361 "paddb " #t0 ", " #sx " \n\t"\
01362 \
01363 PAVGB(plx, pplx) \
01364 "movq " #dst ", " #t0 " \n\t" \
01365 "movq " #t0 ", " #t1 " \n\t" \
01366 "psubusb %3, " #t0 " \n\t"\
01367 "paddusb %3, " #t1 " \n\t"\
01368 PMAXUB(t0, pplx)\
01369 PMINUB(t1, pplx, t0)\
01370 "paddb " #sx ", " #ppsx " \n\t"\
01371 "paddb " #psx ", " #ppsx " \n\t"\
01372 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
01373 "pand "MANGLE(b08)", " #ppsx " \n\t"\
01374 "pcmpeqb " #lx ", " #ppsx " \n\t"\
01375 "pand " #ppsx ", " #pplx " \n\t"\
01376 "pandn " #dst ", " #ppsx " \n\t"\
01377 "por " #pplx ", " #ppsx " \n\t"\
01378 "movq " #ppsx ", " #dst " \n\t"\
01379 "movq 8(%%"REG_c"), " #lx " \n\t"
01380
01381 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01382 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01400 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01401 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01402 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01403 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01404 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01405 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01406 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01407
01408 "1: \n\t"
01409 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
01410 : "%"REG_a, "%"REG_d, "%"REG_c
01411 );
01412 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01413 int y;
01414 int min=255;
01415 int max=0;
01416 int avg;
01417 uint8_t *p;
01418 int s[10];
01419 const int QP2= c->QP/2 + 1;
01420
01421 for(y=1; y<9; y++)
01422 {
01423 int x;
01424 p= src + stride*y;
01425 for(x=1; x<9; x++)
01426 {
01427 p++;
01428 if(*p > max) max= *p;
01429 if(*p < min) min= *p;
01430 }
01431 }
01432 avg= (min + max + 1)>>1;
01433
01434 if(max - min <deringThreshold) return;
01435
01436 for(y=0; y<10; y++)
01437 {
01438 int t = 0;
01439
01440 if(src[stride*y + 0] > avg) t+= 1;
01441 if(src[stride*y + 1] > avg) t+= 2;
01442 if(src[stride*y + 2] > avg) t+= 4;
01443 if(src[stride*y + 3] > avg) t+= 8;
01444 if(src[stride*y + 4] > avg) t+= 16;
01445 if(src[stride*y + 5] > avg) t+= 32;
01446 if(src[stride*y + 6] > avg) t+= 64;
01447 if(src[stride*y + 7] > avg) t+= 128;
01448 if(src[stride*y + 8] > avg) t+= 256;
01449 if(src[stride*y + 9] > avg) t+= 512;
01450
01451 t |= (~t)<<16;
01452 t &= (t<<1) & (t>>1);
01453 s[y] = t;
01454 }
01455
01456 for(y=1; y<9; y++)
01457 {
01458 int t = s[y-1] & s[y] & s[y+1];
01459 t|= t>>16;
01460 s[y-1]= t;
01461 }
01462
01463 for(y=1; y<9; y++)
01464 {
01465 int x;
01466 int t = s[y-1];
01467
01468 p= src + stride*y;
01469 for(x=1; x<9; x++)
01470 {
01471 p++;
01472 if(t & (1<<x))
01473 {
01474 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01475 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
01476 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01477 f= (f + 8)>>4;
01478
01479 #ifdef DEBUG_DERING_THRESHOLD
01480 asm volatile("emms\n\t":);
01481 {
01482 static long long numPixels=0;
01483 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01484
01485
01486
01487 if(max-min < 20)
01488 {
01489 static int numSkiped=0;
01490 static int errorSum=0;
01491 static int worstQP=0;
01492 static int worstRange=0;
01493 static int worstDiff=0;
01494 int diff= (f - *p);
01495 int absDiff= FFABS(diff);
01496 int error= diff*diff;
01497
01498 if(x==1 || x==8 || y==1 || y==8) continue;
01499
01500 numSkiped++;
01501 if(absDiff > worstDiff)
01502 {
01503 worstDiff= absDiff;
01504 worstQP= QP;
01505 worstRange= max-min;
01506 }
01507 errorSum+= error;
01508
01509 if(1024LL*1024LL*1024LL % numSkiped == 0)
01510 {
01511 av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
01512 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01513 (float)errorSum/numSkiped, numSkiped, worstQP, worstRange,
01514 worstDiff, (float)numSkiped/numPixels);
01515 }
01516 }
01517 }
01518 #endif
01519 if (*p + QP2 < f) *p= *p + QP2;
01520 else if(*p - QP2 > f) *p= *p - QP2;
01521 else *p=f;
01522 }
01523 }
01524 }
01525 #ifdef DEBUG_DERING_THRESHOLD
01526 if(max-min < 20)
01527 {
01528 for(y=1; y<9; y++)
01529 {
01530 int x;
01531 int t = 0;
01532 p= src + stride*y;
01533 for(x=1; x<9; x++)
01534 {
01535 p++;
01536 *p = FFMIN(*p + 20, 255);
01537 }
01538 }
01539
01540 }
01541 #endif
01542 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01543 }
01544 #endif //HAVE_ALTIVEC
01545
/**
 * Deinterlaces one 8-byte-wide block by linear interpolation.
 * src is first advanced by 4 lines; relative to that position, lines 1, 3, 5
 * and 7 are overwritten with the per-byte average of the line directly above
 * and the line directly below. Lines 0, 2, 4, 6 and 8 are only read.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 */
01552 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01553 {
01554 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01555 src+= 4*stride;
01556 asm volatile(
// REG_a = src + stride (line 1), REG_c = REG_a + 4*stride (line 5)
01557 "lea (%0, %1), %%"REG_a" \n\t"
01558 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
01559
01560
01561
// line 1 = avg(line 0, line 2); mm1 keeps line 2 for the next step
01562 "movq (%0), %%mm0 \n\t"
01563 "movq (%%"REG_a", %1), %%mm1 \n\t"
01564 PAVGB(%%mm1, %%mm0)
01565 "movq %%mm0, (%%"REG_a") \n\t"
// line 3 = avg(line 2, line 4); mm0 keeps line 4
01566 "movq (%0, %1, 4), %%mm0 \n\t"
01567 PAVGB(%%mm0, %%mm1)
01568 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
// line 5 = avg(line 4, line 6); mm1 keeps line 6
01569 "movq (%%"REG_c", %1), %%mm1 \n\t"
01570 PAVGB(%%mm1, %%mm0)
01571 "movq %%mm0, (%%"REG_c") \n\t"
// line 7 = avg(line 6, line 8)
01572 "movq (%0, %1, 8), %%mm0 \n\t"
01573 PAVGB(%%mm0, %%mm1)
01574 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
01575
01576 : : "r" (src), "r" ((long)stride)
01577 : "%"REG_a, "%"REG_c
01578 );
01579 #else
// Scalar fallback: handles the 8 bytes as two 32-bit words (x = 0, 1).
// (a|b) - (((a^b)&0xFEFEFEFE)>>1) is the average of the four packed bytes,
// rounded up, computed without carries crossing byte boundaries.
01580 int a, b, x;
01581 src+= 4*stride;
01582
01583 for(x=0; x<2; x++){
01584 a= *(uint32_t*)&src[stride*0];
01585 b= *(uint32_t*)&src[stride*2];
01586 *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01587 a= *(uint32_t*)&src[stride*4];
01588 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01589 b= *(uint32_t*)&src[stride*6];
01590 *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01591 a= *(uint32_t*)&src[stride*8];
01592 *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01593 src += 4;
01594 }
01595 #endif
01596 }
01597
/**
 * Deinterlaces one 8-byte-wide block by cubic interpolation.
 * src is first advanced by 3 lines; relative to that position, lines 3, 5, 7
 * and 9 are overwritten with (-a + 9*b + 9*d - e)/16, where b and d are the
 * lines directly above and below the output line and a and e are the lines
 * three above and three below. Lines 0, 2, 4, 6, 8, 10 and 12 are only read.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 */
01605 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01606 {
01607 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01608 src+= stride*3;
01609 asm volatile(
// REG_a = line 1, REG_d = line 5, REG_c = line 10; mm7 = 0 for byte->word unpacking
01610 "lea (%0, %1), %%"REG_a" \n\t"
01611 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01612 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
01613 "add %1, %%"REG_c" \n\t"
01614 "pxor %%mm7, %%mm7 \n\t"
01615
01616
01617
// DEINT_CUBIC(a,b,c,d,e): with H = avg(b,d) and L = avg(a,e), computes
// H - ((L - H) >> 3) on 16-bit words, packs the result back to bytes with
// unsigned saturation and stores it to line c. Uses mm0-mm3, expects mm7 = 0.
01618 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01619 "movq " #a ", %%mm0 \n\t"\
01620 "movq " #b ", %%mm1 \n\t"\
01621 "movq " #d ", %%mm2 \n\t"\
01622 "movq " #e ", %%mm3 \n\t"\
01623 PAVGB(%%mm2, %%mm1) \
01624 PAVGB(%%mm3, %%mm0) \
01625 "movq %%mm0, %%mm2 \n\t"\
01626 "punpcklbw %%mm7, %%mm0 \n\t"\
01627 "punpckhbw %%mm7, %%mm2 \n\t"\
01628 "movq %%mm1, %%mm3 \n\t"\
01629 "punpcklbw %%mm7, %%mm1 \n\t"\
01630 "punpckhbw %%mm7, %%mm3 \n\t"\
01631 "psubw %%mm1, %%mm0 \n\t" \
01632 "psubw %%mm3, %%mm2 \n\t" \
01633 "psraw $3, %%mm0 \n\t" \
01634 "psraw $3, %%mm2 \n\t" \
01635 "psubw %%mm0, %%mm1 \n\t" \
01636 "psubw %%mm2, %%mm3 \n\t" \
01637 "packuswb %%mm3, %%mm1 \n\t"\
01638 "movq %%mm1, " #c " \n\t"
// Extra macro level so that arguments are macro-expanded before stringification.
01639 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
01640
// Inputs (a,b,d,e) -> output c, as line numbers:
// (0,2,4,6)->3, (2,4,6,8)->5, (4,6,8,10)->7, (6,8,10,12)->9
01641 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
01642 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
01643 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
01644 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
01645
01646 : : "r" (src), "r" ((long)stride)
01647 : "%"REG_a, "%"REG_d, "%"REG_c
01648 );
01649 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
// Scalar fallback: one column per iteration, 8 columns; CLIP (defined
// elsewhere) is applied to each result before it is stored.
01650 int x;
01651 src+= stride*3;
01652 for(x=0; x<8; x++)
01653 {
01654 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01655 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01656 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01657 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01658 src++;
01659 }
01660 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01661 }
01662
/**
 * Deinterlaces one 8-byte-wide block by filtering every second line with a
 * (-1 4 2 4 -1)/8 vertical filter.
 * src is first advanced by 4 lines; relative to that position, lines 1, 3, 5
 * and 7 are rewritten from the unfiltered lines two above, one above, the
 * line itself, one below and two below. Lines 0, 2, 4, 6, 8 and 9 are only
 * read.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 * @param tmp    8 bytes of state: on entry it supplies the outer (-1) tap two
 *               lines above line 1; on exit it holds the unfiltered line 7,
 *               i.e. the value needed in the same role by the block below
 */
01670 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01671 {
01672 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01673 src+= stride*4;
01674 asm volatile(
// REG_a = line 1, REG_d = line 5, mm7 = 0 for unpacking, mm0 = saved line from tmp
01675 "lea (%0, %1), %%"REG_a" \n\t"
01676 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01677 "pxor %%mm7, %%mm7 \n\t"
01678 "movq (%2), %%mm0 \n\t"
01679
01680
01681
// DEINT_FF(a,b,c,d): b = (4*avg(a,c) - avg(mm0,d) + b) >> 2, where mm0 is the
// unfiltered line two above b. Before b is overwritten its unfiltered value
// is copied into mm0 so that it becomes the outer tap of the next invocation.
01682 #define REAL_DEINT_FF(a,b,c,d)\
01683 "movq " #a ", %%mm1 \n\t"\
01684 "movq " #b ", %%mm2 \n\t"\
01685 "movq " #c ", %%mm3 \n\t"\
01686 "movq " #d ", %%mm4 \n\t"\
01687 PAVGB(%%mm3, %%mm1) \
01688 PAVGB(%%mm4, %%mm0) \
01689 "movq %%mm0, %%mm3 \n\t"\
01690 "punpcklbw %%mm7, %%mm0 \n\t"\
01691 "punpckhbw %%mm7, %%mm3 \n\t"\
01692 "movq %%mm1, %%mm4 \n\t"\
01693 "punpcklbw %%mm7, %%mm1 \n\t"\
01694 "punpckhbw %%mm7, %%mm4 \n\t"\
01695 "psllw $2, %%mm1 \n\t"\
01696 "psllw $2, %%mm4 \n\t"\
01697 "psubw %%mm0, %%mm1 \n\t"\
01698 "psubw %%mm3, %%mm4 \n\t"\
01699 "movq %%mm2, %%mm5 \n\t"\
01700 "movq %%mm2, %%mm0 \n\t"\
01701 "punpcklbw %%mm7, %%mm2 \n\t"\
01702 "punpckhbw %%mm7, %%mm5 \n\t"\
01703 "paddw %%mm2, %%mm1 \n\t"\
01704 "paddw %%mm5, %%mm4 \n\t"\
01705 "psraw $2, %%mm1 \n\t"\
01706 "psraw $2, %%mm4 \n\t"\
01707 "packuswb %%mm4, %%mm1 \n\t"\
01708 "movq %%mm1, " #b " \n\t"\
01709
// Extra macro level so that arguments are macro-expanded before stringification.
01710 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
01711
// Filtered lines (argument b): 1, 3, 5, 7
01712 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
01713 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01714 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
01715 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01716
// mm0 now holds the unfiltered line 7; save it for the next call
01717 "movq %%mm0, (%2) \n\t"
01718 : : "r" (src), "r" ((long)stride), "r"(tmp)
01719 : "%"REG_a, "%"REG_d
01720 );
01721 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
// Scalar fallback: one column per iteration, 8 columns.
01722 int x;
01723 src+= stride*4;
01724 for(x=0; x<8; x++)
01725 {
01726 int t1= tmp[x];
01727 int t2= src[stride*1];
01728
// t1 and t2 alternate between two roles: the outer (-1) tap two lines above
// the line being rewritten, and that line's own unfiltered value (the centre
// tap, weight 2). The centre value is sampled BEFORE the line is overwritten,
// because it is the outer tap of the next filtered line and, for line 7, the
// state saved in tmp. This matches what the asm path keeps in mm0.
01729 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
01730 t1= src[stride*3];
01731 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01732 t2= src[stride*5];
01733 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01734 t1= src[stride*7];
01735 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
// unfiltered line 7, as stored by the asm path
01736 tmp[x]= t1;
01737
01738 src++;
01739 }
01740 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01741 }
01742
/**
 * Deinterlaces one 8-byte-wide block by applying a (-1 2 6 2 -1)/8 vertical
 * filter to every line.
 * src is first advanced by 4 lines; relative to that position, lines 0..7
 * are rewritten from the unfiltered values of the two lines above, the line
 * itself and the two lines below. Lines 8 and 9 are only read.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 * @param tmp    8 bytes of state: on entry the unfiltered line two above
 *               line 0; on exit the unfiltered line 6
 * @param tmp2   8 bytes of state: on entry the unfiltered line directly above
 *               line 0; on exit the unfiltered line 7
 */
01750 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01751 {
01752 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01753 src+= stride*4;
01754 asm volatile(
// REG_a = line 1, REG_d = line 5, mm7 = 0 for unpacking,
// mm0 = line -2 (from tmp), mm1 = line -1 (from tmp2)
01755 "lea (%0, %1), %%"REG_a" \n\t"
01756 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01757 "pxor %%mm7, %%mm7 \n\t"
01758 "movq (%2), %%mm0 \n\t"
01759 "movq (%3), %%mm1 \n\t"
01760
01761
01762
// DEINT_L5(t1,t2,a,b,c): a = (3*a + 2*avg(t2,b) - avg(t1,c)) >> 2, with
// t1 = unfiltered line two above a, t2 = unfiltered line one above a,
// b = line below, c = line two below. The unfiltered a is copied into t1
// before a is overwritten, so swapping t1/t2 between invocations keeps the
// two previous unfiltered lines available.
01763 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01764 "movq " #a ", %%mm2 \n\t"\
01765 "movq " #b ", %%mm3 \n\t"\
01766 "movq " #c ", %%mm4 \n\t"\
01767 PAVGB(t2, %%mm3) \
01768 PAVGB(t1, %%mm4) \
01769 "movq %%mm2, %%mm5 \n\t"\
01770 "movq %%mm2, " #t1 " \n\t"\
01771 "punpcklbw %%mm7, %%mm2 \n\t"\
01772 "punpckhbw %%mm7, %%mm5 \n\t"\
01773 "movq %%mm2, %%mm6 \n\t"\
01774 "paddw %%mm2, %%mm2 \n\t"\
01775 "paddw %%mm6, %%mm2 \n\t"\
01776 "movq %%mm5, %%mm6 \n\t"\
01777 "paddw %%mm5, %%mm5 \n\t"\
01778 "paddw %%mm6, %%mm5 \n\t"\
01779 "movq %%mm3, %%mm6 \n\t"\
01780 "punpcklbw %%mm7, %%mm3 \n\t"\
01781 "punpckhbw %%mm7, %%mm6 \n\t"\
01782 "paddw %%mm3, %%mm3 \n\t"\
01783 "paddw %%mm6, %%mm6 \n\t"\
01784 "paddw %%mm3, %%mm2 \n\t"\
01785 "paddw %%mm6, %%mm5 \n\t"\
01786 "movq %%mm4, %%mm6 \n\t"\
01787 "punpcklbw %%mm7, %%mm4 \n\t"\
01788 "punpckhbw %%mm7, %%mm6 \n\t"\
01789 "psubw %%mm4, %%mm2 \n\t"\
01790 "psubw %%mm6, %%mm5 \n\t"\
01791 "psraw $2, %%mm2 \n\t"\
01792 "psraw $2, %%mm5 \n\t"\
01793 "packuswb %%mm5, %%mm2 \n\t"\
01794 "movq %%mm2, " #a " \n\t"\
01795
// Extra macro level so that arguments are macro-expanded before stringification.
01796 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
01797
// Filtered lines (argument a): 0, 1, 2, 3, 4, 5, 6, 7
01798 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
01799 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
01800 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
01801 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01802 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
01803 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
01804 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
01805 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01806
// mm0 = unfiltered line 6, mm1 = unfiltered line 7: save both for the next call
01807 "movq %%mm0, (%2) \n\t"
01808 "movq %%mm1, (%3) \n\t"
01809 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
01810 : "%"REG_a, "%"REG_d
01811 );
01812 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
// Scalar fallback: one column per iteration, 8 columns. t1, t2 and t3 rotate
// through the unfiltered values of the three most recent lines; each is read
// from src just before that line is overwritten.
01813 int x;
01814 src+= stride*4;
01815 for(x=0; x<8; x++)
01816 {
01817 int t1= tmp[x];
01818 int t2= tmp2[x];
01819 int t3= src[0];
01820
01821 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01822 t1= src[stride*1];
01823 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01824 t2= src[stride*2];
01825 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01826 t3= src[stride*3];
01827 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01828 t1= src[stride*4];
01829 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01830 t2= src[stride*5];
01831 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01832 t3= src[stride*6];
01833 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01834 t1= src[stride*7];
01835 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01836
// t3 = unfiltered line 6, t1 = unfiltered line 7
01837 tmp[x]= t3;
01838 tmp2[x]= t1;
01839
01840 src++;
01841 }
01842 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01843 }
01844
/**
 * Deinterlaces one 8-byte-wide block by blending every line with its vertical
 * neighbours using a (1 2 1)/4 filter, built from two successive averages.
 * src is first advanced by 4 lines; relative to that position, lines 0..7
 * are rewritten from the unfiltered values of the line above, the line itself
 * and the line below. Line 8 is only read.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 * @param tmp    8 bytes of state: on entry the unfiltered line directly above
 *               line 0; on exit the unfiltered line 7
 */
01852 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01853 {
01854 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01855 src+= 4*stride;
01856 asm volatile(
// REG_a = line 1, REG_d = line 5
01857 "lea (%0, %1), %%"REG_a" \n\t"
01858 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01859
01860
01861
// Each step: out = avg(avg(above, below), centre). mm0/mm1/mm2 rotate so that
// the unfiltered values of the two most recently loaded lines stay in registers.
// line 0 (above = tmp)
01862 "movq (%2), %%mm0 \n\t"
01863 "movq (%%"REG_a"), %%mm1 \n\t"
01864 PAVGB(%%mm1, %%mm0)
01865 "movq (%0), %%mm2 \n\t"
01866 PAVGB(%%mm2, %%mm0)
01867 "movq %%mm0, (%0) \n\t"
// line 1
01868 "movq (%%"REG_a", %1), %%mm0 \n\t"
01869 PAVGB(%%mm0, %%mm2)
01870 PAVGB(%%mm1, %%mm2)
01871 "movq %%mm2, (%%"REG_a") \n\t"
// line 2
01872 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
01873 PAVGB(%%mm2, %%mm1)
01874 PAVGB(%%mm0, %%mm1)
01875 "movq %%mm1, (%%"REG_a", %1) \n\t"
// line 3
01876 "movq (%0, %1, 4), %%mm1 \n\t"
01877 PAVGB(%%mm1, %%mm0)
01878 PAVGB(%%mm2, %%mm0)
01879 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
// line 4
01880 "movq (%%"REG_d"), %%mm0 \n\t"
01881 PAVGB(%%mm0, %%mm2)
01882 PAVGB(%%mm1, %%mm2)
01883 "movq %%mm2, (%0, %1, 4) \n\t"
// line 5
01884 "movq (%%"REG_d", %1), %%mm2 \n\t"
01885 PAVGB(%%mm2, %%mm1)
01886 PAVGB(%%mm0, %%mm1)
01887 "movq %%mm1, (%%"REG_d") \n\t"
// line 6
01888 "movq (%%"REG_d", %1, 2), %%mm1 \n\t"
01889 PAVGB(%%mm1, %%mm0)
01890 PAVGB(%%mm2, %%mm0)
01891 "movq %%mm0, (%%"REG_d", %1) \n\t"
// line 7; mm1 still holds the unfiltered line 7, which is saved to tmp
01892 "movq (%0, %1, 8), %%mm0 \n\t"
01893 PAVGB(%%mm0, %%mm2)
01894 PAVGB(%%mm1, %%mm2)
01895 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01896 "movq %%mm1, (%2) \n\t"
01897
01898 : : "r" (src), "r" ((long)stride), "r" (tmp)
01899 : "%"REG_a, "%"REG_d
01900 );
01901 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
// Scalar fallback: handles the 8 bytes as two 32-bit words (x = 0, 1).
// (p&q) + (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded down and
// (p|q) - (((p^q)&0xFEFEFEFE)>>1) the per-byte average rounded up; the first
// combines the lines above and below, the second blends in the centre line.
// a, b and c rotate through the unfiltered lines.
01902 int a, b, c, x;
01903 src+= 4*stride;
01904
01905 for(x=0; x<2; x++){
01906 a= *(uint32_t*)&tmp[stride*0];
01907 b= *(uint32_t*)&src[stride*0];
01908 c= *(uint32_t*)&src[stride*1];
01909 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01910 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01911
01912 a= *(uint32_t*)&src[stride*2];
01913 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01914 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01915
01916 b= *(uint32_t*)&src[stride*3];
01917 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01918 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01919
01920 c= *(uint32_t*)&src[stride*4];
01921 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01922 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01923
01924 a= *(uint32_t*)&src[stride*5];
01925 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01926 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01927
01928 b= *(uint32_t*)&src[stride*6];
01929 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01930 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01931
01932 c= *(uint32_t*)&src[stride*7];
01933 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01934 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01935
01936 a= *(uint32_t*)&src[stride*8];
01937 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01938 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01939
// c still holds the unfiltered line 7
01940 *(uint32_t*)&tmp[stride*0]= c;
01941 src += 4;
01942 tmp += 4;
01943 }
01944 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01945 }
01946
/**
 * Deinterlaces one 8-byte-wide block with a vertical 3-tap median.
 * src is first advanced by 4 lines; relative to that position, lines 1, 3, 5
 * and 7 are overwritten with the per-byte median of the line above, the line
 * itself and the line below. Lines 0, 2, 4, 6 and 8 are only read.
 * Three implementations are selected at compile time: MMX2 (pminub/pmaxub),
 * plain MMX (compare masks) and scalar C.
 * @param src    pointer into the image; the block is modified in place
 * @param stride offset in bytes between two consecutive lines
 */
01953 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01954 {
01955 #ifdef HAVE_MMX
01956 src+= 4*stride;
01957 #ifdef HAVE_MMX2
01958 asm volatile(
// REG_a = line 1, REG_d = line 5
01959 "lea (%0, %1), %%"REG_a" \n\t"
01960 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01961
01962
01963
// Each group computes median(x, y, z) = min(max(x, y), max(min(x, y), z)).
// line 1 = median(line 0, line 1, line 2); mm2 keeps line 2
01964 "movq (%0), %%mm0 \n\t"
01965 "movq (%%"REG_a", %1), %%mm2 \n\t"
01966 "movq (%%"REG_a"), %%mm1 \n\t"
01967 "movq %%mm0, %%mm3 \n\t"
01968 "pmaxub %%mm1, %%mm0 \n\t"
01969 "pminub %%mm3, %%mm1 \n\t"
01970 "pmaxub %%mm2, %%mm1 \n\t"
01971 "pminub %%mm1, %%mm0 \n\t"
01972 "movq %%mm0, (%%"REG_a") \n\t"
01973
// line 3 = median(line 2, line 3, line 4); mm0 keeps line 4
01974 "movq (%0, %1, 4), %%mm0 \n\t"
01975 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
01976 "movq %%mm2, %%mm3 \n\t"
01977 "pmaxub %%mm1, %%mm2 \n\t"
01978 "pminub %%mm3, %%mm1 \n\t"
01979 "pmaxub %%mm0, %%mm1 \n\t"
01980 "pminub %%mm1, %%mm2 \n\t"
01981 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
01982
// line 5 = median(line 4, line 5, line 6); mm1 keeps line 6
01983 "movq (%%"REG_d"), %%mm2 \n\t"
01984 "movq (%%"REG_d", %1), %%mm1 \n\t"
01985 "movq %%mm2, %%mm3 \n\t"
01986 "pmaxub %%mm0, %%mm2 \n\t"
01987 "pminub %%mm3, %%mm0 \n\t"
01988 "pmaxub %%mm1, %%mm0 \n\t"
01989 "pminub %%mm0, %%mm2 \n\t"
01990 "movq %%mm2, (%%"REG_d") \n\t"
01991
// line 7 = median(line 6, line 7, line 8)
01992 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
01993 "movq (%0, %1, 8), %%mm0 \n\t"
01994 "movq %%mm2, %%mm3 \n\t"
01995 "pmaxub %%mm0, %%mm2 \n\t"
01996 "pminub %%mm3, %%mm0 \n\t"
01997 "pmaxub %%mm1, %%mm0 \n\t"
01998 "pminub %%mm0, %%mm2 \n\t"
01999 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
02000
02001
02002 : : "r" (src), "r" ((long)stride)
02003 : "%"REG_a, "%"REG_d
02004 );
02005
02006 #else // MMX without MMX2
02007 asm volatile(
// REG_a = line 1, REG_d = line 5, mm7 = 0 as the comparison reference
02008 "lea (%0, %1), %%"REG_a" \n\t"
02009 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
02010
02011
02012 "pxor %%mm7, %%mm7 \n\t"
02013
// MEDIAN(a,b,c): builds three per-byte masks from saturated subtractions
// compared against zero (a<=c, c<=b, b<=a), XORs them pairwise, ORs the
// results into c, b and a and ANDs the three together, storing the outcome to
// b. This is the same select-by-mask scheme as the scalar fallback below.
// Uses mm0-mm6 and expects mm7 = 0.
02014 #define REAL_MEDIAN(a,b,c)\
02015 "movq " #a ", %%mm0 \n\t"\
02016 "movq " #b ", %%mm2 \n\t"\
02017 "movq " #c ", %%mm1 \n\t"\
02018 "movq %%mm0, %%mm3 \n\t"\
02019 "movq %%mm1, %%mm4 \n\t"\
02020 "movq %%mm2, %%mm5 \n\t"\
02021 "psubusb %%mm1, %%mm3 \n\t"\
02022 "psubusb %%mm2, %%mm4 \n\t"\
02023 "psubusb %%mm0, %%mm5 \n\t"\
02024 "pcmpeqb %%mm7, %%mm3 \n\t"\
02025 "pcmpeqb %%mm7, %%mm4 \n\t"\
02026 "pcmpeqb %%mm7, %%mm5 \n\t"\
02027 "movq %%mm3, %%mm6 \n\t"\
02028 "pxor %%mm4, %%mm3 \n\t"\
02029 "pxor %%mm5, %%mm4 \n\t"\
02030 "pxor %%mm6, %%mm5 \n\t"\
02031 "por %%mm3, %%mm1 \n\t"\
02032 "por %%mm4, %%mm2 \n\t"\
02033 "por %%mm5, %%mm0 \n\t"\
02034 "pand %%mm2, %%mm0 \n\t"\
02035 "pand %%mm1, %%mm0 \n\t"\
02036 "movq %%mm0, " #b " \n\t"
// Extra macro level so that arguments are macro-expanded before stringification.
02037 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
02038
// Output lines (argument b): 1, 3, 5, 7
02039 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
02040 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
02041 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
02042 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
02043
02044 : : "r" (src), "r" ((long)stride)
02045 : "%"REG_a, "%"REG_d
02046 );
02047 #endif //HAVE_MMX2
02048 #else //HAVE_MMX
// Scalar fallback: for each of the 8 columns, walk down in steps of two lines
// and replace the middle line of each (a, b, c) triple.
02049 int x, y;
02050 src+= 4*stride;
02051
02052 for(x=0; x<8; x++)
02053 {
02054 uint8_t *colsrc = src;
02055 for (y=0; y<4; y++)
02056 {
02057 int a, b, c, d, e, f;
02058 a = colsrc[0 ];
02059 b = colsrc[stride ];
02060 c = colsrc[stride*2];
// d, e, f are comparison masks: all ones if a<b, b<c, c<a respectively,
// otherwise 0. NOTE: this relies on int being 32 bits wide and on >> of a
// negative int being an arithmetic shift.
02061 d = (a-b)>>31;
02062 e = (b-c)>>31;
02063 f = (c-a)>>31;
// A value whose two masks differ is ORed with all ones and drops out of the
// AND; a value whose two masks are equal is the median and passes through.
02064 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
02065 colsrc += stride*2;
02066 }
02067 src++;
02068 }
02069 #endif //HAVE_MMX
02070 }
02071
02072 #ifdef HAVE_MMX
02073
/**
 * Transposes an 8x8 byte block from src into two destination buffers that use
 * a fixed line pitch of 16 bytes.
 * Source column k (8 bytes, one from each of the 8 source lines) becomes one
 * 8-byte destination line, written as two 4-byte halves (source lines 0-3,
 * then source lines 4-7 at byte offset +4):
 *  - columns 0..4 go to dst1 at byte offsets 128, 144, 160, 176, 192
 *  - columns 3..7 go to dst2 at byte offsets  48,  64,  80,  96, 112
 * Columns 3 and 4 are therefore written to both buffers.
 * @param dst1      first destination buffer (written at offsets 128..199)
 * @param dst2      second destination buffer (written at offsets 48..119)
 * @param src       top-left byte of the 8x8 source block
 * @param srcStride offset in bytes between two consecutive source lines
 */
02076 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
02077 {
02078 asm(
// REG_a = src + srcStride (source line 1)
02079 "lea (%0, %1), %%"REG_a" \n\t"
02080
02081
// Interleave bytes of source lines 0 and 1 (mm0 = columns 0-3, mm2 = columns 4-7)
02082 "movq (%0), %%mm0 \n\t"
02083 "movq (%%"REG_a"), %%mm1 \n\t"
02084 "movq %%mm0, %%mm2 \n\t"
02085 "punpcklbw %%mm1, %%mm0 \n\t"
02086 "punpckhbw %%mm1, %%mm2 \n\t"
02087
// Interleave bytes of source lines 2 and 3 (mm1 = columns 0-3, mm4 = columns 4-7)
02088 "movq (%%"REG_a", %1), %%mm1 \n\t"
02089 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02090 "movq %%mm1, %%mm4 \n\t"
02091 "punpcklbw %%mm3, %%mm1 \n\t"
02092 "punpckhbw %%mm3, %%mm4 \n\t"
02093
// Interleave words: mm0 = columns 0,1  mm3 = columns 2,3
//                   mm2 = columns 4,5  mm1 = columns 6,7 (4 bytes each)
02094 "movq %%mm0, %%mm3 \n\t"
02095 "punpcklwd %%mm1, %%mm0 \n\t"
02096 "punpckhwd %%mm1, %%mm3 \n\t"
02097 "movq %%mm2, %%mm1 \n\t"
02098 "punpcklwd %%mm4, %%mm2 \n\t"
02099 "punpckhwd %%mm4, %%mm1 \n\t"
02100
// Store the first half (source lines 0-3) of every column
02101 "movd %%mm0, 128(%2) \n\t"
02102 "psrlq $32, %%mm0 \n\t"
02103 "movd %%mm0, 144(%2) \n\t"
02104 "movd %%mm3, 160(%2) \n\t"
02105 "psrlq $32, %%mm3 \n\t"
02106 "movd %%mm3, 176(%2) \n\t"
02107 "movd %%mm3, 48(%3) \n\t"
02108 "movd %%mm2, 192(%2) \n\t"
02109 "movd %%mm2, 64(%3) \n\t"
02110 "psrlq $32, %%mm2 \n\t"
02111 "movd %%mm2, 80(%3) \n\t"
02112 "movd %%mm1, 96(%3) \n\t"
02113 "psrlq $32, %%mm1 \n\t"
02114 "movd %%mm1, 112(%3) \n\t"
02115
// REG_a += 4*srcStride (source line 5); repeat for source lines 4-7
02116 "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t"
02117
02118 "movq (%0, %1, 4), %%mm0 \n\t"
02119 "movq (%%"REG_a"), %%mm1 \n\t"
02120 "movq %%mm0, %%mm2 \n\t"
02121 "punpcklbw %%mm1, %%mm0 \n\t"
02122 "punpckhbw %%mm1, %%mm2 \n\t"
02123
02124 "movq (%%"REG_a", %1), %%mm1 \n\t"
02125 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02126 "movq %%mm1, %%mm4 \n\t"
02127 "punpcklbw %%mm3, %%mm1 \n\t"
02128 "punpckhbw %%mm3, %%mm4 \n\t"
02129
02130 "movq %%mm0, %%mm3 \n\t"
02131 "punpcklwd %%mm1, %%mm0 \n\t"
02132 "punpckhwd %%mm1, %%mm3 \n\t"
02133 "movq %%mm2, %%mm1 \n\t"
02134 "punpcklwd %%mm4, %%mm2 \n\t"
02135 "punpckhwd %%mm4, %%mm1 \n\t"
02136
// Store the second half (source lines 4-7) of every column at offset +4
02137 "movd %%mm0, 132(%2) \n\t"
02138 "psrlq $32, %%mm0 \n\t"
02139 "movd %%mm0, 148(%2) \n\t"
02140 "movd %%mm3, 164(%2) \n\t"
02141 "psrlq $32, %%mm3 \n\t"
02142 "movd %%mm3, 180(%2) \n\t"
02143 "movd %%mm3, 52(%3) \n\t"
02144 "movd %%mm2, 196(%2) \n\t"
02145 "movd %%mm2, 68(%3) \n\t"
02146 "psrlq $32, %%mm2 \n\t"
02147 "movd %%mm2, 84(%3) \n\t"
02148 "movd %%mm1, 100(%3) \n\t"
02149 "psrlq $32, %%mm1 \n\t"
02150 "movd %%mm1, 116(%3) \n\t"
02151
02152
// Operands: %0 = src, %1 = srcStride, %2 = dst1, %3 = dst2
02153 :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2)
02154 : "%"REG_a
02155 );
02156 }
02157
/**
 * Transposes an 8x8 byte block from a buffer with a fixed line pitch of
 * 16 bytes into dst.
 * The 8 source lines are read at byte offsets 0, 16, ..., 112 of src (8 bytes
 * each). Source column k becomes destination line k; each destination line
 * is written as two 4-byte halves (source lines 0-3 at offset 0, source
 * lines 4-7 at offset 4).
 * @param dst       top-left byte of the 8x8 destination block
 * @param dstStride offset in bytes between two consecutive destination lines
 * @param src       source buffer holding 8 lines at a 16-byte pitch
 */
02161 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02162 {
02163 asm(
// REG_a = dst + dstStride (line 1), REG_d = REG_a + 4*dstStride (line 5)
02164 "lea (%0, %1), %%"REG_a" \n\t"
02165 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
02166
02167
// Interleave bytes of source lines 0 and 1 (mm0 = columns 0-3, mm2 = columns 4-7)
02168 "movq (%2), %%mm0 \n\t"
02169 "movq 16(%2), %%mm1 \n\t"
02170 "movq %%mm0, %%mm2 \n\t"
02171 "punpcklbw %%mm1, %%mm0 \n\t"
02172 "punpckhbw %%mm1, %%mm2 \n\t"
02173
// Interleave bytes of source lines 2 and 3 (mm1 = columns 0-3, mm4 = columns 4-7)
02174 "movq 32(%2), %%mm1 \n\t"
02175 "movq 48(%2), %%mm3 \n\t"
02176 "movq %%mm1, %%mm4 \n\t"
02177 "punpcklbw %%mm3, %%mm1 \n\t"
02178 "punpckhbw %%mm3, %%mm4 \n\t"
02179
// Interleave words: mm0 = columns 0,1  mm3 = columns 2,3
//                   mm2 = columns 4,5  mm1 = columns 6,7 (4 bytes each)
02180 "movq %%mm0, %%mm3 \n\t"
02181 "punpcklwd %%mm1, %%mm0 \n\t"
02182 "punpckhwd %%mm1, %%mm3 \n\t"
02183 "movq %%mm2, %%mm1 \n\t"
02184 "punpcklwd %%mm4, %%mm2 \n\t"
02185 "punpckhwd %%mm4, %%mm1 \n\t"
02186
// Store the left 4 bytes of destination lines 0..7
02187 "movd %%mm0, (%0) \n\t"
02188 "psrlq $32, %%mm0 \n\t"
02189 "movd %%mm0, (%%"REG_a") \n\t"
02190 "movd %%mm3, (%%"REG_a", %1) \n\t"
02191 "psrlq $32, %%mm3 \n\t"
02192 "movd %%mm3, (%%"REG_a", %1, 2) \n\t"
02193 "movd %%mm2, (%0, %1, 4) \n\t"
02194 "psrlq $32, %%mm2 \n\t"
02195 "movd %%mm2, (%%"REG_d") \n\t"
02196 "movd %%mm1, (%%"REG_d", %1) \n\t"
02197 "psrlq $32, %%mm1 \n\t"
02198 "movd %%mm1, (%%"REG_d", %1, 2) \n\t"
02199
02200
// Same again for source lines 4-7 (byte offsets 64, 80, 96, 112)
02201 "movq 64(%2), %%mm0 \n\t"
02202 "movq 80(%2), %%mm1 \n\t"
02203 "movq %%mm0, %%mm2 \n\t"
02204 "punpcklbw %%mm1, %%mm0 \n\t"
02205 "punpckhbw %%mm1, %%mm2 \n\t"
02206
02207 "movq 96(%2), %%mm1 \n\t"
02208 "movq 112(%2), %%mm3 \n\t"
02209 "movq %%mm1, %%mm4 \n\t"
02210 "punpcklbw %%mm3, %%mm1 \n\t"
02211 "punpckhbw %%mm3, %%mm4 \n\t"
02212
02213 "movq %%mm0, %%mm3 \n\t"
02214 "punpcklwd %%mm1, %%mm0 \n\t"
02215 "punpckhwd %%mm1, %%mm3 \n\t"
02216 "movq %%mm2, %%mm1 \n\t"
02217 "punpcklwd %%mm4, %%mm2 \n\t"
02218 "punpckhwd %%mm4, %%mm1 \n\t"
02219
// Store the right 4 bytes (offset +4) of destination lines 0..7
02220 "movd %%mm0, 4(%0) \n\t"
02221 "psrlq $32, %%mm0 \n\t"
02222 "movd %%mm0, 4(%%"REG_a") \n\t"
02223 "movd %%mm3, 4(%%"REG_a", %1) \n\t"
02224 "psrlq $32, %%mm3 \n\t"
02225 "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t"
02226 "movd %%mm2, 4(%0, %1, 4) \n\t"
02227 "psrlq $32, %%mm2 \n\t"
02228 "movd %%mm2, 4(%%"REG_d") \n\t"
02229 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
02230 "psrlq $32, %%mm1 \n\t"
02231 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
02232
// Operands: %0 = dst, %1 = dstStride, %2 = src
02233 :: "r" (dst), "r" ((long)dstStride), "r" (src)
02234 : "%"REG_a, "%"REG_d
02235 );
02236 }
02237 #endif //HAVE_MMX
02238
02239
02240 #ifndef HAVE_ALTIVEC
02241 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02242 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
02243 {
02244
02245 tempBluredPast[127]= maxNoise[0];
02246 tempBluredPast[128]= maxNoise[1];
02247 tempBluredPast[129]= maxNoise[2];
02248
02249 #define FAST_L2_DIFF
02250
02251 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02252 asm volatile(
02253 "lea (%2, %2, 2), %%"REG_a" \n\t"
02254 "lea (%2, %2, 4), %%"REG_d" \n\t"
02255 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02256
02257
02258
02259 #ifdef L1_DIFF //needs mmx2
02260 "movq (%0), %%mm0 \n\t"
02261 "psadbw (%1), %%mm0 \n\t"
02262 "movq (%0, %2), %%mm1 \n\t"
02263 "psadbw (%1, %2), %%mm1 \n\t"
02264 "movq (%0, %2, 2), %%mm2 \n\t"
02265 "psadbw (%1, %2, 2), %%mm2 \n\t"
02266 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02267 "psadbw (%1, %%"REG_a"), %%mm3 \n\t"
02268
02269 "movq (%0, %2, 4), %%mm4 \n\t"
02270 "paddw %%mm1, %%mm0 \n\t"
02271 "psadbw (%1, %2, 4), %%mm4 \n\t"
02272 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02273 "paddw %%mm2, %%mm0 \n\t"
02274 "psadbw (%1, %%"REG_d"), %%mm5 \n\t"
02275 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02276 "paddw %%mm3, %%mm0 \n\t"
02277 "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t"
02278 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02279 "paddw %%mm4, %%mm0 \n\t"
02280 "psadbw (%1, %%"REG_c"), %%mm7 \n\t"
02281 "paddw %%mm5, %%mm6 \n\t"
02282 "paddw %%mm7, %%mm6 \n\t"
02283 "paddw %%mm6, %%mm0 \n\t"
02284 #else //L1_DIFF
02285 #if defined (FAST_L2_DIFF)
02286 "pcmpeqb %%mm7, %%mm7 \n\t"
02287 "movq "MANGLE(b80)", %%mm6 \n\t"
02288 "pxor %%mm0, %%mm0 \n\t"
02289 #define REAL_L2_DIFF_CORE(a, b)\
02290 "movq " #a ", %%mm5 \n\t"\
02291 "movq " #b ", %%mm2 \n\t"\
02292 "pxor %%mm7, %%mm2 \n\t"\
02293 PAVGB(%%mm2, %%mm5)\
02294 "paddb %%mm6, %%mm5 \n\t"\
02295 "movq %%mm5, %%mm2 \n\t"\
02296 "psllw $8, %%mm5 \n\t"\
02297 "pmaddwd %%mm5, %%mm5 \n\t"\
02298 "pmaddwd %%mm2, %%mm2 \n\t"\
02299 "paddd %%mm2, %%mm5 \n\t"\
02300 "psrld $14, %%mm5 \n\t"\
02301 "paddd %%mm5, %%mm0 \n\t"
02302
02303 #else //defined (FAST_L2_DIFF)
02304 "pxor %%mm7, %%mm7 \n\t"
02305 "pxor %%mm0, %%mm0 \n\t"
02306 #define REAL_L2_DIFF_CORE(a, b)\
02307 "movq " #a ", %%mm5 \n\t"\
02308 "movq " #b ", %%mm2 \n\t"\
02309 "movq %%mm5, %%mm1 \n\t"\
02310 "movq %%mm2, %%mm3 \n\t"\
02311 "punpcklbw %%mm7, %%mm5 \n\t"\
02312 "punpckhbw %%mm7, %%mm1 \n\t"\
02313 "punpcklbw %%mm7, %%mm2 \n\t"\
02314 "punpckhbw %%mm7, %%mm3 \n\t"\
02315 "psubw %%mm2, %%mm5 \n\t"\
02316 "psubw %%mm3, %%mm1 \n\t"\
02317 "pmaddwd %%mm5, %%mm5 \n\t"\
02318 "pmaddwd %%mm1, %%mm1 \n\t"\
02319 "paddd %%mm1, %%mm5 \n\t"\
02320 "paddd %%mm5, %%mm0 \n\t"
02321
02322 #endif //defined (FAST_L2_DIFF)
02323
02324 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
02325
02326 L2_DIFF_CORE((%0) , (%1))
02327 L2_DIFF_CORE((%0, %2) , (%1, %2))
02328 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
02329 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
02330 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
02331 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
02332 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02333 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
02334
02335 #endif //L1_DIFF
02336
02337 "movq %%mm0, %%mm4 \n\t"
02338 "psrlq $32, %%mm0 \n\t"
02339 "paddd %%mm0, %%mm4 \n\t"
02340 "movd %%mm4, %%ecx \n\t"
02341 "shll $2, %%ecx \n\t"
02342 "mov %3, %%"REG_d" \n\t"
02343 "addl -4(%%"REG_d"), %%ecx \n\t"
02344 "addl 4(%%"REG_d"), %%ecx \n\t"
02345 "addl -1024(%%"REG_d"), %%ecx \n\t"
02346 "addl $4, %%ecx \n\t"
02347 "addl 1024(%%"REG_d"), %%ecx \n\t"
02348 "shrl $3, %%ecx \n\t"
02349 "movl %%ecx, (%%"REG_d") \n\t"
02350
02351
02352
02353
02354 "cmpl 512(%%"REG_d"), %%ecx \n\t"
02355 " jb 2f \n\t"
02356 "cmpl 516(%%"REG_d"), %%ecx \n\t"
02357 " jb 1f \n\t"
02358
02359 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02360 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02361 "movq (%0), %%mm0 \n\t"
02362 "movq (%0, %2), %%mm1 \n\t"
02363 "movq (%0, %2, 2), %%mm2 \n\t"
02364 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02365 "movq (%0, %2, 4), %%mm4 \n\t"
02366 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02367 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02368 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02369 "movq %%mm0, (%1) \n\t"
02370 "movq %%mm1, (%1, %2) \n\t"
02371 "movq %%mm2, (%1, %2, 2) \n\t"
02372 "movq %%mm3, (%1, %%"REG_a") \n\t"
02373 "movq %%mm4, (%1, %2, 4) \n\t"
02374 "movq %%mm5, (%1, %%"REG_d") \n\t"
02375 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02376 "movq %%mm7, (%1, %%"REG_c") \n\t"
02377 "jmp 4f \n\t"
02378
02379 "1: \n\t"
02380 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02381 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02382 "movq (%0), %%mm0 \n\t"
02383 PAVGB((%1), %%mm0)
02384 "movq (%0, %2), %%mm1 \n\t"
02385 PAVGB((%1, %2), %%mm1)
02386 "movq (%0, %2, 2), %%mm2 \n\t"
02387 PAVGB((%1, %2, 2), %%mm2)
02388 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02389 PAVGB((%1, %%REGa), %%mm3)
02390 "movq (%0, %2, 4), %%mm4 \n\t"
02391 PAVGB((%1, %2, 4), %%mm4)
02392 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02393 PAVGB((%1, %%REGd), %%mm5)
02394 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02395 PAVGB((%1, %%REGa, 2), %%mm6)
02396 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02397 PAVGB((%1, %%REGc), %%mm7)
02398 "movq %%mm0, (%1) \n\t"
02399 "movq %%mm1, (%1, %2) \n\t"
02400 "movq %%mm2, (%1, %2, 2) \n\t"
02401 "movq %%mm3, (%1, %%"REG_a") \n\t"
02402 "movq %%mm4, (%1, %2, 4) \n\t"
02403 "movq %%mm5, (%1, %%"REG_d") \n\t"
02404 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02405 "movq %%mm7, (%1, %%"REG_c") \n\t"
02406 "movq %%mm0, (%0) \n\t"
02407 "movq %%mm1, (%0, %2) \n\t"
02408 "movq %%mm2, (%0, %2, 2) \n\t"
02409 "movq %%mm3, (%0, %%"REG_a") \n\t"
02410 "movq %%mm4, (%0, %2, 4) \n\t"
02411 "movq %%mm5, (%0, %%"REG_d") \n\t"
02412 "movq %%mm6, (%0, %%"REG_a", 2) \n\t"
02413 "movq %%mm7, (%0, %%"REG_c") \n\t"
02414 "jmp 4f \n\t"
02415
02416 "2: \n\t"
02417 "cmpl 508(%%"REG_d"), %%ecx \n\t"
02418 " jb 3f \n\t"
02419
02420 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02421 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02422 "movq (%0), %%mm0 \n\t"
02423 "movq (%0, %2), %%mm1 \n\t"
02424 "movq (%0, %2, 2), %%mm2 \n\t"
02425 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02426 "movq (%1), %%mm4 \n\t"
02427 "movq (%1, %2), %%mm5 \n\t"
02428 "movq (%1, %2, 2), %%mm6 \n\t"
02429 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02430 PAVGB(%%mm4, %%mm0)
02431 PAVGB(%%mm5, %%mm1)
02432 PAVGB(%%mm6, %%mm2)
02433 PAVGB(%%mm7, %%mm3)
02434 PAVGB(%%mm4, %%mm0)
02435 PAVGB(%%mm5, %%mm1)
02436 PAVGB(%%mm6, %%mm2)
02437 PAVGB(%%mm7, %%mm3)
02438 "movq %%mm0, (%1) \n\t"
02439 "movq %%mm1, (%1, %2) \n\t"
02440 "movq %%mm2, (%1, %2, 2) \n\t"
02441 "movq %%mm3, (%1, %%"REG_a") \n\t"
02442 "movq %%mm0, (%0) \n\t"
02443 "movq %%mm1, (%0, %2) \n\t"
02444 "movq %%mm2, (%0, %2, 2) \n\t"
02445 "movq %%mm3, (%0, %%"REG_a") \n\t"
02446
02447 "movq (%0, %2, 4), %%mm0 \n\t"
02448 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02449 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02450 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02451 "movq (%1, %2, 4), %%mm4 \n\t"
02452 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02453 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02454 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02455 PAVGB(%%mm4, %%mm0)
02456 PAVGB(%%mm5, %%mm1)
02457 PAVGB(%%mm6, %%mm2)
02458 PAVGB(%%mm7, %%mm3)
02459 PAVGB(%%mm4, %%mm0)
02460 PAVGB(%%mm5, %%mm1)
02461 PAVGB(%%mm6, %%mm2)
02462 PAVGB(%%mm7, %%mm3)
02463 "movq %%mm0, (%1, %2, 4) \n\t"
02464 "movq %%mm1, (%1, %%"REG_d") \n\t"
02465 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02466 "movq %%mm3, (%1, %%"REG_c") \n\t"
02467 "movq %%mm0, (%0, %2, 4) \n\t"
02468 "movq %%mm1, (%0, %%"REG_d") \n\t"
02469 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02470 "movq %%mm3, (%0, %%"REG_c") \n\t"
02471 "jmp 4f \n\t"
02472
02473 "3: \n\t"
02474 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02475 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02476 "movq (%0), %%mm0 \n\t"
02477 "movq (%0, %2), %%mm1 \n\t"
02478 "movq (%0, %2, 2), %%mm2 \n\t"
02479 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02480 "movq (%1), %%mm4 \n\t"
02481 "movq (%1, %2), %%mm5 \n\t"
02482 "movq (%1, %2, 2), %%mm6 \n\t"
02483 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02484 PAVGB(%%mm4, %%mm0)
02485 PAVGB(%%mm5, %%mm1)
02486 PAVGB(%%mm6, %%mm2)
02487 PAVGB(%%mm7, %%mm3)
02488 PAVGB(%%mm4, %%mm0)
02489 PAVGB(%%mm5, %%mm1)
02490 PAVGB(%%mm6, %%mm2)
02491 PAVGB(%%mm7, %%mm3)
02492 PAVGB(%%mm4, %%mm0)
02493 PAVGB(%%mm5, %%mm1)
02494 PAVGB(%%mm6, %%mm2)
02495 PAVGB(%%mm7, %%mm3)
02496 "movq %%mm0, (%1) \n\t"
02497 "movq %%mm1, (%1, %2) \n\t"
02498 "movq %%mm2, (%1, %2, 2) \n\t"
02499 "movq %%mm3, (%1, %%"REG_a") \n\t"
02500 "movq %%mm0, (%0) \n\t"
02501 "movq %%mm1, (%0, %2) \n\t"
02502 "movq %%mm2, (%0, %2, 2) \n\t"
02503 "movq %%mm3, (%0, %%"REG_a") \n\t"
02504
02505 "movq (%0, %2, 4), %%mm0 \n\t"
02506 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02507 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02508 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02509 "movq (%1, %2, 4), %%mm4 \n\t"
02510 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02511 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02512 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02513 PAVGB(%%mm4, %%mm0)
02514 PAVGB(%%mm5, %%mm1)
02515 PAVGB(%%mm6, %%mm2)
02516 PAVGB(%%mm7, %%mm3)
02517 PAVGB(%%mm4, %%mm0)
02518 PAVGB(%%mm5, %%mm1)
02519 PAVGB(%%mm6, %%mm2)
02520 PAVGB(%%mm7, %%mm3)
02521 PAVGB(%%mm4, %%mm0)
02522 PAVGB(%%mm5, %%mm1)
02523 PAVGB(%%mm6, %%mm2)
02524 PAVGB(%%mm7, %%mm3)
02525 "movq %%mm0, (%1, %2, 4) \n\t"
02526 "movq %%mm1, (%1, %%"REG_d") \n\t"
02527 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02528 "movq %%mm3, (%1, %%"REG_c") \n\t"
02529 "movq %%mm0, (%0, %2, 4) \n\t"
02530 "movq %%mm1, (%0, %%"REG_d") \n\t"
02531 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02532 "movq %%mm3, (%0, %%"REG_c") \n\t"
02533
02534 "4: \n\t"
02535
02536 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
02537 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02538 );
02539 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02540 {
02541 int y;
02542 int d=0;
02543
02544 int i;
02545
02546 for(y=0; y<8; y++)
02547 {
02548 int x;
02549 for(x=0; x<8; x++)
02550 {
02551 int ref= tempBlured[ x + y*stride ];
02552 int cur= src[ x + y*stride ];
02553 int d1=ref - cur;
02554
02555
02556
02557 d+= d1*d1;
02558
02559 }
02560 }
02561 i=d;
02562 d= (
02563 4*d
02564 +(*(tempBluredPast-256))
02565 +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
02566 +(*(tempBluredPast+256))
02567 +4)>>3;
02568 *tempBluredPast=i;
02569
02570
02571
02572
02573
02574
02575
02576
02577
02578 if(d > maxNoise[1])
02579 {
02580 if(d < maxNoise[2])
02581 {
02582 for(y=0; y<8; y++)
02583 {
02584 int x;
02585 for(x=0; x<8; x++)
02586 {
02587 int ref= tempBlured[ x + y*stride ];
02588 int cur= src[ x + y*stride ];
02589 tempBlured[ x + y*stride ]=
02590 src[ x + y*stride ]=
02591 (ref + cur + 1)>>1;
02592 }
02593 }
02594 }
02595 else
02596 {
02597 for(y=0; y<8; y++)
02598 {
02599 int x;
02600 for(x=0; x<8; x++)
02601 {
02602 tempBlured[ x + y*stride ]= src[ x + y*stride ];
02603 }
02604 }
02605 }
02606 }
02607 else
02608 {
02609 if(d < maxNoise[0])
02610 {
02611 for(y=0; y<8; y++)
02612 {
02613 int x;
02614 for(x=0; x<8; x++)
02615 {
02616 int ref= tempBlured[ x + y*stride ];
02617 int cur= src[ x + y*stride ];
02618 tempBlured[ x + y*stride ]=
02619 src[ x + y*stride ]=
02620 (ref*7 + cur + 4)>>3;
02621 }
02622 }
02623 }
02624 else
02625 {
02626 for(y=0; y<8; y++)
02627 {
02628 int x;
02629 for(x=0; x<8; x++)
02630 {
02631 int ref= tempBlured[ x + y*stride ];
02632 int cur= src[ x + y*stride ];
02633 tempBlured[ x + y*stride ]=
02634 src[ x + y*stride ]=
02635 (ref*3 + cur + 2)>>2;
02636 }
02637 }
02638 }
02639 }
02640 }
02641 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02642 }
02643 #endif //HAVE_ALTIVEC
02644
02645 #ifdef HAVE_MMX
02646
02649 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
02650 int64_t dc_mask, eq_mask, both_masks;
02651 int64_t sums[10*8*2];
02652 src+= step*3;
02653
02654 asm volatile(
02655 "movq %0, %%mm7 \n\t"
02656 "movq %1, %%mm6 \n\t"
02657 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
02658 );
02659
02660 asm volatile(
02661 "lea (%2, %3), %%"REG_a" \n\t"
02662
02663
02664
02665 "movq (%2), %%mm0 \n\t"
02666 "movq (%%"REG_a"), %%mm1 \n\t"
02667 "movq %%mm1, %%mm3 \n\t"
02668 "movq %%mm1, %%mm4 \n\t"
02669 "psubb %%mm1, %%mm0 \n\t"
02670 "paddb %%mm7, %%mm0 \n\t"
02671 "pcmpgtb %%mm6, %%mm0 \n\t"
02672
02673 "movq (%%"REG_a",%3), %%mm2 \n\t"
02674 PMAXUB(%%mm2, %%mm4)
02675 PMINUB(%%mm2, %%mm3, %%mm5)
02676 "psubb %%mm2, %%mm1 \n\t"
02677 "paddb %%mm7, %%mm1 \n\t"
02678 "pcmpgtb %%mm6, %%mm1 \n\t"
02679 "paddb %%mm1, %%mm0 \n\t"
02680
02681 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02682 PMAXUB(%%mm1, %%mm4)
02683 PMINUB(%%mm1, %%mm3, %%mm5)
02684 "psubb %%mm1, %%mm2 \n\t"
02685 "paddb %%mm7, %%mm2 \n\t"
02686 "pcmpgtb %%mm6, %%mm2 \n\t"
02687 "paddb %%mm2, %%mm0 \n\t"
02688
02689 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
02690
02691 "movq (%2, %3, 4), %%mm2 \n\t"
02692 PMAXUB(%%mm2, %%mm4)
02693 PMINUB(%%mm2, %%mm3, %%mm5)
02694 "psubb %%mm2, %%mm1 \n\t"
02695 "paddb %%mm7, %%mm1 \n\t"
02696 "pcmpgtb %%mm6, %%mm1 \n\t"
02697 "paddb %%mm1, %%mm0 \n\t"
02698
02699 "movq (%%"REG_a"), %%mm1 \n\t"
02700 PMAXUB(%%mm1, %%mm4)
02701 PMINUB(%%mm1, %%mm3, %%mm5)
02702 "psubb %%mm1, %%mm2 \n\t"
02703 "paddb %%mm7, %%mm2 \n\t"
02704 "pcmpgtb %%mm6, %%mm2 \n\t"
02705 "paddb %%mm2, %%mm0 \n\t"
02706
02707 "movq (%%"REG_a", %3), %%mm2 \n\t"
02708 PMAXUB(%%mm2, %%mm4)
02709 PMINUB(%%mm2, %%mm3, %%mm5)
02710 "psubb %%mm2, %%mm1 \n\t"
02711 "paddb %%mm7, %%mm1 \n\t"
02712 "pcmpgtb %%mm6, %%mm1 \n\t"
02713 "paddb %%mm1, %%mm0 \n\t"
02714
02715 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02716 PMAXUB(%%mm1, %%mm4)
02717 PMINUB(%%mm1, %%mm3, %%mm5)
02718 "psubb %%mm1, %%mm2 \n\t"
02719 "paddb %%mm7, %%mm2 \n\t"
02720 "pcmpgtb %%mm6, %%mm2 \n\t"
02721 "paddb %%mm2, %%mm0 \n\t"
02722
02723 "movq (%2, %3, 8), %%mm2 \n\t"
02724 PMAXUB(%%mm2, %%mm4)
02725 PMINUB(%%mm2, %%mm3, %%mm5)
02726 "psubb %%mm2, %%mm1 \n\t"
02727 "paddb %%mm7, %%mm1 \n\t"
02728 "pcmpgtb %%mm6, %%mm1 \n\t"
02729 "paddb %%mm1, %%mm0 \n\t"
02730
02731 "movq (%%"REG_a", %3, 4), %%mm1 \n\t"
02732 "psubb %%mm1, %%mm2 \n\t"
02733 "paddb %%mm7, %%mm2 \n\t"
02734 "pcmpgtb %%mm6, %%mm2 \n\t"
02735 "paddb %%mm2, %%mm0 \n\t"
02736 "psubusb %%mm3, %%mm4 \n\t"
02737
02738 "pxor %%mm6, %%mm6 \n\t"
02739 "movq %4, %%mm7 \n\t"
02740 "paddusb %%mm7, %%mm7 \n\t"
02741 "psubusb %%mm4, %%mm7 \n\t"
02742 "pcmpeqb %%mm6, %%mm7 \n\t"
02743 "pcmpeqb %%mm6, %%mm7 \n\t"
02744 "movq %%mm7, %1 \n\t"
02745
02746 "movq %5, %%mm7 \n\t"
02747 "punpcklbw %%mm7, %%mm7 \n\t"
02748 "punpcklbw %%mm7, %%mm7 \n\t"
02749 "punpcklbw %%mm7, %%mm7 \n\t"
02750 "psubb %%mm0, %%mm6 \n\t"
02751 "pcmpgtb %%mm7, %%mm6 \n\t"
02752 "movq %%mm6, %0 \n\t"
02753
02754 : "=m" (eq_mask), "=m" (dc_mask)
02755 : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02756 : "%"REG_a
02757 );
02758
02759 both_masks = dc_mask & eq_mask;
02760
02761 if(both_masks){
02762 long offset= -8*step;
02763 int64_t *temp_sums= sums;
02764
02765 asm volatile(
02766 "movq %2, %%mm0 \n\t"
02767 "pxor %%mm4, %%mm4 \n\t"
02768
02769 "movq (%0), %%mm6 \n\t"
02770 "movq (%0, %1), %%mm5 \n\t"
02771 "movq %%mm5, %%mm1 \n\t"
02772 "movq %%mm6, %%mm2 \n\t"
02773 "psubusb %%mm6, %%mm5 \n\t"
02774 "psubusb %%mm1, %%mm2 \n\t"
02775 "por %%mm5, %%mm2 \n\t"
02776 "psubusb %%mm2, %%mm0 \n\t"
02777 "pcmpeqb %%mm4, %%mm0 \n\t"
02778
02779 "pxor %%mm6, %%mm1 \n\t"
02780 "pand %%mm0, %%mm1 \n\t"
02781 "pxor %%mm1, %%mm6 \n\t"
02782
02783
02784 "movq (%0, %1, 8), %%mm5 \n\t"
02785 "add %1, %0 \n\t"
02786 "movq (%0, %1, 8), %%mm7 \n\t"
02787 "movq %%mm5, %%mm1 \n\t"
02788 "movq %%mm7, %%mm2 \n\t"
02789 "psubusb %%mm7, %%mm5 \n\t"
02790 "psubusb %%mm1, %%mm2 \n\t"
02791 "por %%mm5, %%mm2 \n\t"
02792 "movq %2, %%mm0 \n\t"
02793 "psubusb %%mm2, %%mm0 \n\t"
02794 "pcmpeqb %%mm4, %%mm0 \n\t"
02795
02796 "pxor %%mm7, %%mm1 \n\t"
02797 "pand %%mm0, %%mm1 \n\t"
02798 "pxor %%mm1, %%mm7 \n\t"
02799
02800 "movq %%mm6, %%mm5 \n\t"
02801 "punpckhbw %%mm4, %%mm6 \n\t"
02802 "punpcklbw %%mm4, %%mm5 \n\t"
02803
02804
02805 "movq %%mm5, %%mm0 \n\t"
02806 "movq %%mm6, %%mm1 \n\t"
02807 "psllw $2, %%mm0 \n\t"
02808 "psllw $2, %%mm1 \n\t"
02809 "paddw "MANGLE(w04)", %%mm0 \n\t"
02810 "paddw "MANGLE(w04)", %%mm1 \n\t"
02811
02812 #define NEXT\
02813 "movq (%0), %%mm2 \n\t"\
02814 "movq (%0), %%mm3 \n\t"\
02815 "add %1, %0 \n\t"\
02816 "punpcklbw %%mm4, %%mm2 \n\t"\
02817 "punpckhbw %%mm4, %%mm3 \n\t"\
02818 "paddw %%mm2, %%mm0 \n\t"\
02819 "paddw %%mm3, %%mm1 \n\t"
02820
02821 #define PREV\
02822 "movq (%0), %%mm2 \n\t"\
02823 "movq (%0), %%mm3 \n\t"\
02824 "add %1, %0 \n\t"\
02825 "punpcklbw %%mm4, %%mm2 \n\t"\
02826 "punpckhbw %%mm4, %%mm3 \n\t"\
02827 "psubw %%mm2, %%mm0 \n\t"\
02828 "psubw %%mm3, %%mm1 \n\t"
02829
02830
02831 NEXT
02832 NEXT
02833 NEXT
02834 "movq %%mm0, (%3) \n\t"
02835 "movq %%mm1, 8(%3) \n\t"
02836
02837 NEXT
02838 "psubw %%mm5, %%mm0 \n\t"
02839 "psubw %%mm6, %%mm1 \n\t"
02840 "movq %%mm0, 16(%3) \n\t"
02841 "movq %%mm1, 24(%3) \n\t"
02842
02843 NEXT
02844 "psubw %%mm5, %%mm0 \n\t"
02845 "psubw %%mm6, %%mm1 \n\t"
02846 "movq %%mm0, 32(%3) \n\t"
02847 "movq %%mm1, 40(%3) \n\t"
02848
02849 NEXT
02850 "psubw %%mm5, %%mm0 \n\t"
02851 "psubw %%mm6, %%mm1 \n\t"
02852 "movq %%mm0, 48(%3) \n\t"
02853 "movq %%mm1, 56(%3) \n\t"
02854
02855 NEXT
02856 "psubw %%mm5, %%mm0 \n\t"
02857 "psubw %%mm6, %%mm1 \n\t"
02858 "movq %%mm0, 64(%3) \n\t"
02859 "movq %%mm1, 72(%3) \n\t"
02860
02861 "movq %%mm7, %%mm6 \n\t"
02862 "punpckhbw %%mm4, %%mm7 \n\t"
02863 "punpcklbw %%mm4, %%mm6 \n\t"
02864
02865 NEXT
02866 "mov %4, %0 \n\t"
02867 "add %1, %0 \n\t"
02868 PREV
02869 "movq %%mm0, 80(%3) \n\t"
02870 "movq %%mm1, 88(%3) \n\t"
02871
02872 PREV
02873 "paddw %%mm6, %%mm0 \n\t"
02874 "paddw %%mm7, %%mm1 \n\t"
02875 "movq %%mm0, 96(%3) \n\t"
02876 "movq %%mm1, 104(%3) \n\t"
02877
02878 PREV
02879 "paddw %%mm6, %%mm0 \n\t"
02880 "paddw %%mm7, %%mm1 \n\t"
02881 "movq %%mm0, 112(%3) \n\t"
02882 "movq %%mm1, 120(%3) \n\t"
02883
02884 PREV
02885 "paddw %%mm6, %%mm0 \n\t"
02886 "paddw %%mm7, %%mm1 \n\t"
02887 "movq %%mm0, 128(%3) \n\t"
02888 "movq %%mm1, 136(%3) \n\t"
02889
02890 PREV
02891 "paddw %%mm6, %%mm0 \n\t"
02892 "paddw %%mm7, %%mm1 \n\t"
02893 "movq %%mm0, 144(%3) \n\t"
02894 "movq %%mm1, 152(%3) \n\t"
02895
02896 "mov %4, %0 \n\t"
02897
02898 : "+&r"(src)
02899 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
02900 );
02901
02902 src+= step;
02903
02904 asm volatile(
02905 "movq %4, %%mm6 \n\t"
02906 "pcmpeqb %%mm5, %%mm5 \n\t"
02907 "pxor %%mm6, %%mm5 \n\t"
02908 "pxor %%mm7, %%mm7 \n\t"
02909
02910 "1: \n\t"
02911 "movq (%1), %%mm0 \n\t"
02912 "movq 8(%1), %%mm1 \n\t"
02913 "paddw 32(%1), %%mm0 \n\t"
02914 "paddw 40(%1), %%mm1 \n\t"
02915 "movq (%0, %3), %%mm2 \n\t"
02916 "movq %%mm2, %%mm3 \n\t"
02917 "movq %%mm2, %%mm4 \n\t"
02918 "punpcklbw %%mm7, %%mm2 \n\t"
02919 "punpckhbw %%mm7, %%mm3 \n\t"
02920 "paddw %%mm2, %%mm0 \n\t"
02921 "paddw %%mm3, %%mm1 \n\t"
02922 "paddw %%mm2, %%mm0 \n\t"
02923 "paddw %%mm3, %%mm1 \n\t"
02924 "psrlw $4, %%mm0 \n\t"
02925 "psrlw $4, %%mm1 \n\t"
02926 "packuswb %%mm1, %%mm0 \n\t"
02927 "pand %%mm6, %%mm0 \n\t"
02928 "pand %%mm5, %%mm4 \n\t"
02929 "por %%mm4, %%mm0 \n\t"
02930 "movq %%mm0, (%0, %3) \n\t"
02931 "add $16, %1 \n\t"
02932 "add %2, %0 \n\t"
02933 " js 1b \n\t"
02934
02935 : "+r"(offset), "+r"(temp_sums)
02936 : "r" ((long)step), "r"(src - offset), "m"(both_masks)
02937 );
02938 }else
02939 src+= step;
02940
02941 if(eq_mask != -1LL){
02942 uint8_t *temp_src= src;
02943 asm volatile(
02944 "pxor %%mm7, %%mm7 \n\t"
02945 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
02946 "and "ALIGN_MASK", %%"REG_c" \n\t"
02947
02948
02949
02950 "movq (%0), %%mm0 \n\t"
02951 "movq %%mm0, %%mm1 \n\t"
02952 "punpcklbw %%mm7, %%mm0 \n\t"
02953 "punpckhbw %%mm7, %%mm1 \n\t"
02954
02955 "movq (%0, %1), %%mm2 \n\t"
02956 "lea (%0, %1, 2), %%"REG_a" \n\t"
02957 "movq %%mm2, %%mm3 \n\t"
02958 "punpcklbw %%mm7, %%mm2 \n\t"
02959 "punpckhbw %%mm7, %%mm3 \n\t"
02960
02961 "movq (%%"REG_a"), %%mm4 \n\t"
02962 "movq %%mm4, %%mm5 \n\t"
02963 "punpcklbw %%mm7, %%mm4 \n\t"
02964 "punpckhbw %%mm7, %%mm5 \n\t"
02965
02966 "paddw %%mm0, %%mm0 \n\t"
02967 "paddw %%mm1, %%mm1 \n\t"
02968 "psubw %%mm4, %%mm2 \n\t"
02969 "psubw %%mm5, %%mm3 \n\t"
02970 "psubw %%mm2, %%mm0 \n\t"
02971 "psubw %%mm3, %%mm1 \n\t"
02972
02973 "psllw $2, %%mm2 \n\t"
02974 "psllw $2, %%mm3 \n\t"
02975 "psubw %%mm2, %%mm0 \n\t"
02976 "psubw %%mm3, %%mm1 \n\t"
02977
02978 "movq (%%"REG_a", %1), %%mm2 \n\t"
02979 "movq %%mm2, %%mm3 \n\t"
02980 "punpcklbw %%mm7, %%mm2 \n\t"
02981 "punpckhbw %%mm7, %%mm3 \n\t"
02982
02983 "psubw %%mm2, %%mm0 \n\t"
02984 "psubw %%mm3, %%mm1 \n\t"
02985 "psubw %%mm2, %%mm0 \n\t"
02986 "psubw %%mm3, %%mm1 \n\t"
02987 "movq %%mm0, (%%"REG_c") \n\t"
02988 "movq %%mm1, 8(%%"REG_c") \n\t"
02989
02990 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
02991 "movq %%mm0, %%mm1 \n\t"
02992 "punpcklbw %%mm7, %%mm0 \n\t"
02993 "punpckhbw %%mm7, %%mm1 \n\t"
02994
02995 "psubw %%mm0, %%mm2 \n\t"
02996 "psubw %%mm1, %%mm3 \n\t"
02997 "movq %%mm2, 16(%%"REG_c") \n\t"
02998 "movq %%mm3, 24(%%"REG_c") \n\t"
02999 "paddw %%mm4, %%mm4 \n\t"
03000 "paddw %%mm5, %%mm5 \n\t"
03001 "psubw %%mm2, %%mm4 \n\t"
03002 "psubw %%mm3, %%mm5 \n\t"
03003
03004 "lea (%%"REG_a", %1), %0 \n\t"
03005 "psllw $2, %%mm2 \n\t"
03006 "psllw $2, %%mm3 \n\t"
03007 "psubw %%mm2, %%mm4 \n\t"
03008 "psubw %%mm3, %%mm5 \n\t"
03009
03010 "movq (%0, %1, 2), %%mm2 \n\t"
03011 "movq %%mm2, %%mm3 \n\t"
03012 "punpcklbw %%mm7, %%mm2 \n\t"
03013 "punpckhbw %%mm7, %%mm3 \n\t"
03014 "psubw %%mm2, %%mm4 \n\t"
03015 "psubw %%mm3, %%mm5 \n\t"
03016 "psubw %%mm2, %%mm4 \n\t"
03017 "psubw %%mm3, %%mm5 \n\t"
03018
03019 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
03020 "punpcklbw %%mm7, %%mm6 \n\t"
03021 "psubw %%mm6, %%mm2 \n\t"
03022 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
03023 "punpckhbw %%mm7, %%mm6 \n\t"
03024 "psubw %%mm6, %%mm3 \n\t"
03025
03026 "paddw %%mm0, %%mm0 \n\t"
03027 "paddw %%mm1, %%mm1 \n\t"
03028 "psubw %%mm2, %%mm0 \n\t"
03029 "psubw %%mm3, %%mm1 \n\t"
03030
03031 "psllw $2, %%mm2 \n\t"
03032 "psllw $2, %%mm3 \n\t"
03033 "psubw %%mm2, %%mm0 \n\t"
03034 "psubw %%mm3, %%mm1 \n\t"
03035
03036 "movq (%0, %1, 4), %%mm2 \n\t"
03037 "movq %%mm2, %%mm3 \n\t"
03038 "punpcklbw %%mm7, %%mm2 \n\t"
03039 "punpckhbw %%mm7, %%mm3 \n\t"
03040
03041 "paddw %%mm2, %%mm2 \n\t"
03042 "paddw %%mm3, %%mm3 \n\t"
03043 "psubw %%mm2, %%mm0 \n\t"
03044 "psubw %%mm3, %%mm1 \n\t"
03045
03046 "movq (%%"REG_c"), %%mm2 \n\t"
03047 "movq 8(%%"REG_c"), %%mm3 \n\t"
03048
03049 #ifdef HAVE_MMX2
03050 "movq %%mm7, %%mm6 \n\t"
03051 "psubw %%mm0, %%mm6 \n\t"
03052 "pmaxsw %%mm6, %%mm0 \n\t"
03053 "movq %%mm7, %%mm6 \n\t"
03054 "psubw %%mm1, %%mm6 \n\t"
03055 "pmaxsw %%mm6, %%mm1 \n\t"
03056 "movq %%mm7, %%mm6 \n\t"
03057 "psubw %%mm2, %%mm6 \n\t"
03058 "pmaxsw %%mm6, %%mm2 \n\t"
03059 "movq %%mm7, %%mm6 \n\t"
03060 "psubw %%mm3, %%mm6 \n\t"
03061 "pmaxsw %%mm6, %%mm3 \n\t"
03062 #else
03063 "movq %%mm7, %%mm6 \n\t"
03064 "pcmpgtw %%mm0, %%mm6 \n\t"
03065 "pxor %%mm6, %%mm0 \n\t"
03066 "psubw %%mm6, %%mm0 \n\t"
03067 "movq %%mm7, %%mm6 \n\t"
03068 "pcmpgtw %%mm1, %%mm6 \n\t"
03069 "pxor %%mm6, %%mm1 \n\t"
03070 "psubw %%mm6, %%mm1 \n\t"
03071 "movq %%mm7, %%mm6 \n\t"
03072 "pcmpgtw %%mm2, %%mm6 \n\t"
03073 "pxor %%mm6, %%mm2 \n\t"
03074 "psubw %%mm6, %%mm2 \n\t"
03075 "movq %%mm7, %%mm6 \n\t"
03076 "pcmpgtw %%mm3, %%mm6 \n\t"
03077 "pxor %%mm6, %%mm3 \n\t"
03078 "psubw %%mm6, %%mm3 \n\t"
03079 #endif
03080
03081 #ifdef HAVE_MMX2
03082 "pminsw %%mm2, %%mm0 \n\t"
03083 "pminsw %%mm3, %%mm1 \n\t"
03084 #else
03085 "movq %%mm0, %%mm6 \n\t"
03086 "psubusw %%mm2, %%mm6 \n\t"
03087 "psubw %%mm6, %%mm0 \n\t"
03088 "movq %%mm1, %%mm6 \n\t"
03089 "psubusw %%mm3, %%mm6 \n\t"
03090 "psubw %%mm6, %%mm1 \n\t"
03091 #endif
03092
03093 "movd %2, %%mm2 \n\t"
03094 "punpcklbw %%mm7, %%mm2 \n\t"
03095
03096 "movq %%mm7, %%mm6 \n\t"
03097 "pcmpgtw %%mm4, %%mm6 \n\t"
03098 "pxor %%mm6, %%mm4 \n\t"
03099 "psubw %%mm6, %%mm4 \n\t"
03100 "pcmpgtw %%mm5, %%mm7 \n\t"
03101 "pxor %%mm7, %%mm5 \n\t"
03102 "psubw %%mm7, %%mm5 \n\t"
03103
03104 "psllw $3, %%mm2 \n\t"
03105 "movq %%mm2, %%mm3 \n\t"
03106 "pcmpgtw %%mm4, %%mm2 \n\t"
03107 "pcmpgtw %%mm5, %%mm3 \n\t"
03108 "pand %%mm2, %%mm4 \n\t"
03109 "pand %%mm3, %%mm5 \n\t"
03110
03111
03112 "psubusw %%mm0, %%mm4 \n\t"
03113 "psubusw %%mm1, %%mm5 \n\t"
03114
03115
03116 "movq "MANGLE(w05)", %%mm2 \n\t"
03117 "pmullw %%mm2, %%mm4 \n\t"
03118 "pmullw %%mm2, %%mm5 \n\t"
03119 "movq "MANGLE(w20)", %%mm2 \n\t"
03120 "paddw %%mm2, %%mm4 \n\t"
03121 "paddw %%mm2, %%mm5 \n\t"
03122 "psrlw $6, %%mm4 \n\t"
03123 "psrlw $6, %%mm5 \n\t"
03124
03125 "movq 16(%%"REG_c"), %%mm0 \n\t"
03126 "movq 24(%%"REG_c"), %%mm1 \n\t"
03127
03128 "pxor %%mm2, %%mm2 \n\t"
03129 "pxor %%mm3, %%mm3 \n\t"
03130
03131 "pcmpgtw %%mm0, %%mm2 \n\t"
03132 "pcmpgtw %%mm1, %%mm3 \n\t"
03133 "pxor %%mm2, %%mm0 \n\t"
03134 "pxor %%mm3, %%mm1 \n\t"
03135 "psubw %%mm2, %%mm0 \n\t"
03136 "psubw %%mm3, %%mm1 \n\t"
03137 "psrlw $1, %%mm0 \n\t"
03138 "psrlw $1, %%mm1 \n\t"
03139
03140 "pxor %%mm6, %%mm2 \n\t"
03141 "pxor %%mm7, %%mm3 \n\t"
03142 "pand %%mm2, %%mm4 \n\t"
03143 "pand %%mm3, %%mm5 \n\t"
03144
03145 #ifdef HAVE_MMX2
03146 "pminsw %%mm0, %%mm4 \n\t"
03147 "pminsw %%mm1, %%mm5 \n\t"
03148 #else
03149 "movq %%mm4, %%mm2 \n\t"
03150 "psubusw %%mm0, %%mm2 \n\t"
03151 "psubw %%mm2, %%mm4 \n\t"
03152 "movq %%mm5, %%mm2 \n\t"
03153 "psubusw %%mm1, %%mm2 \n\t"
03154 "psubw %%mm2, %%mm5 \n\t"
03155 #endif
03156 "pxor %%mm6, %%mm4 \n\t"
03157 "pxor %%mm7, %%mm5 \n\t"
03158 "psubw %%mm6, %%mm4 \n\t"
03159 "psubw %%mm7, %%mm5 \n\t"
03160 "packsswb %%mm5, %%mm4 \n\t"
03161 "movq %3, %%mm1 \n\t"
03162 "pandn %%mm4, %%mm1 \n\t"
03163 "movq (%0), %%mm0 \n\t"
03164 "paddb %%mm1, %%mm0 \n\t"
03165 "movq %%mm0, (%0) \n\t"
03166 "movq (%0, %1), %%mm0 \n\t"
03167 "psubb %%mm1, %%mm0 \n\t"
03168 "movq %%mm0, (%0, %1) \n\t"
03169
03170 : "+r" (temp_src)
03171 : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
03172 : "%"REG_a, "%"REG_c
03173 );
03174 }
03175
03176
03177
03178
03179
03180 }
03181 #endif //HAVE_MMX
03182
03183 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03184 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03185
03190 #undef SCALED_CPY
03191
03192 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
03193 int levelFix, int64_t *packedOffsetAndScale)
03194 {
03195 #ifndef HAVE_MMX
03196 int i;
03197 #endif
03198 if(levelFix)
03199 {
03200 #ifdef HAVE_MMX
03201 asm volatile(
03202 "movq (%%"REG_a"), %%mm2 \n\t"
03203 "movq 8(%%"REG_a"), %%mm3 \n\t"
03204 "lea (%2,%4), %%"REG_a" \n\t"
03205 "lea (%3,%5), %%"REG_d" \n\t"
03206 "pxor %%mm4, %%mm4 \n\t"
03207 #ifdef HAVE_MMX2
03208 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03209 "movq " #src1 ", %%mm0 \n\t"\
03210 "movq " #src1 ", %%mm5 \n\t"\
03211 "movq " #src2 ", %%mm1 \n\t"\
03212 "movq " #src2 ", %%mm6 \n\t"\
03213 "punpcklbw %%mm0, %%mm0 \n\t"\
03214 "punpckhbw %%mm5, %%mm5 \n\t"\
03215 "punpcklbw %%mm1, %%mm1 \n\t"\
03216 "punpckhbw %%mm6, %%mm6 \n\t"\
03217 "pmulhuw %%mm3, %%mm0 \n\t"\
03218 "pmulhuw %%mm3, %%mm5 \n\t"\
03219 "pmulhuw %%mm3, %%mm1 \n\t"\
03220 "pmulhuw %%mm3, %%mm6 \n\t"\
03221 "psubw %%mm2, %%mm0 \n\t"\
03222 "psubw %%mm2, %%mm5 \n\t"\
03223 "psubw %%mm2, %%mm1 \n\t"\
03224 "psubw %%mm2, %%mm6 \n\t"\
03225 "packuswb %%mm5, %%mm0 \n\t"\
03226 "packuswb %%mm6, %%mm1 \n\t"\
03227 "movq %%mm0, " #dst1 " \n\t"\
03228 "movq %%mm1, " #dst2 " \n\t"\
03229
03230 #else //HAVE_MMX2
03231 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03232 "movq " #src1 ", %%mm0 \n\t"\
03233 "movq " #src1 ", %%mm5 \n\t"\
03234 "punpcklbw %%mm4, %%mm0 \n\t"\
03235 "punpckhbw %%mm4, %%mm5 \n\t"\
03236 "psubw %%mm2, %%mm0 \n\t"\
03237 "psubw %%mm2, %%mm5 \n\t"\
03238 "movq " #src2 ", %%mm1 \n\t"\
03239 "psllw $6, %%mm0 \n\t"\
03240 "psllw $6, %%mm5 \n\t"\
03241 "pmulhw %%mm3, %%mm0 \n\t"\
03242 "movq " #src2 ", %%mm6 \n\t"\
03243 "pmulhw %%mm3, %%mm5 \n\t"\
03244 "punpcklbw %%mm4, %%mm1 \n\t"\
03245 "punpckhbw %%mm4, %%mm6 \n\t"\
03246 "psubw %%mm2, %%mm1 \n\t"\
03247 "psubw %%mm2, %%mm6 \n\t"\
03248 "psllw $6, %%mm1 \n\t"\
03249 "psllw $6, %%mm6 \n\t"\
03250 "pmulhw %%mm3, %%mm1 \n\t"\
03251 "pmulhw %%mm3, %%mm6 \n\t"\
03252 "packuswb %%mm5, %%mm0 \n\t"\
03253 "packuswb %%mm6, %%mm1 \n\t"\
03254 "movq %%mm0, " #dst1 " \n\t"\
03255 "movq %%mm1, " #dst2 " \n\t"\
03256
03257 #endif //HAVE_MMX2
03258 #define SCALED_CPY(src1, src2, dst1, dst2)\
03259 REAL_SCALED_CPY(src1, src2, dst1, dst2)
03260
03261 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
03262 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
03263 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
03264 "lea (%%"REG_a",%4,4), %%"REG_a" \n\t"
03265 "lea (%%"REG_d",%5,4), %%"REG_d" \n\t"
03266 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03267
03268
03269 : "=&a" (packedOffsetAndScale)
03270 : "0" (packedOffsetAndScale),
03271 "r"(src),
03272 "r"(dst),
03273 "r" ((long)srcStride),
03274 "r" ((long)dstStride)
03275 : "%"REG_d
03276 );
03277 #else //HAVE_MMX
03278 for(i=0; i<8; i++)
03279 memcpy( &(dst[dstStride*i]),
03280 &(src[srcStride*i]), BLOCK_SIZE);
03281 #endif //HAVE_MMX
03282 }
03283 else
03284 {
03285 #ifdef HAVE_MMX
03286 asm volatile(
03287 "lea (%0,%2), %%"REG_a" \n\t"
03288 "lea (%1,%3), %%"REG_d" \n\t"
03289
03290 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
03291 "movq " #src1 ", %%mm0 \n\t"\
03292 "movq " #src2 ", %%mm1 \n\t"\
03293 "movq %%mm0, " #dst1 " \n\t"\
03294 "movq %%mm1, " #dst2 " \n\t"\
03295
03296 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03297 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03298
03299 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
03300 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03301 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03302 "lea (%%"REG_a",%2,4), %%"REG_a" \n\t"
03303 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
03304 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03305
03306 : : "r" (src),
03307 "r" (dst),
03308 "r" ((long)srcStride),
03309 "r" ((long)dstStride)
03310 : "%"REG_a, "%"REG_d
03311 );
03312 #else //HAVE_MMX
03313 for(i=0; i<8; i++)
03314 memcpy( &(dst[dstStride*i]),
03315 &(src[srcStride*i]), BLOCK_SIZE);
03316 #endif //HAVE_MMX
03317 }
03318 }
03319
03323 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03324 {
03325 #ifdef HAVE_MMX
03326 asm volatile(
03327 "movq (%0), %%mm0 \n\t"
03328 "add %1, %0 \n\t"
03329 "movq %%mm0, (%0) \n\t"
03330 "movq %%mm0, (%0, %1) \n\t"
03331 "movq %%mm0, (%0, %1, 2) \n\t"
03332 : "+r" (src)
03333 : "r" ((long)-stride)
03334 );
03335 #else
03336 int i;
03337 uint8_t *p=src;
03338 for(i=0; i<3; i++)
03339 {
03340 p-= stride;
03341 memcpy(p, src, 8);
03342 }
03343 #endif
03344 }
03345
03349 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03350 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03351 {
03352 DECLARE_ALIGNED(8, PPContext, c)= *c2;
03353 int x,y;
03354 #ifdef COMPILE_TIME_MODE
03355 const int mode= COMPILE_TIME_MODE;
03356 #else
03357 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03358 #endif
03359 int black=0, white=255;
03360 int QPCorrecture= 256*256;
03361
03362 int copyAhead;
03363 #ifdef HAVE_MMX
03364 int i;
03365 #endif
03366
03367 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03368 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03369
03370
03371 uint64_t * const yHistogram= c.yHistogram;
03372 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03373 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03374
03375
03376 #ifdef HAVE_MMX
03377 for(i=0; i<57; i++){
03378 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03379 int threshold= offset*2 + 1;
03380 c.mmxDcOffset[i]= 0x7F - offset;
03381 c.mmxDcThreshold[i]= 0x7F - threshold;
03382 c.mmxDcOffset[i]*= 0x0101010101010101LL;
03383 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03384 }
03385 #endif
03386
03387 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03388 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
03389 || (mode & FFMPEG_DEINT_FILTER)
03390 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03391 else if( (mode & V_DEBLOCK)
03392 || (mode & LINEAR_IPOL_DEINT_FILTER)
03393 || (mode & MEDIAN_DEINT_FILTER)
03394 || (mode & V_A_DEBLOCK)) copyAhead=13;
03395 else if(mode & V_X1_FILTER) copyAhead=11;
03396
03397 else if(mode & DERING) copyAhead=9;
03398 else copyAhead=8;
03399
03400 copyAhead-= 8;
03401
03402 if(!isColor)
03403 {
03404 uint64_t sum= 0;
03405 int i;
03406 uint64_t maxClipped;
03407 uint64_t clipped;
03408 double scale;
03409
03410 c.frameNum++;
03411
03412 if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
03413
03414 for(i=0; i<256; i++)
03415 {
03416 sum+= yHistogram[i];
03417 }
03418
03419
03420 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03421
03422 clipped= sum;
03423 for(black=255; black>0; black--)
03424 {
03425 if(clipped < maxClipped) break;
03426 clipped-= yHistogram[black];
03427 }
03428
03429 clipped= sum;
03430 for(white=0; white<256; white++)
03431 {
03432 if(clipped < maxClipped) break;
03433 clipped-= yHistogram[white];
03434 }
03435
03436 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03437
03438 #ifdef HAVE_MMX2
03439 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03440 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03441 #else
03442 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03443 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03444 #endif
03445
03446 c.packedYOffset|= c.packedYOffset<<32;
03447 c.packedYOffset|= c.packedYOffset<<16;
03448
03449 c.packedYScale|= c.packedYScale<<32;
03450 c.packedYScale|= c.packedYScale<<16;
03451
03452 if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
03453 else QPCorrecture= 256*256;
03454 }
03455 else
03456 {
03457 c.packedYScale= 0x0100010001000100LL;
03458 c.packedYOffset= 0;
03459 QPCorrecture= 256*256;
03460 }
03461
03462
03463 y=-BLOCK_SIZE;
03464 {
03465 const uint8_t *srcBlock= &(src[y*srcStride]);
03466 uint8_t *dstBlock= tempDst + dstStride;
03467
03468
03469
03470
03471 for(x=0; x<width; x+=BLOCK_SIZE)
03472 {
03473
03474 #ifdef HAVE_MMX2
03475
03476
03477
03478
03479
03480
03481
03482 asm(
03483 "mov %4, %%"REG_a" \n\t"
03484 "shr $2, %%"REG_a" \n\t"
03485 "and $6, %%"REG_a" \n\t"
03486 "add %5, %%"REG_a" \n\t"
03487 "mov %%"REG_a", %%"REG_d" \n\t"
03488 "imul %1, %%"REG_a" \n\t"
03489 "imul %3, %%"REG_d" \n\t"
03490 "prefetchnta 32(%%"REG_a", %0) \n\t"
03491 "prefetcht0 32(%%"REG_d", %2) \n\t"
03492 "add %1, %%"REG_a" \n\t"
03493 "add %3, %%"REG_d" \n\t"
03494 "prefetchnta 32(%%"REG_a", %0) \n\t"
03495 "prefetcht0 32(%%"REG_d", %2) \n\t"
03496 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
03497 "g" ((long)x), "g" ((long)copyAhead)
03498 : "%"REG_a, "%"REG_d
03499 );
03500
03501 #elif defined(HAVE_3DNOW)
03502
03503
03504
03505
03506
03507
03508 #endif
03509
03510 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03511 srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03512
03513 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03514
03515 if(mode & LINEAR_IPOL_DEINT_FILTER)
03516 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03517 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03518 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03519 else if(mode & MEDIAN_DEINT_FILTER)
03520 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03521 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03522 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03523 else if(mode & FFMPEG_DEINT_FILTER)
03524 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03525 else if(mode & LOWPASS5_DEINT_FILTER)
03526 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03527
03528
03529
03530 dstBlock+=8;
03531 srcBlock+=8;
03532 }
03533 if(width==FFABS(dstStride))
03534 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03535 else
03536 {
03537 int i;
03538 for(i=0; i<copyAhead; i++)
03539 {
03540 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03541 }
03542 }
03543 }
03544
03545 for(y=0; y<height; y+=BLOCK_SIZE)
03546 {
03547
03548 const uint8_t *srcBlock= &(src[y*srcStride]);
03549 uint8_t *dstBlock= &(dst[y*dstStride]);
03550 #ifdef HAVE_MMX
03551 uint8_t *tempBlock1= c.tempBlocks;
03552 uint8_t *tempBlock2= c.tempBlocks + 8;
03553 #endif
03554 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03555 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
03556 int QP=0;
03557
03558
03559 if(y+15 >= height)
03560 {
03561 int i;
03562
03563
03564 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03565 FFMAX(height-y-copyAhead, 0), srcStride);
03566
03567
03568 for(i=FFMAX(height-y, 8); i<copyAhead+8; i++)
03569 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), FFABS(srcStride));
03570
03571
03572 linecpy(tempDst, dstBlock - dstStride, FFMIN(height-y+1, copyAhead+1), dstStride);
03573
03574
03575 for(i=height-y+1; i<=copyAhead; i++)
03576 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), FFABS(dstStride));
03577
03578 dstBlock= tempDst + dstStride;
03579 srcBlock= tempSrc;
03580 }
03581
03582
03583
03584
03585 for(x=0; x<width; x+=BLOCK_SIZE)
03586 {
03587 const int stride= dstStride;
03588 #ifdef HAVE_MMX
03589 uint8_t *tmpXchg;
03590 #endif
03591 if(isColor)
03592 {
03593 QP= QPptr[x>>qpHShift];
03594 c.nonBQP= nonBQPptr[x>>qpHShift];
03595 }
03596 else
03597 {
03598 QP= QPptr[x>>4];
03599 QP= (QP* QPCorrecture + 256*128)>>16;
03600 c.nonBQP= nonBQPptr[x>>4];
03601 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03602 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03603 }
03604 c.QP= QP;
03605 #ifdef HAVE_MMX
03606 asm volatile(
03607 "movd %1, %%mm7 \n\t"
03608 "packuswb %%mm7, %%mm7 \n\t"
03609 "packuswb %%mm7, %%mm7 \n\t"
03610 "packuswb %%mm7, %%mm7 \n\t"
03611 "movq %%mm7, %0 \n\t"
03612 : "=m" (c.pQPb)
03613 : "r" (QP)
03614 );
03615 #endif
03616
03617
03618 #ifdef HAVE_MMX2
03619
03620
03621
03622
03623
03624
03625
03626 asm(
03627 "mov %4, %%"REG_a" \n\t"
03628 "shr $2, %%"REG_a" \n\t"
03629 "and $6, %%"REG_a" \n\t"
03630 "add %5, %%"REG_a" \n\t"
03631 "mov %%"REG_a", %%"REG_d" \n\t"
03632 "imul %1, %%"REG_a" \n\t"
03633 "imul %3, %%"REG_d" \n\t"
03634 "prefetchnta 32(%%"REG_a", %0) \n\t"
03635 "prefetcht0 32(%%"REG_d", %2) \n\t"
03636 "add %1, %%"REG_a" \n\t"
03637 "add %3, %%"REG_d" \n\t"
03638 "prefetchnta 32(%%"REG_a", %0) \n\t"
03639 "prefetcht0 32(%%"REG_d", %2) \n\t"
03640 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
03641 "g" ((long)x), "g" ((long)copyAhead)
03642 : "%"REG_a, "%"REG_d
03643 );
03644
03645 #elif defined(HAVE_3DNOW)
03646
03647
03648
03649
03650
03651
03652 #endif
03653
03654 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03655 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03656
03657 if(mode & LINEAR_IPOL_DEINT_FILTER)
03658 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03659 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03660 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03661 else if(mode & MEDIAN_DEINT_FILTER)
03662 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03663 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03664 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03665 else if(mode & FFMPEG_DEINT_FILTER)
03666 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03667 else if(mode & LOWPASS5_DEINT_FILTER)
03668 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03669
03670
03671
03672
03673
03674 if(y + 8 < height)
03675 {
03676 if(mode & V_X1_FILTER)
03677 RENAME(vertX1Filter)(dstBlock, stride, &c);
03678 else if(mode & V_DEBLOCK)
03679 {
03680 const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03681
03682 if(t==1)
03683 RENAME(doVertLowPass)(dstBlock, stride, &c);
03684 else if(t==2)
03685 RENAME(doVertDefFilter)(dstBlock, stride, &c);
03686 }else if(mode & V_A_DEBLOCK){
03687 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03688 }
03689 }
03690
03691 #ifdef HAVE_MMX
03692 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03693 #endif
03694
03695 if(x - 8 >= 0)
03696 {
03697 #ifdef HAVE_MMX
03698 if(mode & H_X1_FILTER)
03699 RENAME(vertX1Filter)(tempBlock1, 16, &c);
03700 else if(mode & H_DEBLOCK)
03701 {
03702
03703 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03704
03705 if(t==1)
03706 RENAME(doVertLowPass)(tempBlock1, 16, &c);
03707 else if(t==2)
03708 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03709 }else if(mode & H_A_DEBLOCK){
03710 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03711 }
03712
03713 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03714
03715 #else
03716 if(mode & H_X1_FILTER)
03717 horizX1Filter(dstBlock-4, stride, QP);
03718 else if(mode & H_DEBLOCK)
03719 {
03720 #ifdef HAVE_ALTIVEC
03721 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]);
03722 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03723
03724 const int t=vertClassify_altivec(tempBlock-48, 16, &c);
03725 if(t==1) {
03726 doVertLowPass_altivec(tempBlock-48, 16, &c);
03727 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03728 }
03729 else if(t==2) {
03730 doVertDefFilter_altivec(tempBlock-48, 16, &c);
03731 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03732 }
03733 #else
03734 const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03735
03736 if(t==1)
03737 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03738 else if(t==2)
03739 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03740 #endif
03741 }else if(mode & H_A_DEBLOCK){
03742 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03743 }
03744 #endif //HAVE_MMX
03745 if(mode & DERING)
03746 {
03747
03748 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03749 }
03750
03751 if(mode & TEMP_NOISE_FILTER)
03752 {
03753 RENAME(tempNoiseReducer)(dstBlock-8, stride,
03754 c.tempBlured[isColor] + y*dstStride + x,
03755 c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
03756 c.ppMode.maxTmpNoise);
03757 }
03758 }
03759
03760 dstBlock+=8;
03761 srcBlock+=8;
03762
03763 #ifdef HAVE_MMX
03764 tmpXchg= tempBlock1;
03765 tempBlock1= tempBlock2;
03766 tempBlock2 = tmpXchg;
03767 #endif
03768 }
03769
03770 if(mode & DERING)
03771 {
03772 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03773 }
03774
03775 if((mode & TEMP_NOISE_FILTER))
03776 {
03777 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03778 c.tempBlured[isColor] + y*dstStride + x,
03779 c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
03780 c.ppMode.maxTmpNoise);
03781 }
03782
03783
03784 if(y+15 >= height)
03785 {
03786 uint8_t *dstBlock= &(dst[y*dstStride]);
03787 if(width==FFABS(dstStride))
03788 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03789 else
03790 {
03791 int i;
03792 for(i=0; i<height-y; i++)
03793 {
03794 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03795 }
03796 }
03797 }
03798
03799
03800
03801
03802
03803
03804
03805
03806
03807
03808 }
03809 #ifdef HAVE_3DNOW
03810 asm volatile("femms");
03811 #elif defined (HAVE_MMX)
03812 asm volatile("emms");
03813 #endif
03814
03815 #ifdef DEBUG_BRIGHTNESS
03816 if(!isColor)
03817 {
03818 int max=1;
03819 int i;
03820 for(i=0; i<256; i++)
03821 if(yHistogram[i] > max) max=yHistogram[i];
03822
03823 for(i=1; i<256; i++)
03824 {
03825 int x;
03826 int start=yHistogram[i-1]/(max/256+1);
03827 int end=yHistogram[i]/(max/256+1);
03828 int inc= end > start ? 1 : -1;
03829 for(x=start; x!=end+inc; x+=inc)
03830 dst[ i*dstStride + x]+=128;
03831 }
03832
03833 for(i=0; i<100; i+=2)
03834 {
03835 dst[ (white)*dstStride + i]+=128;
03836 dst[ (black)*dstStride + i]+=128;
03837 }
03838
03839 }
03840 #endif
03841
03842 *c2= c;
03843
03844 }