00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "dsputil.h"
00024
00025 #include "dsputil_ppc.h"
00026
00027 #ifdef HAVE_ALTIVEC
00028 #include "dsputil_altivec.h"
00029
00030 extern void fdct_altivec(int16_t *block);
00031 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
00032 int x16, int y16, int rounder);
00033 extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
00034 extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
00035
00036 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
00037
00038 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
00039 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
00040 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
00041 void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
00042 void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
00043
00044 #endif
00045
/* Global capability flags; dsputil_init_ppc() ORs MM_ALTIVEC into it. */
int mm_flags = 0;

/**
 * Report which SIMD extensions are usable.
 *
 * @return MM_ALTIVEC when built with HAVE_ALTIVEC and has_altivec()
 *         reports success, 0 otherwise.
 */
int mm_support(void)
{
#ifdef HAVE_ALTIVEC
    if (has_altivec())
        return MM_ALTIVEC;
#endif
    return 0;
}
00058
#ifdef CONFIG_POWERPC_PERF
/*
 * Accumulated performance-counter statistics, indexed as
 * perfdata[pmc][function][statistic].
 */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];

/*
 * Human-readable names printed by powerpc_display_perf_report(), indexed by
 * the same function index as the middle dimension of perfdata.
 * NOTE(review): the order presumably has to mirror the function-index enum
 * that defines powerpc_perf_total -- confirm against its declaration.
 *
 * Declared as const char (not unsigned char) so the string-literal
 * initialisers have a matching pointer type, and const because the table
 * is never modified.
 */
static const char *const perfname[] = {
    "ff_fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "fdct_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "hadamard8_diff8x8_altivec",
    "hadamard8_diff16_altivec",
    "avg_pixels8_xy2_altivec",
    "clear_blocks_dcbz32_ppc",
    "clear_blocks_dcbz128_ppc",
    "put_h264_chroma_mc8_altivec",
    "avg_h264_chroma_mc8_altivec",
    "put_h264_qpel16_h_lowpass_altivec",
    "avg_h264_qpel16_h_lowpass_altivec",
    "put_h264_qpel16_v_lowpass_altivec",
    "avg_h264_qpel16_v_lowpass_altivec",
    "put_h264_qpel16_hv_lowpass_altivec",
    "avg_h264_qpel16_hv_lowpass_altivec",
    ""
};
#include <stdio.h>
#endif
00093
#ifdef CONFIG_POWERPC_PERF
/**
 * Log the collected performance-counter statistics through av_log().
 *
 * For every (function, pmc) pair whose sample count is non-zero, one
 * record with the minimum, maximum, average and sample count is printed
 * at AV_LOG_INFO level.
 */
void powerpc_display_perf_report(void)
{
    int i, j;

    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
    for (i = 0; i < powerpc_perf_total; i++) {
        for (j = 0; j < POWERPC_NUM_PMC_ENABLED; j++) {
            /* Skip counters that never recorded a sample (also avoids 0/0). */
            if (perfdata[j][i][powerpc_data_num] == 0)
                continue;

            /*
             * perfdata holds unsigned long long; cast to uint64_t so the
             * arguments match the PRIu64 conversions exactly even where
             * uint64_t is a different type of the same width.
             */
            av_log(NULL, AV_LOG_INFO,
                   " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
                   perfname[i],
                   j+1,
                   (uint64_t)perfdata[j][i][powerpc_data_min],
                   (uint64_t)perfdata[j][i][powerpc_data_max],
                   (double)perfdata[j][i][powerpc_data_sum] /
                   (double)perfdata[j][i][powerpc_data_num],
                   (uint64_t)perfdata[j][i][powerpc_data_num]);
        }
    }
}
#endif
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00144 {
00145 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
00146 register int misal = ((unsigned long)blocks & 0x00000010);
00147 register int i = 0;
00148 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
00149 #if 1
00150 if (misal) {
00151 ((unsigned long*)blocks)[0] = 0L;
00152 ((unsigned long*)blocks)[1] = 0L;
00153 ((unsigned long*)blocks)[2] = 0L;
00154 ((unsigned long*)blocks)[3] = 0L;
00155 i += 16;
00156 }
00157 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00158 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00159 }
00160 if (misal) {
00161 ((unsigned long*)blocks)[188] = 0L;
00162 ((unsigned long*)blocks)[189] = 0L;
00163 ((unsigned long*)blocks)[190] = 0L;
00164 ((unsigned long*)blocks)[191] = 0L;
00165 i += 16;
00166 }
00167 #else
00168 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00169 #endif
00170 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
00171 }
00172
00173
00174
00175 #ifdef HAVE_DCBZL
00176 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00177 {
00178 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
00179 register int misal = ((unsigned long)blocks & 0x0000007f);
00180 register int i = 0;
00181 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
00182 #if 1
00183 if (misal) {
00184
00185
00186
00187 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00188 }
00189 else
00190 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00191 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00192 }
00193 #else
00194 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00195 #endif
00196 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
00197 }
00198 #else
00199 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00200 {
00201 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00202 }
00203 #endif
00204
#ifdef HAVE_DCBZL
/**
 * Measure how many bytes a single "dcbzl" instruction clears.
 *
 * A 1024-byte buffer is filled with 0xFF, one dcbzl is issued on its
 * middle, and the bytes that read back as zero are counted.
 * dsputil_init_ppc() uses the result (32 or 128) to choose a
 * clear_blocks implementation.
 *
 * @return number of zeroed bytes, or 0 if the buffer could not be allocated
 */
long check_dcbzl_effect(void)
{
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0;
    register long i = 0;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = (fakedata + 512);

    memset(fakedata, 0xFF, 1024);

    /*
     * The "memory" clobber tells the compiler that the instruction writes
     * to memory; without it the counting loop below could legally be
     * evaluated against the 0xFF contents stored by memset().
     */
    asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024 ; i ++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/**
 * Stub used when HAVE_DCBZL is not defined.
 *
 * @return always 0
 */
long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00248
00249 static void prefetch_ppc(void *mem, int stride, int h)
00250 {
00251 register const uint8_t *p = mem;
00252 do {
00253 asm volatile ("dcbt 0,%0" : : "r" (p));
00254 p+= stride;
00255 } while(--h);
00256 }
00257
00258 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00259 {
00260
00261 c->prefetch = prefetch_ppc;
00262 switch (check_dcbzl_effect()) {
00263 case 32:
00264 c->clear_blocks = clear_blocks_dcbz32_ppc;
00265 break;
00266 case 128:
00267 c->clear_blocks = clear_blocks_dcbz128_ppc;
00268 break;
00269 default:
00270 break;
00271 }
00272
00273 #ifdef HAVE_ALTIVEC
00274 if(ENABLE_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00275
00276 if (has_altivec()) {
00277 mm_flags |= MM_ALTIVEC;
00278
00279 dsputil_init_altivec(c, avctx);
00280 if(ENABLE_SNOW_DECODER) snow_init_altivec(c, avctx);
00281 if(ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
00282 vc1dsp_init_altivec(c, avctx);
00283 float_init_altivec(c, avctx);
00284 int_init_altivec(c, avctx);
00285 c->gmc1 = gmc1_altivec;
00286
00287 #ifdef CONFIG_ENCODERS
00288 if (avctx->dct_algo == FF_DCT_AUTO ||
00289 avctx->dct_algo == FF_DCT_ALTIVEC)
00290 {
00291 c->fdct = fdct_altivec;
00292 }
00293 #endif //CONFIG_ENCODERS
00294
00295 if (avctx->lowres==0)
00296 {
00297 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00298 (avctx->idct_algo == FF_IDCT_ALTIVEC))
00299 {
00300 c->idct_put = idct_put_altivec;
00301 c->idct_add = idct_add_altivec;
00302 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00303 }
00304 }
00305
00306 #ifdef CONFIG_POWERPC_PERF
00307 {
00308 int i, j;
00309 for (i = 0 ; i < powerpc_perf_total ; i++)
00310 {
00311 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
00312 {
00313 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
00314 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
00315 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
00316 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
00317 }
00318 }
00319 }
00320 #endif
00321 }
00322 #endif
00323 }