| CODENOTIFIER | Help | You are not signed in | Sign in |
Project: Theora
Revision: 15153
Author: tterribe
Date: 04 Aug 2008 14:37:55
Changes: Remove all TH_DEBUG statements.
They required variadic macros, which are not standard in C90.
They also cluttered up the code, and were unlikely to be maintained properly
anyway.
Also, remove all the tabs and trailing whitespace, etc., that xiphmont gunked
up my code with.
| ... | ...@@ -22,8 +22,8 @@ | |
| 22 | 22 | static const __attribute__ ((aligned(8),used)) ogg_int64_t V128 = 0x8080808080808080LL; |
| 23 | 23 | |
| 24 | 24 | static void copy8x8__mmx (unsigned char *src, |
| 25 | unsigned char *dest, | |
| 26 | unsigned int stride) | |
| 25 | unsigned char *dest, | |
| 26 | unsigned int stride) | |
| 27 | 27 | { |
| 28 | 28 | __asm__ __volatile__ ( |
| 29 | 29 | " .p2align 4 \n\t" |
| ... | ...@@ -35,14 +35,14 @@ | |
| 35 | 35 | " movq (%1, %2, 2), %%mm2 \n\t" |
| 36 | 36 | " movq (%1, %%edi), %%mm3 \n\t" |
| 37 | 37 | |
| 38 | " lea (%1, %2, 4), %1 \n\t" | |
| 38 | " lea (%1, %2, 4), %1 \n\t" | |
| 39 | 39 | |
| 40 | 40 | " movq %%mm0, (%0) \n\t" |
| 41 | 41 | " movq %%mm1, (%0, %2) \n\t" |
| 42 | 42 | " movq %%mm2, (%0, %2, 2) \n\t" |
| 43 | 43 | " movq %%mm3, (%0, %%edi) \n\t" |
| 44 | 44 | |
| 45 | " lea (%0, %2, 4), %0 \n\t" | |
| 45 | " lea (%0, %2, 4), %0 \n\t" | |
| 46 | 46 | |
| 47 | 47 | " movq (%1), %%mm0 \n\t" |
| 48 | 48 | " movq (%1, %2), %%mm1 \n\t" |
| ... | ...@@ -61,7 +61,7 @@ | |
| 61 | 61 | } |
| 62 | 62 | |
| 63 | 63 | static void recon_intra8x8__mmx (unsigned char *ReconPtr, ogg_int16_t *ChangePtr, |
| 64 | ogg_uint32_t LineStep) | |
| 64 | ogg_uint32_t LineStep) | |
| 65 | 65 | { |
| 66 | 66 | __asm__ __volatile__ ( |
| 67 | 67 | " .p2align 4 \n\t" |
| ... | ...@@ -69,11 +69,11 @@ | |
| 69 | 69 | " movq %[V128], %%mm0 \n\t" /* Set mm0 to 0x8080808080808080 */ |
| 70 | 70 | |
| 71 | 71 | " lea 128(%1), %%edi \n\t" /* Endpoint in input buffer */ |
| 72 | "1: \n\t" | |
| 72 | "1: \n\t" | |
| 73 | 73 | " movq (%1), %%mm2 \n\t" /* First four input values */ |
| 74 | 74 | |
| 75 | 75 | " packsswb 8(%1), %%mm2 \n\t" /* pack with next(high) four values */ |
| 76 | " por %%mm0, %%mm0 \n\t" | |
| 76 | " por %%mm0, %%mm0 \n\t" | |
| 77 | 77 | " pxor %%mm0, %%mm2 \n\t" /* Convert result to unsigned (same as add 128) */ |
| 78 | 78 | " lea 16(%1), %1 \n\t" /* Step source buffer */ |
| 79 | 79 | " cmp %%edi, %1 \n\t" /* are we done */ |
| ... | ...@@ -91,7 +91,7 @@ | |
| 91 | 91 | } |
| 92 | 92 | |
| 93 | 93 | static void recon_inter8x8__mmx (unsigned char *ReconPtr, unsigned char *RefPtr, |
| 94 | ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) | |
| 94 | ogg_int16_t *ChangePtr, ogg_uint32_t LineStep) | |
| 95 | 95 | { |
| 96 | 96 | __asm__ __volatile__ ( |
| 97 | 97 | " .p2align 4 \n\t" |
| ... | ...@@ -127,8 +127,8 @@ | |
| 127 | 127 | } |
| 128 | 128 | |
| 129 | 129 | static void recon_inter8x8_half__mmx (unsigned char *ReconPtr, unsigned char *RefPtr1, |
| 130 | unsigned char *RefPtr2, ogg_int16_t *ChangePtr, | |
| 131 | ogg_uint32_t LineStep) | |
| 130 | unsigned char *RefPtr2, ogg_int16_t *ChangePtr, | |
| 131 | ogg_uint32_t LineStep) | |
| 132 | 132 | { |
| 133 | 133 | __asm__ __volatile__ ( |
| 134 | 134 | " .p2align 4 \n\t" |
| ... | ...@@ -173,7 +173,6 @@ | |
| 173 | 173 | |
| 174 | 174 | void dsp_mmx_recon_init(DspFunctions *funcs) |
| 175 | 175 | { |
| 176 | TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n"); | |
| 177 | 176 | funcs->copy8x8 = copy8x8__mmx; |
| 178 | 177 | funcs->recon_intra8x8 = recon_intra8x8__mmx; |
| 179 | 178 | funcs->recon_inter8x8 = recon_inter8x8__mmx; |
| ... | ...@@ -220,24 +220,6 @@ | |
| 220 | 220 | /*Implement me.*/ |
| 221 | 221 | flags=0; |
| 222 | 222 | } |
| 223 | # if defined(DEBUG) | |
| 224 | if(flags){ | |
| 225 | TH_DEBUG("vectorized instruction sets supported:"); | |
| 226 | if(flags&OC_CPU_X86_MMX)TH_DEBUG(" mmx"); | |
| 227 | if(flags&OC_CPU_X86_MMXEXT)TH_DEBUG(" mmxext"); | |
| 228 | if(flags&OC_CPU_X86_SSE)TH_DEBUG(" sse"); | |
| 229 | if(flags&OC_CPU_X86_SSE2)TH_DEBUG(" sse2"); | |
| 230 | if(flags&OC_CPU_X86_3DNOW)TH_DEBUG(" 3dnow"); | |
| 231 | if(flags&OC_CPU_X86_3DNOWEXT)TH_DEBUG(" 3dnowext"); | |
| 232 | if(flags&OC_CPU_X86_PNI)TH_DEBUG(" pni"); | |
| 233 | if(flags&OC_CPU_X86_SSSE3)TH_DEBUG(" ssse3"); | |
| 234 | if(flags&OC_CPU_X86_SSE4_1)TH_DEBUG(" sse4_1"); | |
| 235 | if(flags&OC_CPU_X86_SSE4_2)TH_DEBUG(" sse4_2"); | |
| 236 | if(flags&OC_CPU_X86_SSE4A)TH_DEBUG(" sse4a"); | |
| 237 | if(flags&OC_CPU_X86_SSE5)TH_DEBUG(" sse5"); | |
| 238 | TH_DEBUG("\n"); | |
| 239 | } | |
| 240 | # endif | |
| 241 | 223 | return flags; |
| 242 | 224 | } |
| 243 | 225 | #endif |
| ... | ...@@ -27,7 +27,7 @@ | |
| 27 | 27 | 0x0004000400040004LL; |
| 28 | 28 | |
| 29 | 29 | static void loop_filter_v(unsigned char *_pix,int _ystride, |
| 30 | const ogg_int16_t *_ll){ | |
| 30 | const ogg_int16_t *_ll){ | |
| 31 | 31 | long esi; |
| 32 | 32 | _pix-=_ystride*2; |
| 33 | 33 | __asm__ __volatile__( |
| ... | ...@@ -210,7 +210,7 @@ | |
| 210 | 210 | four p0's to one register we must transpose the values in four mmx regs. |
| 211 | 211 | When half is done we repeat this for the rest.*/ |
| 212 | 212 | static void loop_filter_h4(unsigned char *_pix,long _ystride, |
| 213 | const ogg_int16_t *_ll){ | |
| 213 | const ogg_int16_t *_ll){ | |
| 214 | 214 | long esi; |
| 215 | 215 | long edi; |
| 216 | 216 | __asm__ __volatile__( |
| ... | ...@@ -343,12 +343,12 @@ | |
| 343 | 343 | } |
| 344 | 344 | |
| 345 | 345 | static void loop_filter_h(unsigned char *_pix,int _ystride, |
| 346 | const ogg_int16_t *_ll){ | |
| 346 | const ogg_int16_t *_ll){ | |
| 347 | 347 | _pix-=2; |
| 348 | 348 | loop_filter_h4(_pix,_ystride,_ll); |
| 349 | 349 | loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); |
| 350 | 350 | } |
| 351 | ||
| 351 | ||
| 352 | 352 | static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){ |
| 353 | 353 | int j; |
| 354 | 354 | ogg_int16_t __attribute__((aligned(8))) ll[4]; |
| ... | ...@@ -359,7 +359,7 @@ | |
| 359 | 359 | ll[0]=ll[1]=ll[2]=ll[3]=FLimit; |
| 360 | 360 | |
| 361 | 361 | for ( j = 0; j < 3 ; j++){ |
| 362 | ogg_uint32_t *bp_begin = bp; | |
| 362 | ogg_uint32_t *bp_begin = bp; | |
| 363 | 363 | ogg_uint32_t *bp_end; |
| 364 | 364 | int stride; |
| 365 | 365 | int h; |
| ... | ...@@ -376,23 +376,23 @@ | |
| 376 | 376 | stride = pbi->UVStride; |
| 377 | 377 | break; |
| 378 | 378 | } |
| 379 | ||
| 379 | ||
| 380 | 380 | while(bp<bp_end){ |
| 381 | 381 | ogg_uint32_t *bp_left = bp; |
| 382 | 382 | ogg_uint32_t *bp_right = bp + h; |
| 383 | 383 | while(bp<bp_right){ |
| 384 | if(cp[0]){ | |
| 385 | if(bp>bp_left) | |
| 386 | loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 387 | if(bp_left>bp_begin) | |
| 388 | loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 389 | if(bp+1<bp_right && !cp[1]) | |
| 390 | loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,ll); | |
| 391 | if(bp+h<bp_end && !cp[h]) | |
| 392 | loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,ll); | |
| 393 | } | |
| 394 | bp++; | |
| 395 | cp++; | |
| 384 | if(cp[0]){ | |
| 385 | if(bp>bp_left) | |
| 386 | loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 387 | if(bp_left>bp_begin) | |
| 388 | loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 389 | if(bp+1<bp_right && !cp[1]) | |
| 390 | loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,ll); | |
| 391 | if(bp+h<bp_end && !cp[h]) | |
| 392 | loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,ll); | |
| 393 | } | |
| 394 | bp++; | |
| 395 | cp++; | |
| 396 | 396 | } |
| 397 | 397 | } |
| 398 | 398 | } |
| ... | ...@@ -127,65 +127,6 @@ | |
| 127 | 127 | } |
| 128 | 128 | while(qri-->0); |
| 129 | 129 | } |
| 130 | ||
| 131 | #ifdef _TH_DEBUG_ | |
| 132 | /* dump the tables */ | |
| 133 | { | |
| 134 | int i, j, k, l, m; | |
| 135 | TH_DEBUG("loop filter limits = {"); | |
| 136 | for(i=0;i<64;){ | |
| 137 | TH_DEBUG("\n "); | |
| 138 | for(j=0;j<16;i++,j++) | |
| 139 | TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]); | |
| 140 | } | |
| 141 | TH_DEBUG("\n}\n\n"); | |
| 142 | ||
| 143 | TH_DEBUG("ac scale = {"); | |
| 144 | for(i=0;i<64;){ | |
| 145 | TH_DEBUG("\n "); | |
| 146 | for(j=0;j<16;i++,j++) | |
| 147 | TH_DEBUG("%3d ",_qinfo->ac_scale[i]); | |
| 148 | } | |
| 149 | TH_DEBUG("\n}\n\n"); | |
| 150 | ||
| 151 | TH_DEBUG("dc scale = {"); | |
| 152 | for(i=0;i<64;){ | |
| 153 | TH_DEBUG("\n "); | |
| 154 | for(j=0;j<16;i++,j++) | |
| 155 | TH_DEBUG("%3d ",_qinfo->dc_scale[i]); | |
| 156 | } | |
| 157 | TH_DEBUG("\n}\n\n"); | |
| 158 | ||
| 159 | for(k=0;k<2;k++) | |
| 160 | for(l=0;l<3;l++){ | |
| 161 | char *name[2][3]={ | |
| 162 | {"intra Y bases","intra U bases", "intra V bases"}, | |
| 163 | {"inter Y bases","inter U bases", "inter V bases"} | |
| 164 | }; | |
| 165 | ||
| 166 | th_quant_ranges *r = &_qinfo->qi_ranges[k][l]; | |
| 167 | TH_DEBUG("%s = {\n",name[k][l]); | |
| 168 | TH_DEBUG(" ranges = %d\n",r->nranges); | |
| 169 | TH_DEBUG(" intervals = { "); | |
| 170 | for(i=0;i<r->nranges;i++) | |
| 171 | TH_DEBUG("%3d ",r->sizes[i]); | |
| 172 | TH_DEBUG("}\n"); | |
| 173 | TH_DEBUG("\n matricies = { "); | |
| 174 | for(m=0;m<r->nranges+1;m++){ | |
| 175 | TH_DEBUG("\n { "); | |
| 176 | for(i=0;i<64;){ | |
| 177 | TH_DEBUG("\n "); | |
| 178 | for(j=0;j<8;i++,j++) | |
| 179 | TH_DEBUG("%3d ",r->base_matrices[m][i]); | |
| 180 | } | |
| 181 | TH_DEBUG("\n }"); | |
| 182 | } | |
| 183 | TH_DEBUG("\n }\n"); | |
| 184 | } | |
| 185 | } | |
| 186 | ||
| 187 | #endif | |
| 188 | ||
| 189 | 130 | _ogg_free(base_mats); |
| 190 | 131 | return 0; |
| 191 | 132 | } |
| ... | ...@@ -227,4 +168,3 @@ | |
| 227 | 168 | _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices); |
| 228 | 169 | } |
| 229 | 170 | } |
| 230 |
| ... | ...@@ -26,18 +26,13 @@ | |
| 26 | 26 | #include "dsp.h" |
| 27 | 27 | #include "codec_internal.h" |
| 28 | 28 | |
| 29 | #ifdef _TH_DEBUG_ | |
| 30 | FILE *debugout=NULL; | |
| 31 | long dframe=0; | |
| 32 | #endif | |
| 33 | ||
| 34 | 29 | #define A_TABLE_SIZE 29 |
| 35 | 30 | #define DF_CANDIDATE_WINDOW 5 |
| 36 | 31 | |
| 37 | 32 | /* |
| 38 | * th_quant_info for VP3 | |
| 33 | * th_quant_info for VP3 | |
| 39 | 34 | */ |
| 40 | ||
| 35 | ||
| 41 | 36 | /*The default quantization parameters used by VP3.1.*/ |
| 42 | 37 | static const int OC_VP31_RANGE_SIZES[1]={63}; |
| 43 | 38 | static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={ |
| ... | ...@@ -897,10 +892,6 @@ | |
| 897 | 892 | |
| 898 | 893 | CP_INSTANCE *cpi; |
| 899 | 894 | |
| 900 | #ifdef _TH_DEBUG_ | |
| 901 | debugout=fopen("theoraenc-debugout.txt","w"); | |
| 902 | #endif | |
| 903 | ||
| 904 | 895 | memset(th, 0, sizeof(*th)); |
| 905 | 896 | /*Currently only the 4:2:0 format is supported.*/ |
| 906 | 897 | if(c->pixelformat!=OC_PF_420)return OC_IMPL; |
| ... | ...@@ -1043,7 +1034,7 @@ | |
| 1043 | 1034 | current clip. */ |
| 1044 | 1035 | cpi->ThisIsFirstFrame = 1; |
| 1045 | 1036 | cpi->readyflag = 1; |
| 1046 | ||
| 1037 | ||
| 1047 | 1038 | cpi->pb.HeadersWritten = 0; |
| 1048 | 1039 | /*We overload this flag to track header output.*/ |
| 1049 | 1040 | cpi->doneflag=-3; |
| ... | ...@@ -1111,7 +1102,7 @@ | |
| 1111 | 1102 | if(cpi->LastKeyFrame >= (ogg_uint32_t) |
| 1112 | 1103 | cpi->pb.info.keyframe_frequency_force) |
| 1113 | 1104 | cpi->ThisIsKeyFrame = 1; |
| 1114 | ||
| 1105 | ||
| 1115 | 1106 | if ( cpi->ThisIsKeyFrame ) { |
| 1116 | 1107 | CompressKeyFrame(cpi); |
| 1117 | 1108 | cpi->ThisIsKeyFrame = 0; |
| ... | ...@@ -1131,10 +1122,6 @@ | |
| 1131 | 1122 | ((cpi->CurrentFrame - cpi->LastKeyFrame)<<cpi->pb.keyframe_granule_shift)+ |
| 1132 | 1123 | cpi->LastKeyFrame - 1; |
| 1133 | 1124 | |
| 1134 | #ifdef _TH_DEBUG_ | |
| 1135 | dframe++; | |
| 1136 | #endif | |
| 1137 | ||
| 1138 | 1125 | return 0; |
| 1139 | 1126 | } |
| 1140 | 1127 | |
| ... | ...@@ -1170,7 +1157,7 @@ | |
| 1170 | 1157 | |
| 1171 | 1158 | static void _tp_writelsbint(oggpack_buffer *opb, long value) |
| 1172 | 1159 | { |
| 1173 | oggpackB_write(opb, value&0xFF, 8); | |
| 1160 | oggpackB_write(opb, value&0xFF, 8); | |
| 1174 | 1161 | oggpackB_write(opb, value>>8&0xFF, 8); |
| 1175 | 1162 | oggpackB_write(opb, value>>16&0xFF, 8); |
| 1176 | 1163 | oggpackB_write(opb, value>>24&0xFF, 8); |
| ... | ...@@ -1197,7 +1184,7 @@ | |
| 1197 | 1184 | /* Applications use offset_y to mean offset from the top of the image; the |
| 1198 | 1185 | * meaning in the bitstream is the opposite (from the bottom). Transform. |
| 1199 | 1186 | */ |
| 1200 | offset_y = cpi->pb.info.height - cpi->pb.info.frame_height - | |
| 1187 | offset_y = cpi->pb.info.height - cpi->pb.info.frame_height - | |
| 1201 | 1188 | cpi->pb.info.offset_y; |
| 1202 | 1189 | oggpackB_write(cpi->oggbuffer,offset_y,8); |
| 1203 | 1190 | |
| ... | ...@@ -1321,11 +1308,6 @@ | |
| 1321 | 1308 | _ogg_free(cpi); |
| 1322 | 1309 | } |
| 1323 | 1310 | |
| 1324 | #ifdef _TH_DEBUG_ | |
| 1325 | fclose(debugout); | |
| 1326 | debugout=NULL; | |
| 1327 | #endif | |
| 1328 | ||
| 1329 | 1311 | memset(th,0,sizeof(*th)); |
| 1330 | 1312 | } |
| 1331 | 1313 | |
| ... | ...@@ -1377,59 +1359,59 @@ | |
| 1377 | 1359 | CP_INSTANCE *cpi; |
| 1378 | 1360 | PB_INSTANCE *pbi; |
| 1379 | 1361 | int value; |
| 1380 | ||
| 1362 | ||
| 1381 | 1363 | if(th == NULL) |
| 1382 | 1364 | return TH_EFAULT; |
| 1383 | 1365 | |
| 1384 | 1366 | cpi = th->internal_encode; |
| 1385 | 1367 | pbi = &cpi->pb; |
| 1386 | ||
| 1368 | ||
| 1387 | 1369 | switch(req) { |
| 1388 | 1370 | case TH_ENCCTL_SET_QUANT_PARAMS: |
| 1389 | 1371 | if( ( buf==NULL&&buf_sz!=0 ) |
| 1390 | || ( buf!=NULL&&buf_sz!=sizeof(th_quant_info) ) | |
| 1391 | || cpi->pb.HeadersWritten ){ | |
| 1372 | || ( buf!=NULL&&buf_sz!=sizeof(th_quant_info) ) | |
| 1373 | || cpi->pb.HeadersWritten ){ | |
| 1392 | 1374 | return TH_EINVAL; |
| 1393 | 1375 | } |
| 1394 | ||
| 1376 | ||
| 1395 | 1377 | memcpy(&pbi->quant_info, buf, sizeof(th_quant_info)); |
| 1396 | 1378 | InitQTables(pbi); |
| 1397 | ||
| 1379 | ||
| 1398 | 1380 | return 0; |
| 1399 | 1381 | case TH_ENCCTL_SET_VP3_COMPATIBLE: |
| 1400 | 1382 | if(cpi->pb.HeadersWritten) |
| 1401 | 1383 | return TH_EINVAL; |
| 1402 | ||
| 1384 | ||
| 1403 | 1385 | memcpy(&pbi->quant_info, &TH_VP31_QUANT_INFO, sizeof(th_quant_info)); |
| 1404 | 1386 | InitQTables(pbi); |
| 1405 | ||
| 1387 | ||
| 1406 | 1388 | return 0; |
| 1407 | 1389 | case TH_ENCCTL_SET_SPLEVEL: |
| 1408 | 1390 | if(buf == NULL || buf_sz != sizeof(int)) |
| 1409 | 1391 | return TH_EINVAL; |
| 1410 | ||
| 1392 | ||
| 1411 | 1393 | memcpy(&value, buf, sizeof(int)); |
| 1412 | ||
| 1394 | ||
| 1413 | 1395 | switch(value) { |
| 1414 | 1396 | case 0: |
| 1415 | 1397 | cpi->MotionCompensation = 1; |
| 1416 | 1398 | pbi->info.quick_p = 0; |
| 1417 | 1399 | break; |
| 1418 | ||
| 1400 | ||
| 1419 | 1401 | case 1: |
| 1420 | 1402 | cpi->MotionCompensation = 1; |
| 1421 | 1403 | pbi->info.quick_p = 1; |
| 1422 | 1404 | break; |
| 1423 | ||
| 1405 | ||
| 1424 | 1406 | case 2: |
| 1425 | 1407 | cpi->MotionCompensation = 0; |
| 1426 | 1408 | pbi->info.quick_p = 1; |
| 1427 | 1409 | break; |
| 1428 | ||
| 1410 | ||
| 1429 | 1411 | default: |
| 1430 | return TH_EINVAL; | |
| 1412 | return TH_EINVAL; | |
| 1431 | 1413 | } |
| 1432 | ||
| 1414 | ||
| 1433 | 1415 | return 0; |
| 1434 | 1416 | case TH_ENCCTL_GET_SPLEVEL_MAX: |
| 1435 | 1417 | value = 2; |
| ... | ...@@ -27,7 +27,7 @@ | |
| 27 | 27 | 0x0004000400040004LL; |
| 28 | 28 | |
| 29 | 29 | static void loop_filter_v(unsigned char *_pix,int _ystride, |
| 30 | const ogg_int16_t *_ll){ | |
| 30 | const ogg_int16_t *_ll){ | |
| 31 | 31 | long esi; |
| 32 | 32 | _pix-=_ystride*2; |
| 33 | 33 | __asm__ __volatile__( |
| ... | ...@@ -210,7 +210,7 @@ | |
| 210 | 210 | four p0's to one register we must transpose the values in four mmx regs. |
| 211 | 211 | When half is done we repeat this for the rest.*/ |
| 212 | 212 | static void loop_filter_h4(unsigned char *_pix,long _ystride, |
| 213 | const ogg_int16_t *_ll){ | |
| 213 | const ogg_int16_t *_ll){ | |
| 214 | 214 | long esi; |
| 215 | 215 | long edi; |
| 216 | 216 | __asm__ __volatile__( |
| ... | ...@@ -343,12 +343,12 @@ | |
| 343 | 343 | } |
| 344 | 344 | |
| 345 | 345 | static void loop_filter_h(unsigned char *_pix,int _ystride, |
| 346 | const ogg_int16_t *_ll){ | |
| 346 | const ogg_int16_t *_ll){ | |
| 347 | 347 | _pix-=2; |
| 348 | 348 | loop_filter_h4(_pix,_ystride,_ll); |
| 349 | 349 | loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll); |
| 350 | 350 | } |
| 351 | ||
| 351 | ||
| 352 | 352 | static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){ |
| 353 | 353 | int j; |
| 354 | 354 | ogg_int16_t __attribute__((aligned(8))) ll[4]; |
| ... | ...@@ -359,7 +359,7 @@ | |
| 359 | 359 | ll[0]=ll[1]=ll[2]=ll[3]=FLimit; |
| 360 | 360 | |
| 361 | 361 | for ( j = 0; j < 3 ; j++){ |
| 362 | ogg_uint32_t *bp_begin = bp; | |
| 362 | ogg_uint32_t *bp_begin = bp; | |
| 363 | 363 | ogg_uint32_t *bp_end; |
| 364 | 364 | int stride; |
| 365 | 365 | int h; |
| ... | ...@@ -376,23 +376,23 @@ | |
| 376 | 376 | stride = pbi->UVStride; |
| 377 | 377 | break; |
| 378 | 378 | } |
| 379 | ||
| 379 | ||
| 380 | 380 | while(bp<bp_end){ |
| 381 | 381 | ogg_uint32_t *bp_left = bp; |
| 382 | 382 | ogg_uint32_t *bp_right = bp + h; |
| 383 | 383 | while(bp<bp_right){ |
| 384 | if(cp[0]){ | |
| 385 | if(bp>bp_left) | |
| 386 | loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 387 | if(bp_left>bp_begin) | |
| 388 | loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 389 | if(bp+1<bp_right && !cp[1]) | |
| 390 | loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,ll); | |
| 391 | if(bp+h<bp_end && !cp[h]) | |
| 392 | loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,ll); | |
| 393 | } | |
| 394 | bp++; | |
| 395 | cp++; | |
| 384 | if(cp[0]){ | |
| 385 | if(bp>bp_left) | |
| 386 | loop_filter_h(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 387 | if(bp_left>bp_begin) | |
| 388 | loop_filter_v(&pbi->LastFrameRecon[bp[0]],stride,ll); | |
| 389 | if(bp+1<bp_right && !cp[1]) | |
| 390 | loop_filter_h(&pbi->LastFrameRecon[bp[0]]+8,stride,ll); | |
| 391 | if(bp+h<bp_end && !cp[h]) | |
| 392 | loop_filter_v(&pbi->LastFrameRecon[bp[h]],stride,ll); | |
| 393 | } | |
| 394 | bp++; | |
| 395 | cp++; | |
| 396 | 396 | } |
| 397 | 397 | } |
| 398 | 398 | } |
| ... | ...@@ -28,10 +28,10 @@ | |
| 28 | 28 | #define HIGHBITDUPPED(X) (((ogg_int16_t) X) >> 15) |
| 29 | 29 | |
| 30 | 30 | static ogg_uint32_t QuadCodeComponent ( CP_INSTANCE *cpi, |
| 31 | ogg_uint32_t FirstSB, | |
| 32 | ogg_uint32_t SBRows, | |
| 33 | ogg_uint32_t SBCols, | |
| 34 | ogg_uint32_t PixelsPerLine){ | |
| 31 | ogg_uint32_t FirstSB, | |
| 32 | ogg_uint32_t SBRows, | |
| 33 | ogg_uint32_t SBCols, | |
| 34 | ogg_uint32_t PixelsPerLine){ | |
| 35 | 35 | |
| 36 | 36 | ogg_int32_t FragIndex; /* Fragment number */ |
| 37 | 37 | ogg_uint32_t MB, B; /* Macro-Block, Block indices */ |
| ... | ...@@ -49,7 +49,7 @@ | |
| 49 | 49 | for ( SBcol=0; SBcol<SBCols; SBcol++ ) { |
| 50 | 50 | /* Check its four Macro-Blocks */ |
| 51 | 51 | /* 'Macro-Block' is a misnomer in the chroma planes; this is |
| 52 | really just a Hilbert curve iterator */ | |
| 52 | really just a Hilbert curve iterator */ | |
| 53 | 53 | for ( MB=0; MB<4; MB++ ) { |
| 54 | 54 | |
| 55 | 55 | if ( QuadMapToMBTopLeft(cpi->pb.BlockMap,SB,MB) >= 0 ) { |
| ... | ...@@ -359,31 +359,15 @@ | |
| 359 | 359 | /* Add the appropriate mode entropy token. */ |
| 360 | 360 | ModeIndex = SchemeList[cpi->ModeList[i]]; |
| 361 | 361 | oggpackB_write( opb, ModeBitPatterns[ModeIndex], |
| 362 | (ogg_uint32_t)ModeBitLengths[ModeIndex] ); | |
| 362 | (ogg_uint32_t)ModeBitLengths[ModeIndex] ); | |
| 363 | 363 | } |
| 364 | 364 | }else{ |
| 365 | 365 | /* Fall back to MODE_BITS per entry */ |
| 366 | 366 | for ( i = 0; i < cpi->ModeListCount; i++) |
| 367 | 367 | /* Add the appropriate mode entropy token. */ |
| 368 | oggpackB_write( opb, cpi->ModeList[i], MODE_BITS ); | |
| 368 | oggpackB_write( opb, cpi->ModeList[i], MODE_BITS ); | |
| 369 | 369 | } |
| 370 | ||
| 371 | #ifdef _TH_DEBUG_ | |
| 372 | TH_DEBUG("mode encode scheme = %d\n",(int)BestScheme); | |
| 373 | if ( BestScheme == 0 ) { | |
| 374 | TH_DEBUG("mode scheme list = { "); | |
| 375 | for ( j = 0; j < MAX_MODES; j++ ) | |
| 376 | TH_DEBUG("%d ",(int)BestModeSchemes[j]); | |
| 377 | TH_DEBUG("}\n"); | |
| 378 | } | |
| 379 | TH_DEBUG("mode list = { "); | |
| 380 | for ( i = 0; i < cpi->ModeListCount; i++) { | |
| 381 | if((i&0x1f)==0) | |
| 382 | TH_DEBUG("\n "); | |
| 383 | TH_DEBUG("%d ",cpi->ModeList[i]); | |
| 384 | } | |
| 385 | TH_DEBUG("\n}\n"); | |
| 386 | #endif | |
| 370 | ||
| 387 | 371 | } |
| 388 | 372 | |
| 389 | 373 | static void PackMotionVectors (CP_INSTANCE *cpi) { |
| ... | ...@@ -422,15 +406,6 @@ | |
| 422 | 406 | (ogg_uint32_t)MvBitsPtr[cpi->MVList[i].y] ); |
| 423 | 407 | } |
| 424 | 408 | |
| 425 | #ifdef _TH_DEBUG_ | |
| 426 | TH_DEBUG("motion vectors = {"); | |
| 427 | for ( i = 0; i < (ogg_int32_t)cpi->MvListCount; i++ ) { | |
| 428 | if((i&0x7)==0) | |
| 429 | TH_DEBUG("\n "); | |
| 430 | TH_DEBUG("%+03d,%+03d ",cpi->MVList[i].x,cpi->MVList[i].y); | |
| 431 | } | |
| 432 | TH_DEBUG("\n}\n"); | |
| 433 | #endif | |
| 434 | 409 | } |
| 435 | 410 | |
| 436 | 411 | static void PackEOBRun( CP_INSTANCE *cpi) { |
| ... | ...@@ -905,17 +880,6 @@ | |
| 905 | 880 | } |
| 906 | 881 | } |
| 907 | 882 | |
| 908 | #ifdef _TH_DEBUG_ | |
| 909 | { | |
| 910 | int j; | |
| 911 | for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { | |
| 912 | FragIndex = cpi->pb.CodedBlockList[i]; | |
| 913 | for(j=0;j<64;j++) | |
| 914 | cpi->pb.QFragQUAN[FragIndex][j] = cpi->pb.QFragData[FragIndex][j]; | |
| 915 | } | |
| 916 | } | |
| 917 | #endif | |
| 918 | ||
| 919 | 883 | /* Pack DC tokens and adjust the ones we couldn't predict 2d */ |
| 920 | 884 | for ( i = 0; i < cpi->pb.CodedBlockIndex; i++ ) { |
| 921 | 885 | /* Get the linear index for the current coded fragment. */ |
| ... | ...@@ -1013,7 +977,7 @@ | |
| 1013 | 977 | cpi->pb.FragCodingMethod[cpi->pb.YPlaneFragments + |
| 1014 | 978 | cpi->pb.UVPlaneFragments + UVFragOffset] = |
| 1015 | 979 | cpi->MBCodingMode; |
| 1016 | } | |
| 980 | } | |
| 1017 | 981 | } |
| 1018 | 982 | |
| 1019 | 983 | /* Next Super-Block */ |
| ... | ...@@ -1391,7 +1355,7 @@ | |
| 1391 | 1355 | |
| 1392 | 1356 | cpi->MBCodingMode = CODE_INTER_PLUS_MV; |
| 1393 | 1357 | SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, |
| 1394 | VFragIndex,&InterMVect); | |
| 1358 | VFragIndex,&InterMVect); | |
| 1395 | 1359 | |
| 1396 | 1360 | /* Update Prior last mv with last mv */ |
| 1397 | 1361 | PriorLastInterMVect.x = LastInterMVect.x; |
| ... | ...@@ -1407,7 +1371,7 @@ | |
| 1407 | 1371 | |
| 1408 | 1372 | cpi->MBCodingMode = CODE_GOLDEN_MV; |
| 1409 | 1373 | SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, |
| 1410 | VFragIndex,&GFMVect); | |
| 1374 | VFragIndex,&GFMVect); | |
| 1411 | 1375 | |
| 1412 | 1376 | /* Note last inter GF MV for future use */ |
| 1413 | 1377 | LastGFMVect.x = GFMVect.x; |
| ... | ...@@ -1463,7 +1427,7 @@ | |
| 1463 | 1427 | |
| 1464 | 1428 | cpi->MBCodingMode = CODE_INTRA; |
| 1465 | 1429 | SetMBMotionVectorsAndMode(cpi,YFragIndex,UFragIndex, |
| 1466 | VFragIndex,&ZeroVect); | |
| 1430 | VFragIndex,&ZeroVect); | |
| 1467 | 1431 | } |
| 1468 | 1432 | |
| 1469 | 1433 | |
| ... | ...@@ -1487,17 +1451,11 @@ | |
| 1487 | 1451 | void WriteFrameHeader( CP_INSTANCE *cpi) { |
| 1488 | 1452 | ogg_uint32_t i; |
| 1489 | 1453 | oggpack_buffer *opb=cpi->oggbuffer; |
| 1490 | ||
| 1491 | TH_DEBUG("\n>>>> beginning frame %ld\n\n",dframe); | |
| 1492 | ||
| 1493 | 1454 | /* Output the frame type (base/key frame or inter frame) */ |
| 1494 | 1455 | oggpackB_write( opb, cpi->pb.FrameType, 1 ); |
| 1495 | TH_DEBUG("frame type = video, %s\n",cpi->pb.FrameType?"predicted":"key"); | |
| 1496 | ||
| 1497 | 1456 | /* Write out details of the current value of Q... variable resolution. */ |
| 1498 | 1457 | for ( i = 0; i < Q_TABLE_SIZE; i++ ) { |
| 1499 | 1458 | if ( cpi->pb.ThisFrameQualityValue == cpi->pb.QThreshTable[i] ) { |
| 1500 | TH_DEBUG("frame quality = { %d }\n",i); | |
| 1501 | 1459 | oggpackB_write( opb, i, 6 ); |
| 1502 | 1460 | break; |
| 1503 | 1461 | } |
| ... | ...@@ -401,7 +401,7 @@ | |
| 401 | 401 | " paddsw "r0","r0"\n" \ |
| 402 | 402 | " movq "r1","I(1)"\n" /* save R1 */ \ |
| 403 | 403 | " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \ |
| 404 | "#end RowIDCT" \ | |
| 404 | "#end RowIDCT" \ | |
| 405 | 405 | ); |
| 406 | 406 | // end RowIDCT macro (8 + 38 = 46 cycles) |
| 407 | 407 | |
| ... | ...@@ -465,7 +465,7 @@ | |
| 465 | 465 | " movq "r5","J(5)"\n" /* store NR5 at J5 */ \ |
| 466 | 466 | " movq "r7","J(7)"\n" /* store NR7 at J7 */ \ |
| 467 | 467 | " movq "r0","I(0)"\n" /* store NR0 at I0 */ \ |
| 468 | "#end ColumnIDCT\n" \ | |
| 468 | "#end ColumnIDCT\n" \ | |
| 469 | 469 | ); |
| 470 | 470 | // end ColumnIDCT macro (38 + 19 = 57 cycles) |
| 471 | 471 | |
| ... | ...@@ -559,7 +559,7 @@ | |
| 559 | 559 | " movq "r4","I(3)"\n" \ |
| 560 | 560 | \ |
| 561 | 561 | " movq "r2","I(2)"\n" \ |
| 562 | "#end Transpose\n" \ | |
| 562 | "#end Transpose\n" \ | |
| 563 | 563 | ); |
| 564 | 564 | // end Transpose macro (19 cycles). |
| 565 | 565 | |
| ... | ...@@ -1013,7 +1013,7 @@ | |
| 1013 | 1013 | " paddsw "r0","r0"\n" \ |
| 1014 | 1014 | " movq "r1","I(1)"\n" /* save R1 */ \ |
| 1015 | 1015 | " paddsw "r7","r0"\n" /* r0 = R0 = G. + C. */ \ |
| 1016 | "#end RowIDCT_10\n" \ | |
| 1016 | "#end RowIDCT_10\n" \ | |
| 1017 | 1017 | ); |
| 1018 | 1018 | // end RowIDCT macro (8 + 38 = 46 cycles) |
| 1019 | 1019 | |
| ... | ...@@ -1060,7 +1060,7 @@ | |
| 1060 | 1060 | " movq "r7","J(7)"\n" /* store NR7 at J7 */ \ |
| 1061 | 1061 | \ |
| 1062 | 1062 | " movq "r0","I(0)"\n" /* store NR0 at I0 */ \ |
| 1063 | "#end ColumnIDCT_10\n" \ | |
| 1063 | "#end ColumnIDCT_10\n" \ | |
| 1064 | 1064 | ); |
| 1065 | 1065 | // end ColumnIDCT macro (38 + 19 = 57 cycles) |
| 1066 | 1066 | /* --------------------------------------------------------------- */ |
| ... | ...@@ -1389,7 +1389,7 @@ | |
| 1389 | 1389 | ); |
| 1390 | 1390 | |
| 1391 | 1391 | ASM( |
| 1392 | "movq (%eax), "r0"\n" | |
| 1392 | "movq (%eax), "r0"\n" | |
| 1393 | 1393 | "pmullw (%esi), "r0"\n" /* r0 = 03 02 01 00 */ |
| 1394 | 1394 | "movq "M(0)", "r2"\n" /* r2 = __ __ __ FF */ |
| 1395 | 1395 | "movq "r0", "r3"\n" /* r3 = 03 02 01 00 */ |
| ... | ...@@ -1444,7 +1444,6 @@ | |
| 1444 | 1444 | /* install our implementation in the function table */ |
| 1445 | 1445 | void dsp_mmx_idct_init(DspFunctions *funcs) |
| 1446 | 1446 | { |
| 1447 | TH_DEBUG("enabling accelerated x86_32 mmx idct functions.\n"); | |
| 1448 | 1447 | funcs->IDctSlow = IDctSlow__mmx; |
| 1449 | 1448 | funcs->IDct10 = IDct10__mmx; |
| 1450 | 1449 | funcs->IDct3 = IDct3__mmx; |
| ... | ...@@ -50,12 +50,12 @@ | |
| 50 | 50 | |
| 51 | 51 | static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr, |
| 52 | 52 | ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine, |
| 53 | ogg_uint32_t ReconPixelsPerLine) | |
| 53 | ogg_uint32_t ReconPixelsPerLine) | |
| 54 | 54 | { |
| 55 | 55 | __asm__ __volatile__ ( |
| 56 | 56 | " .p2align 4 \n\t" |
| 57 | 57 | |
| 58 | " pxor %%mm7, %%mm7 \n\t" | |
| 58 | " pxor %%mm7, %%mm7 \n\t" | |
| 59 | 59 | SUB_LOOP |
| 60 | 60 | SUB_LOOP |
| 61 | 61 | SUB_LOOP |
| ... | ...@@ -68,7 +68,7 @@ | |
| 68 | 68 | "+r" (ReconPtr), |
| 69 | 69 | "+r" (DctInputPtr) |
| 70 | 70 | : "m" (PixelsPerLine), |
| 71 | "m" (ReconPixelsPerLine) | |
| 71 | "m" (ReconPixelsPerLine) | |
| 72 | 72 | : "memory" |
| 73 | 73 | ); |
| 74 | 74 | } |
| ... | ...@@ -86,16 +86,16 @@ | |
| 86 | 86 | " movq %%mm2, 8(%1) \n\t" /* write answer out */ \ |
| 87 | 87 | /* Increment pointers */ \ |
| 88 | 88 | " add $16, %1 \n\t" \ |
| 89 | " add %2, %0 \n\t" | |
| 89 | " add %2, %0 \n\t" | |
| 90 | 90 | |
| 91 | 91 | |
| 92 | 92 | static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr, |
| 93 | ogg_uint32_t PixelsPerLine) | |
| 93 | ogg_uint32_t PixelsPerLine) | |
| 94 | 94 | { |
| 95 | 95 | __asm__ __volatile__ ( |
| 96 | 96 | " .p2align 4 \n\t" |
| 97 | 97 | |
| 98 | " pxor %%mm7, %%mm7 \n\t" | |
| 98 | " pxor %%mm7, %%mm7 \n\t" | |
| 99 | 99 | " movq %[V128], %%mm1 \n\t" |
| 100 | 100 | SUB_128_LOOP |
| 101 | 101 | SUB_128_LOOP |
| ... | ...@@ -140,18 +140,18 @@ | |
| 140 | 140 | " add $16, %3 \n\t" \ |
| 141 | 141 | " add %4, %0 \n\t" \ |
| 142 | 142 | " add %5, %1 \n\t" \ |
| 143 | " add %5, %2 \n\t" | |
| 143 | " add %5, %2 \n\t" | |
| 144 | 144 | |
| 145 | 145 | |
| 146 | 146 | static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1, |
| 147 | 147 | unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr, |
| 148 | 148 | ogg_uint32_t PixelsPerLine, |
| 149 | ogg_uint32_t ReconPixelsPerLine) | |
| 149 | ogg_uint32_t ReconPixelsPerLine) | |
| 150 | 150 | { |