Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/h264_pred.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/h264_pred.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,945 @@ 1.4 +/* 1.5 + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 1.6 + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 1.7 + * 1.8 + * This file is part of FFmpeg. 1.9 + * 1.10 + * FFmpeg is free software; you can redistribute it and/or 1.11 + * modify it under the terms of the GNU Lesser General Public 1.12 + * License as published by the Free Software Foundation; either 1.13 + * version 2.1 of the License, or (at your option) any later version. 1.14 + * 1.15 + * FFmpeg is distributed in the hope that it will be useful, 1.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 1.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.18 + * Lesser General Public License for more details. 1.19 + * 1.20 + * You should have received a copy of the GNU Lesser General Public 1.21 + * License along with FFmpeg; if not, write to the Free Software 1.22 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 1.23 + */ 1.24 + 1.25 +/** 1.26 + * @file 1.27 + * H.264 / AVC / MPEG4 part10 prediction functions. 1.28 + * @author Michael Niedermayer <michaelni@gmx.at> 1.29 + */ 1.30 + 1.31 +#include "avcodec.h" 1.32 +#include "h264_pred.h" 1.33 +//#include "dsputil.h" 1.34 + 1.35 +static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){ 1.36 + (void) topright; 1.37 + const uint32_t a= ((uint32_t*)(src-stride))[0]; 1.38 + ((uint32_t*)(src+0*stride))[0]= a; 1.39 + ((uint32_t*)(src+1*stride))[0]= a; 1.40 + ((uint32_t*)(src+2*stride))[0]= a; 1.41 + ((uint32_t*)(src+3*stride))[0]= a; 1.42 +} 1.43 + 1.44 +static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){ 1.45 + (void) topright; 1.46 + ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101; 1.47 + ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101; 1.48 + ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101; 1.49 + ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101; 1.50 +} 1.51 + 1.52 +static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){ 1.53 + (void) topright; 1.54 + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] 1.55 + + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; 1.56 + 1.57 + ((uint32_t*)(src+0*stride))[0]= 1.58 + ((uint32_t*)(src+1*stride))[0]= 1.59 + ((uint32_t*)(src+2*stride))[0]= 1.60 + ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 1.61 +} 1.62 + 1.63 +static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){ 1.64 + (void) topright; 1.65 + const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; 1.66 + 1.67 + ((uint32_t*)(src+0*stride))[0]= 1.68 + ((uint32_t*)(src+1*stride))[0]= 1.69 + ((uint32_t*)(src+2*stride))[0]= 1.70 + ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 1.71 +} 1.72 + 1.73 +static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){ 1.74 + (void) topright; 1.75 + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; 1.76 + 1.77 + ((uint32_t*)(src+0*stride))[0]= 1.78 + ((uint32_t*)(src+1*stride))[0]= 1.79 + ((uint32_t*)(src+2*stride))[0]= 1.80 + ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 1.81 +} 1.82 + 1.83 +static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){ 1.84 + (void) topright; 1.85 + ((uint32_t*)(src+0*stride))[0]= 1.86 + ((uint32_t*)(src+1*stride))[0]= 1.87 + ((uint32_t*)(src+2*stride))[0]= 1.88 + ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U; 1.89 +} 1.90 + 1.91 + 1.92 +#define LOAD_TOP_RIGHT_EDGE\ 1.93 + const int av_unused t4= topright[0];\ 1.94 + const int av_unused t5= topright[1];\ 1.95 + const int av_unused t6= topright[2];\ 1.96 + const int av_unused t7= topright[3];\ 1.97 + 1.98 +#define LOAD_DOWN_LEFT_EDGE\ 1.99 + const int av_unused l4= src[-1+4*stride];\ 1.100 + const int av_unused l5= src[-1+5*stride];\ 1.101 + const int av_unused l6= src[-1+6*stride];\ 1.102 + const int av_unused l7= src[-1+7*stride];\ 1.103 + 1.104 +#define LOAD_LEFT_EDGE\ 1.105 + const int av_unused l0= src[-1+0*stride];\ 1.106 + const int av_unused l1= src[-1+1*stride];\ 1.107 + const int av_unused l2= src[-1+2*stride];\ 1.108 + const int av_unused l3= src[-1+3*stride];\ 1.109 + 1.110 +#define LOAD_TOP_EDGE\ 1.111 + const int av_unused t0= src[ 0-1*stride];\ 1.112 + const int av_unused t1= src[ 1-1*stride];\ 1.113 + const int av_unused t2= src[ 2-1*stride];\ 1.114 + const int av_unused t3= src[ 3-1*stride];\ 1.115 + 1.116 +static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){ 1.117 + (void) topright; 1.118 + const int lt= src[-1-1*stride]; 1.119 + LOAD_TOP_EDGE 1.120 + LOAD_LEFT_EDGE 1.121 + 1.122 + src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 1.123 + src[0+2*stride]= 1.124 + src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 1.125 + src[0+1*stride]= 1.126 + src[1+2*stride]= 1.127 + src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 1.128 + src[0+0*stride]= 1.129 + src[1+1*stride]= 1.130 + src[2+2*stride]= 1.131 + src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 1.132 + src[1+0*stride]= 1.133 + src[2+1*stride]= 1.134 + src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; 1.135 + src[2+0*stride]= 1.136 + src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 1.137 + src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; 1.138 +} 1.139 + 1.140 +static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){ 1.141 + LOAD_TOP_EDGE 1.142 + LOAD_TOP_RIGHT_EDGE 1.143 +// LOAD_LEFT_EDGE 1.144 + 1.145 + src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; 1.146 + src[1+0*stride]= 1.147 + src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; 1.148 + src[2+0*stride]= 1.149 + src[1+1*stride]= 1.150 + src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; 1.151 + src[3+0*stride]= 1.152 + src[2+1*stride]= 1.153 + src[1+2*stride]= 1.154 + src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; 1.155 + src[3+1*stride]= 1.156 + src[2+2*stride]= 1.157 + src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; 1.158 + src[3+2*stride]= 1.159 + src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; 1.160 + src[3+3*stride]=(t6 + 3*t7 + 2)>>2; 1.161 +} 1.162 + 1.163 +static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){ 1.164 + (void) topright; 1.165 + const int lt= src[-1-1*stride]; 1.166 + LOAD_TOP_EDGE 1.167 + LOAD_LEFT_EDGE 1.168 + 1.169 + src[0+0*stride]= 1.170 + src[1+2*stride]=(lt + t0 + 1)>>1; 1.171 + src[1+0*stride]= 1.172 + src[2+2*stride]=(t0 + t1 + 1)>>1; 1.173 + src[2+0*stride]= 1.174 + src[3+2*stride]=(t1 + t2 + 1)>>1; 1.175 + src[3+0*stride]=(t2 + t3 + 1)>>1; 1.176 + src[0+1*stride]= 1.177 + src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 1.178 + src[1+1*stride]= 1.179 + src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; 1.180 + src[2+1*stride]= 1.181 + src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; 1.182 + src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; 1.183 + src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 1.184 + src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 1.185 +} 1.186 + 1.187 +static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){ 1.188 + LOAD_TOP_EDGE 1.189 + LOAD_TOP_RIGHT_EDGE 1.190 + 1.191 + src[0+0*stride]=(t0 + t1 + 1)>>1; 1.192 + src[1+0*stride]= 1.193 + src[0+2*stride]=(t1 + t2 + 1)>>1; 1.194 + src[2+0*stride]= 1.195 + src[1+2*stride]=(t2 + t3 + 1)>>1; 1.196 + src[3+0*stride]= 1.197 + src[2+2*stride]=(t3 + t4+ 1)>>1; 1.198 + src[3+2*stride]=(t4 + t5+ 1)>>1; 1.199 + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 1.200 + src[1+1*stride]= 1.201 + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; 1.202 + src[2+1*stride]= 1.203 + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; 1.204 + src[3+1*stride]= 1.205 + src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; 1.206 + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; 1.207 +} 1.208 + 1.209 +static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){ 1.210 + (void) topright; 1.211 + LOAD_LEFT_EDGE 1.212 + 1.213 + src[0+0*stride]=(l0 + l1 + 1)>>1; 1.214 + src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; 1.215 + src[2+0*stride]= 1.216 + src[0+1*stride]=(l1 + l2 + 1)>>1; 1.217 + src[3+0*stride]= 1.218 + src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; 1.219 + src[2+1*stride]= 1.220 + src[0+2*stride]=(l2 + l3 + 1)>>1; 1.221 + src[3+1*stride]= 1.222 + src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; 1.223 + src[3+2*stride]= 1.224 + src[1+3*stride]= 1.225 + src[0+3*stride]= 1.226 + src[2+2*stride]= 1.227 + src[2+3*stride]= 1.228 + src[3+3*stride]=l3; 1.229 +} 1.230 + 1.231 + 1.232 +static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){ 1.233 + (void) topright; 1.234 + const int lt= src[-1-1*stride]; 1.235 + LOAD_TOP_EDGE 1.236 + LOAD_LEFT_EDGE 1.237 + 1.238 + src[0+0*stride]= 1.239 + src[2+1*stride]=(lt + l0 + 1)>>1; 1.240 + src[1+0*stride]= 1.241 + src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; 1.242 + src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; 1.243 + src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; 1.244 + src[0+1*stride]= 1.245 + src[2+2*stride]=(l0 + l1 + 1)>>1; 1.246 + src[1+1*stride]= 1.247 + src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 1.248 + src[0+2*stride]= 1.249 + src[2+3*stride]=(l1 + l2+ 1)>>1; 1.250 + src[1+2*stride]= 1.251 + src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 1.252 + src[0+3*stride]=(l2 + l3 + 1)>>1; 1.253 + src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; 1.254 +} 1.255 + 1.256 +static void pred16x16_vertical_c(uint8_t *src, int stride){ 1.257 + int i; 1.258 + const uint32_t a= ((uint32_t*)(src-stride))[0]; 1.259 + const uint32_t b= ((uint32_t*)(src-stride))[1]; 1.260 + const uint32_t c= ((uint32_t*)(src-stride))[2]; 1.261 + const uint32_t d= ((uint32_t*)(src-stride))[3]; 1.262 + 1.263 + for(i=0; i<16; i++){ 1.264 + ((uint32_t*)(src+i*stride))[0]= a; 1.265 + ((uint32_t*)(src+i*stride))[1]= b; 1.266 + ((uint32_t*)(src+i*stride))[2]= c; 1.267 + ((uint32_t*)(src+i*stride))[3]= d; 1.268 + } 1.269 +} 1.270 + 1.271 +static void pred16x16_horizontal_c(uint8_t *src, int stride){ 1.272 + int i; 1.273 + 1.274 + for(i=0; i<16; i++){ 1.275 + ((uint32_t*)(src+i*stride))[0]= 1.276 + ((uint32_t*)(src+i*stride))[1]= 1.277 + ((uint32_t*)(src+i*stride))[2]= 1.278 + ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101; 1.279 + } 1.280 +} 1.281 + 1.282 +static void pred16x16_dc_c(uint8_t *src, int stride){ 1.283 + int i, dc=0; 1.284 + 1.285 + for(i=0;i<16; i++){ 1.286 + dc+= src[-1+i*stride]; 1.287 + } 1.288 + 1.289 + for(i=0;i<16; i++){ 1.290 + dc+= src[i-stride]; 1.291 + } 1.292 + 1.293 + dc= 0x01010101*((dc + 16)>>5); 1.294 + 1.295 + for(i=0; i<16; i++){ 1.296 + ((uint32_t*)(src+i*stride))[0]= 1.297 + ((uint32_t*)(src+i*stride))[1]= 1.298 + ((uint32_t*)(src+i*stride))[2]= 1.299 + ((uint32_t*)(src+i*stride))[3]= dc; 1.300 + } 1.301 +} 1.302 + 1.303 +static void pred16x16_left_dc_c(uint8_t *src, int stride){ 1.304 + int i, dc=0; 1.305 + 1.306 + for(i=0;i<16; i++){ 1.307 + dc+= src[-1+i*stride]; 1.308 + } 1.309 + 1.310 + dc= 0x01010101*((dc + 8)>>4); 1.311 + 1.312 + for(i=0; i<16; i++){ 1.313 + ((uint32_t*)(src+i*stride))[0]= 1.314 + ((uint32_t*)(src+i*stride))[1]= 1.315 + ((uint32_t*)(src+i*stride))[2]= 1.316 + ((uint32_t*)(src+i*stride))[3]= dc; 1.317 + } 1.318 +} 1.319 + 1.320 +static void pred16x16_top_dc_c(uint8_t *src, int stride){ 1.321 + int i, dc=0; 1.322 + 1.323 + for(i=0;i<16; i++){ 1.324 + dc+= src[i-stride]; 1.325 + } 1.326 + dc= 0x01010101*((dc + 8)>>4); 1.327 + 1.328 + for(i=0; i<16; i++){ 1.329 + ((uint32_t*)(src+i*stride))[0]= 1.330 + ((uint32_t*)(src+i*stride))[1]= 1.331 + ((uint32_t*)(src+i*stride))[2]= 1.332 + ((uint32_t*)(src+i*stride))[3]= dc; 1.333 + } 1.334 +} 1.335 + 1.336 +static void pred16x16_128_dc_c(uint8_t *src, int stride){ 1.337 + int i; 1.338 + 1.339 + for(i=0; i<16; i++){ 1.340 + ((uint32_t*)(src+i*stride))[0]= 1.341 + ((uint32_t*)(src+i*stride))[1]= 1.342 + ((uint32_t*)(src+i*stride))[2]= 1.343 + ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; 1.344 + } 1.345 +} 1.346 + 1.347 +static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){ 1.348 + int i, j, k; 1.349 + int a; 1.350 + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 1.351 + const uint8_t * const src0 = src+7-stride; 1.352 + const uint8_t *src1 = src+8*stride-1; 1.353 + const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; 1.354 + int H = src0[1] - src0[-1]; 1.355 + int V = src1[0] - src2[ 0]; 1.356 + for(k=2; k<=8; ++k) { 1.357 + src1 += stride; src2 -= stride; 1.358 + H += k*(src0[k] - src0[-k]); 1.359 + V += k*(src1[0] - src2[ 0]); 1.360 + } 1.361 + if(svq3){ 1.362 + H = ( 5*(H/4) ) / 16; 1.363 + V = ( 5*(V/4) ) / 16; 1.364 + 1.365 + /* required for 100% accuracy */ 1.366 + i = H; H = V; V = i; 1.367 + }else if(rv40){ 1.368 + H = ( H + (H>>2) ) >> 4; 1.369 + V = ( V + (V>>2) ) >> 4; 1.370 + }else{ 1.371 + H = ( 5*H+32 ) >> 6; 1.372 + V = ( 5*V+32 ) >> 6; 1.373 + } 1.374 + 1.375 + a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); 1.376 + for(j=16; j>0; --j) { 1.377 + int b = a; 1.378 + a += V; 1.379 + for(i=-16; i<0; i+=4) { 1.380 + src[16+i] = cm[ (b ) >> 5 ]; 1.381 + src[17+i] = cm[ (b+ H) >> 5 ]; 1.382 + src[18+i] = cm[ (b+2*H) >> 5 ]; 1.383 + src[19+i] = cm[ (b+3*H) >> 5 ]; 1.384 + b += 4*H; 1.385 + } 1.386 + src += stride; 1.387 + } 1.388 +} 1.389 + 1.390 +static void pred16x16_plane_c(uint8_t *src, int stride){ 1.391 + pred16x16_plane_compat_c(src, stride, 0, 0); 1.392 +} 1.393 + 1.394 + 1.395 +static void pred8x8_vertical_c(uint8_t *src, int stride){ 1.396 + int i; 1.397 + const uint32_t a= ((uint32_t*)(src-stride))[0]; 1.398 + const uint32_t b= ((uint32_t*)(src-stride))[1]; 1.399 + 1.400 + for(i=0; i<8; i++){ 1.401 + ((uint32_t*)(src+i*stride))[0]= a; 1.402 + ((uint32_t*)(src+i*stride))[1]= b; 1.403 + } 1.404 +} 1.405 + 1.406 +static void pred8x8_horizontal_c(uint8_t *src, int stride){ 1.407 + int i; 1.408 + 1.409 + for(i=0; i<8; i++){ 1.410 + ((uint32_t*)(src+i*stride))[0]= 1.411 + ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101; 1.412 + } 1.413 +} 1.414 + 1.415 +static void pred8x8_128_dc_c(uint8_t *src, int stride){ 1.416 + int i; 1.417 + 1.418 + for(i=0; i<8; i++){ 1.419 + ((uint32_t*)(src+i*stride))[0]= 1.420 + ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; 1.421 + } 1.422 +} 1.423 + 1.424 +static void pred8x8_left_dc_c(uint8_t *src, int stride){ 1.425 + int i; 1.426 + int dc0, dc2; 1.427 + 1.428 + dc0=dc2=0; 1.429 + for(i=0;i<4; i++){ 1.430 + dc0+= src[-1+i*stride]; 1.431 + dc2+= src[-1+(i+4)*stride]; 1.432 + } 1.433 + dc0= 0x01010101*((dc0 + 2)>>2); 1.434 + dc2= 0x01010101*((dc2 + 2)>>2); 1.435 + 1.436 + for(i=0; i<4; i++){ 1.437 + ((uint32_t*)(src+i*stride))[0]= 1.438 + ((uint32_t*)(src+i*stride))[1]= dc0; 1.439 + } 1.440 + for(i=4; i<8; i++){ 1.441 + ((uint32_t*)(src+i*stride))[0]= 1.442 + ((uint32_t*)(src+i*stride))[1]= dc2; 1.443 + } 1.444 +} 1.445 + 1.446 + 1.447 +static void pred8x8_top_dc_c(uint8_t *src, int stride){ 1.448 + int i; 1.449 + int dc0, dc1; 1.450 + 1.451 + dc0=dc1=0; 1.452 + for(i=0;i<4; i++){ 1.453 + dc0+= src[i-stride]; 1.454 + dc1+= src[4+i-stride]; 1.455 + } 1.456 + dc0= 0x01010101*((dc0 + 2)>>2); 1.457 + dc1= 0x01010101*((dc1 + 2)>>2); 1.458 + 1.459 + for(i=0; i<4; i++){ 1.460 + ((uint32_t*)(src+i*stride))[0]= dc0; 1.461 + ((uint32_t*)(src+i*stride))[1]= dc1; 1.462 + } 1.463 + for(i=4; i<8; i++){ 1.464 + ((uint32_t*)(src+i*stride))[0]= dc0; 1.465 + ((uint32_t*)(src+i*stride))[1]= dc1; 1.466 + } 1.467 +} 1.468 + 1.469 +static void pred8x8_dc_c(uint8_t *src, int stride){ 1.470 + int i; 1.471 + int dc0, dc1, dc2, dc3; 1.472 + 1.473 + dc0=dc1=dc2=0; 1.474 + for(i=0;i<4; i++){ 1.475 + dc0+= src[-1+i*stride] + src[i-stride]; 1.476 + dc1+= src[4+i-stride]; 1.477 + dc2+= src[-1+(i+4)*stride]; 1.478 + } 1.479 + dc3= 0x01010101*((dc1 + dc2 + 4)>>3); 1.480 + dc0= 0x01010101*((dc0 + 4)>>3); 1.481 + dc1= 0x01010101*((dc1 + 2)>>2); 1.482 + dc2= 0x01010101*((dc2 + 2)>>2); 1.483 + 1.484 + for(i=0; i<4; i++){ 1.485 + ((uint32_t*)(src+i*stride))[0]= dc0; 1.486 + ((uint32_t*)(src+i*stride))[1]= dc1; 1.487 + } 1.488 + for(i=4; i<8; i++){ 1.489 + ((uint32_t*)(src+i*stride))[0]= dc2; 1.490 + ((uint32_t*)(src+i*stride))[1]= dc3; 1.491 + } 1.492 +} 1.493 + 1.494 +//the following 4 function should not be optimized! 1.495 +static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){ 1.496 + pred8x8_top_dc_c(src, stride); 1.497 + pred4x4_dc_c(src, NULL, stride); 1.498 +} 1.499 + 1.500 +static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){ 1.501 + pred8x8_dc_c(src, stride); 1.502 + pred4x4_top_dc_c(src, NULL, stride); 1.503 +} 1.504 + 1.505 +static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){ 1.506 + pred8x8_left_dc_c(src, stride); 1.507 + pred4x4_128_dc_c(src + 4*stride , NULL, stride); 1.508 + pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride); 1.509 +} 1.510 + 1.511 +static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){ 1.512 + pred8x8_left_dc_c(src, stride); 1.513 + pred4x4_128_dc_c(src , NULL, stride); 1.514 + pred4x4_128_dc_c(src + 4, NULL, stride); 1.515 +} 1.516 + 1.517 +static void pred8x8_plane_c(uint8_t *src, int stride){ 1.518 + int j, k; 1.519 + int a; 1.520 + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 1.521 + const uint8_t * const src0 = src+3-stride; 1.522 + const uint8_t *src1 = src+4*stride-1; 1.523 + const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; 1.524 + int H = src0[1] - src0[-1]; 1.525 + int V = src1[0] - src2[ 0]; 1.526 + for(k=2; k<=4; ++k) { 1.527 + src1 += stride; src2 -= stride; 1.528 + H += k*(src0[k] - src0[-k]); 1.529 + V += k*(src1[0] - src2[ 0]); 1.530 + } 1.531 + H = ( 17*H+16 ) >> 5; 1.532 + V = ( 17*V+16 ) >> 5; 1.533 + 1.534 + a = 16*(src1[0] + src2[8]+1) - 3*(V+H); 1.535 + for(j=8; j>0; --j) { 1.536 + int b = a; 1.537 + a += V; 1.538 + src[0] = cm[ (b ) >> 5 ]; 1.539 + src[1] = cm[ (b+ H) >> 5 ]; 1.540 + src[2] = cm[ (b+2*H) >> 5 ]; 1.541 + src[3] = cm[ (b+3*H) >> 5 ]; 1.542 + src[4] = cm[ (b+4*H) >> 5 ]; 1.543 + src[5] = cm[ (b+5*H) >> 5 ]; 1.544 + src[6] = cm[ (b+6*H) >> 5 ]; 1.545 + src[7] = cm[ (b+7*H) >> 5 ]; 1.546 + src += stride; 1.547 + } 1.548 +} 1.549 + 1.550 +#define SRC(x,y) src[(x)+(y)*stride] 1.551 +#define PL(y) \ 1.552 + const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; 1.553 +#define PREDICT_8x8_LOAD_LEFT \ 1.554 + const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ 1.555 + + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ 1.556 + PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ 1.557 + const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 1.558 + 1.559 +#define PT(x) \ 1.560 + const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 1.561 +#define PREDICT_8x8_LOAD_TOP \ 1.562 + const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ 1.563 + + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ 1.564 + PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ 1.565 + const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ 1.566 + + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 1.567 + 1.568 +#define PTR(x) \ 1.569 + t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 1.570 +#define PREDICT_8x8_LOAD_TOPRIGHT \ 1.571 + int t8, t9, t10, t11, t12, t13, t14, t15; \ 1.572 + if(has_topright) { \ 1.573 + PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ 1.574 + t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ 1.575 + } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); 1.576 + 1.577 +#define PREDICT_8x8_LOAD_TOPLEFT \ 1.578 + const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 1.579 + 1.580 +#define PREDICT_8x8_DC(v) \ 1.581 + int y; \ 1.582 + for( y = 0; y < 8; y++ ) { \ 1.583 + ((uint32_t*)src)[0] = \ 1.584 + ((uint32_t*)src)[1] = v; \ 1.585 + src += stride; \ 1.586 + } 1.587 + 1.588 +static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.589 + (void) has_topleft; (void) has_topright; 1.590 + PREDICT_8x8_DC(0x80808080); 1.591 +} 1.592 + 1.593 +static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.594 + (void) has_topleft; (void) has_topright; 1.595 + PREDICT_8x8_LOAD_LEFT; 1.596 + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101; 1.597 + PREDICT_8x8_DC(dc); 1.598 +} 1.599 + 1.600 +static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.601 + PREDICT_8x8_LOAD_TOP; 1.602 + const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101; 1.603 + PREDICT_8x8_DC(dc); 1.604 +} 1.605 + 1.606 +static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.607 + PREDICT_8x8_LOAD_LEFT; 1.608 + PREDICT_8x8_LOAD_TOP; 1.609 + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7 1.610 + +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101; 1.611 + PREDICT_8x8_DC(dc); 1.612 +} 1.613 + 1.614 +static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.615 + (void) has_topleft; (void) has_topright; 1.616 + PREDICT_8x8_LOAD_LEFT; 1.617 +#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\ 1.618 + ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y 1.619 + ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); 1.620 +#undef ROW 1.621 +} 1.622 + 1.623 +static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.624 + int y; 1.625 + PREDICT_8x8_LOAD_TOP; 1.626 + src[0] = t0; 1.627 + src[1] = t1; 1.628 + src[2] = t2; 1.629 + src[3] = t3; 1.630 + src[4] = t4; 1.631 + src[5] = t5; 1.632 + src[6] = t6; 1.633 + src[7] = t7; 1.634 + for( y = 1; y < 8; y++ ) 1.635 + *(uint64_t*)(src+y*stride) = *(uint64_t*)src; 1.636 +} 1.637 + 1.638 +static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.639 + PREDICT_8x8_LOAD_TOP; 1.640 + PREDICT_8x8_LOAD_TOPRIGHT; 1.641 + SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; 1.642 + SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; 1.643 + SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; 1.644 + SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; 1.645 + SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; 1.646 + SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; 1.647 + SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; 1.648 + SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; 1.649 + SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; 1.650 + SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; 1.651 + SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; 1.652 + SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; 1.653 + SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; 1.654 + SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; 1.655 + SRC(7,7)= (t14 + 3*t15 + 2) >> 2; 1.656 +} 1.657 + 1.658 +static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.659 + PREDICT_8x8_LOAD_TOP; 1.660 + PREDICT_8x8_LOAD_LEFT; 1.661 + PREDICT_8x8_LOAD_TOPLEFT; 1.662 + SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; 1.663 + SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; 1.664 + SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; 1.665 + SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; 1.666 + SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; 1.667 + SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; 1.668 + SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; 1.669 + SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; 1.670 + SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; 1.671 + SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; 1.672 + SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; 1.673 + SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; 1.674 + SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; 1.675 + SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; 1.676 + SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; 1.677 +} 1.678 + 1.679 +static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.680 + PREDICT_8x8_LOAD_TOP; 1.681 + PREDICT_8x8_LOAD_LEFT; 1.682 + PREDICT_8x8_LOAD_TOPLEFT; 1.683 + SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; 1.684 + SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; 1.685 + SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; 1.686 + SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; 1.687 + SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; 1.688 + SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; 1.689 + SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; 1.690 + SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; 1.691 + SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; 1.692 + SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; 1.693 + SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; 1.694 + SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; 1.695 + SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; 1.696 + SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; 1.697 + SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; 1.698 + SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; 1.699 + SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; 1.700 + SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; 1.701 + SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; 1.702 + SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; 1.703 + SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; 1.704 + SRC(7,0)= (t6 + t7 + 1) >> 1; 1.705 +} 1.706 + 1.707 +static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.708 + PREDICT_8x8_LOAD_TOP; 1.709 + PREDICT_8x8_LOAD_LEFT; 1.710 + PREDICT_8x8_LOAD_TOPLEFT; 1.711 + SRC(0,7)= (l6 + l7 + 1) >> 1; 1.712 + SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; 1.713 + SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; 1.714 + SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; 1.715 + SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; 1.716 + SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; 1.717 + SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; 1.718 + SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; 1.719 + SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; 1.720 + SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; 1.721 + SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; 1.722 + SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; 1.723 + SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; 1.724 + SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; 1.725 + SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; 1.726 + SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; 1.727 + SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; 1.728 + SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; 1.729 + SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; 1.730 + SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; 1.731 + SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; 1.732 + SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; 1.733 +} 1.734 + 1.735 +static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.736 + PREDICT_8x8_LOAD_TOP; 1.737 + PREDICT_8x8_LOAD_TOPRIGHT; 1.738 + SRC(0,0)= (t0 + t1 + 1) >> 1; 1.739 + SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; 1.740 + SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; 1.741 + SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; 1.742 + SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; 1.743 + SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; 1.744 + SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; 1.745 + SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; 1.746 + SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; 1.747 + SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; 1.748 + SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; 1.749 + SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; 1.750 + SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; 1.751 + SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; 1.752 + SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; 1.753 + SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; 1.754 + SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; 1.755 + SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; 1.756 + SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; 1.757 + SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; 1.758 + SRC(7,6)= (t10 + t11 + 1) >> 1; 1.759 + SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; 1.760 +} 1.761 + 1.762 +static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride){ 1.763 + (void) has_topleft; (void) has_topright; 1.764 + PREDICT_8x8_LOAD_LEFT; 1.765 + SRC(0,0)= (l0 + l1 + 1) >> 1; 1.766 + SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; 1.767 + SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; 1.768 + SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; 1.769 + SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; 1.770 + SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; 1.771 + SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; 1.772 + SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; 1.773 + SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; 1.774 + SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; 1.775 + SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; 1.776 + SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; 1.777 + SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; 1.778 + SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; 1.779 + SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= 1.780 + SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= 1.781 + SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= 1.782 + SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; 1.783 +} 1.784 +#undef PREDICT_8x8_LOAD_LEFT 1.785 +#undef PREDICT_8x8_LOAD_TOP 1.786 +#undef PREDICT_8x8_LOAD_TOPLEFT 1.787 +#undef PREDICT_8x8_LOAD_TOPRIGHT 1.788 +#undef PREDICT_8x8_DC 1.789 +#undef PTR 1.790 +#undef PT 1.791 +#undef PL 1.792 +#undef SRC 1.793 + 1.794 +static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1.795 + int i; 1.796 + pix -= stride; 1.797 + for(i=0; i<4; i++){ 1.798 + uint8_t v = pix[0]; 1.799 + pix[1*stride]= v += block[0]; 1.800 + pix[2*stride]= v += block[4]; 1.801 + pix[3*stride]= v += block[8]; 1.802 + pix[4*stride]= v + block[12]; 1.803 + pix++; 1.804 + block++; 1.805 + } 1.806 +} 1.807 + 1.808 +static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1.809 + int i; 1.810 + for(i=0; i<4; i++){ 1.811 + uint8_t v = pix[-1]; 1.812 + pix[0]= v += block[0]; 1.813 + pix[1]= v += block[1]; 1.814 + pix[2]= v += block[2]; 1.815 + pix[3]= v + block[3]; 1.816 + pix+= stride; 1.817 + block+= 4; 1.818 + } 1.819 +} 1.820 + 1.821 +static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1.822 + int i; 1.823 + pix -= stride; 1.824 + for(i=0; i<8; i++){ 1.825 + uint8_t v = pix[0]; 1.826 + pix[1*stride]= v += block[0]; 1.827 + pix[2*stride]= v += block[8]; 1.828 + pix[3*stride]= v += block[16]; 1.829 + pix[4*stride]= v += block[24]; 1.830 + pix[5*stride]= v += block[32]; 1.831 + pix[6*stride]= v += block[40]; 1.832 + pix[7*stride]= v += block[48]; 1.833 + pix[8*stride]= v + block[56]; 1.834 + pix++; 1.835 + block++; 1.836 + } 1.837 +} 1.838 + 1.839 +static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1.840 + int i; 1.841 + for(i=0; i<8; i++){ 1.842 + uint8_t v = pix[-1]; 1.843 + pix[0]= v += block[0]; 1.844 + pix[1]= v += block[1]; 1.845 + pix[2]= v += block[2]; 1.846 + pix[3]= v += block[3]; 1.847 + pix[4]= v += block[4]; 1.848 + pix[5]= v += block[5]; 1.849 + pix[6]= v += block[6]; 1.850 + pix[7]= v + block[7]; 1.851 + pix+= stride; 1.852 + block+= 8; 1.853 + } 1.854 +} 1.855 + 1.856 +static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1.857 + int i; 1.858 + for(i=0; i<16; i++) 1.859 + pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride); 1.860 +} 1.861 + 1.862 +static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1.863 + int i; 1.864 + for(i=0; i<16; i++) 1.865 + pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride); 1.866 +} 1.867 + 1.868 +static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1.869 + int i; 1.870 + for(i=0; i<4; i++) 1.871 + pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride); 1.872 +} 1.873 + 1.874 +static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 1.875 + int i; 1.876 + for(i=0; i<4; i++) 1.877 + pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride); 1.878 +} 1.879 + 1.880 + 1.881 +/** 1.882 + * Sets the intra prediction function pointers. 1.883 + */ 1.884 +void ff_h264_pred_init(H264PredContext *h){ 1.885 + 1.886 + h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; 1.887 + h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; 1.888 + h->pred4x4[DC_PRED ]= pred4x4_dc_c; 1.889 + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c; 1.890 + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; 1.891 + h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; 1.892 + h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; 1.893 + h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c; 1.894 + h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c; 1.895 + h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c; 1.896 + h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; 1.897 + h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; 1.898 + 1.899 + h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c; 1.900 + h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c; 1.901 + h->pred8x8l[DC_PRED ]= pred8x8l_dc_c; 1.902 + h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c; 1.903 + h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c; 1.904 + h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c; 1.905 + h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c; 1.906 + h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c; 1.907 + h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c; 1.908 + h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c; 1.909 + h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; 1.910 + h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; 1.911 + 1.912 + h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; 1.913 + h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; 1.914 + h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c; 1.915 + 1.916 + h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; 1.917 + h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; 1.918 + h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; 1.919 + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t; 1.920 + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt; 1.921 + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00; 1.922 + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0; 1.923 + 1.924 + h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; 1.925 + 1.926 + h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; 1.927 + h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; 1.928 + h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; 1.929 + h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; 1.930 + 1.931 + h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; 1.932 + 1.933 + h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; 1.934 + h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; 1.935 + h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; 1.936 + 1.937 + //special lossless h/v prediction for h264 1.938 + h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c; 1.939 + h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c; 1.940 + h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c; 1.941 + h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c; 1.942 + h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c; 1.943 + h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c; 1.944 + h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c; 1.945 + h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c; 1.946 + 1.947 + if (HAVE_NEON) ff_h264_pred_init_arm(h); 1.948 +}
