Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > VSs > VSs__H264__App
diff libavcodec/arm/dsputil_iwmmxt.c @ 2:897f711a7157
rearrange to work with autoconf
| author | Nina Engelhardt <nengel@mailbox.tu-berlin.de> |
|---|---|
| date | Tue, 25 Sep 2012 15:55:33 +0200 |
| parents | |
| children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/libavcodec/arm/dsputil_iwmmxt.c Tue Sep 25 15:55:33 2012 +0200 1.3 @@ -0,0 +1,205 @@ 1.4 +/* 1.5 + * iWMMXt optimized DSP utils 1.6 + * Copyright (c) 2004 AGAWA Koji 1.7 + * 1.8 + * This file is part of FFmpeg. 1.9 + * 1.10 + * FFmpeg is free software; you can redistribute it and/or 1.11 + * modify it under the terms of the GNU Lesser General Public 1.12 + * License as published by the Free Software Foundation; either 1.13 + * version 2.1 of the License, or (at your option) any later version. 1.14 + * 1.15 + * FFmpeg is distributed in the hope that it will be useful, 1.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 1.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.18 + * Lesser General Public License for more details. 1.19 + * 1.20 + * You should have received a copy of the GNU Lesser General Public 1.21 + * License along with FFmpeg; if not, write to the Free Software 1.22 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 1.23 + */ 1.24 + 1.25 +#include "libavcodec/dsputil.h" 1.26 + 1.27 +#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt 1.28 +#define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); 1.29 +#define WAVG2B "wavg2b" 1.30 +#include "dsputil_iwmmxt_rnd_template.c" 1.31 +#undef DEF 1.32 +#undef SET_RND 1.33 +#undef WAVG2B 1.34 + 1.35 +#define DEF(x, y) x ## _ ## y ##_iwmmxt 1.36 +#define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); 1.37 +#define WAVG2B "wavg2br" 1.38 +#include "dsputil_iwmmxt_rnd_template.c" 1.39 +#undef DEF 1.40 +#undef SET_RND 1.41 +#undef WAVG2BR 1.42 + 1.43 +// need scheduling 1.44 +#define OP(AVG) \ 1.45 + __asm__ volatile ( \ 1.46 + /* alignment */ \ 1.47 + "and r12, %[pixels], #7 \n\t" \ 1.48 + "bic %[pixels], %[pixels], #7 \n\t" \ 1.49 + "tmcr wcgr1, r12 \n\t" \ 1.50 + \ 1.51 + "wldrd wr0, [%[pixels]] \n\t" \ 1.52 + "wldrd wr1, [%[pixels], #8] \n\t" \ 1.53 + "add %[pixels], %[pixels], %[line_size] \n\t" \ 1.54 + "walignr1 wr4, wr0, wr1 \n\t" \ 1.55 + \ 1.56 + "1: \n\t" \ 1.57 + \ 1.58 + "wldrd wr2, [%[pixels]] \n\t" \ 1.59 + "wldrd wr3, [%[pixels], #8] \n\t" \ 1.60 + "add %[pixels], %[pixels], %[line_size] \n\t" \ 1.61 + "pld [%[pixels]] \n\t" \ 1.62 + "walignr1 wr5, wr2, wr3 \n\t" \ 1.63 + AVG " wr6, wr4, wr5 \n\t" \ 1.64 + "wstrd wr6, [%[block]] \n\t" \ 1.65 + "add %[block], %[block], %[line_size] \n\t" \ 1.66 + \ 1.67 + "wldrd wr0, [%[pixels]] \n\t" \ 1.68 + "wldrd wr1, [%[pixels], #8] \n\t" \ 1.69 + "add %[pixels], %[pixels], %[line_size] \n\t" \ 1.70 + "walignr1 wr4, wr0, wr1 \n\t" \ 1.71 + "pld [%[pixels]] \n\t" \ 1.72 + AVG " wr6, wr4, wr5 \n\t" \ 1.73 + "wstrd wr6, [%[block]] \n\t" \ 1.74 + "add %[block], %[block], %[line_size] \n\t" \ 1.75 + \ 1.76 + "subs %[h], %[h], #2 \n\t" \ 1.77 + "bne 1b \n\t" \ 1.78 + : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ 1.79 + : [line_size]"r"(line_size) \ 1.80 + : "memory", "r12"); 1.81 +void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) 1.82 +{ 1.83 + OP("wavg2br"); 1.84 +} 1.85 +void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) 1.86 +{ 1.87 + OP("wavg2b"); 1.88 +} 1.89 +#undef OP 1.90 + 1.91 +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) 1.92 +{ 1.93 + uint8_t *pixels2 = pixels + line_size; 1.94 + 1.95 + __asm__ volatile ( 1.96 + "mov r12, #4 \n\t" 1.97 + "1: \n\t" 1.98 + "pld [%[pixels], %[line_size2]] \n\t" 1.99 + "pld [%[pixels2], %[line_size2]] \n\t" 1.100 + "wldrd wr4, [%[pixels]] \n\t" 1.101 + "wldrd wr5, [%[pixels2]] \n\t" 1.102 + "pld [%[block], #32] \n\t" 1.103 + "wunpckelub wr6, wr4 \n\t" 1.104 + "wldrd wr0, [%[block]] \n\t" 1.105 + "wunpckehub wr7, wr4 \n\t" 1.106 + "wldrd wr1, [%[block], #8] \n\t" 1.107 + "wunpckelub wr8, wr5 \n\t" 1.108 + "wldrd wr2, [%[block], #16] \n\t" 1.109 + "wunpckehub wr9, wr5 \n\t" 1.110 + "wldrd wr3, [%[block], #24] \n\t" 1.111 + "add %[block], %[block], #32 \n\t" 1.112 + "waddhss wr10, wr0, wr6 \n\t" 1.113 + "waddhss wr11, wr1, wr7 \n\t" 1.114 + "waddhss wr12, wr2, wr8 \n\t" 1.115 + "waddhss wr13, wr3, wr9 \n\t" 1.116 + "wpackhus wr14, wr10, wr11 \n\t" 1.117 + "wpackhus wr15, wr12, wr13 \n\t" 1.118 + "wstrd wr14, [%[pixels]] \n\t" 1.119 + "add %[pixels], %[pixels], %[line_size2] \n\t" 1.120 + "subs r12, r12, #1 \n\t" 1.121 + "wstrd wr15, [%[pixels2]] \n\t" 1.122 + "add %[pixels2], %[pixels2], %[line_size2] \n\t" 1.123 + "bne 1b \n\t" 1.124 + : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) 1.125 + : [line_size2]"r"(line_size << 1) 1.126 + : "cc", "memory", "r12"); 1.127 +} 1.128 + 1.129 +static void clear_blocks_iwmmxt(DCTELEM *blocks) 1.130 +{ 1.131 + __asm__ volatile( 1.132 + "wzero wr0 \n\t" 1.133 + "mov r1, #(128 * 6 / 32) \n\t" 1.134 + "1: \n\t" 1.135 + "wstrd wr0, [%0] \n\t" 1.136 + "wstrd wr0, [%0, #8] \n\t" 1.137 + "wstrd wr0, [%0, #16] \n\t" 1.138 + "wstrd wr0, [%0, #24] \n\t" 1.139 + "subs r1, r1, #1 \n\t" 1.140 + "add %0, %0, #32 \n\t" 1.141 + "bne 1b \n\t" 1.142 + : "+r"(blocks) 1.143 + : 1.144 + : "r1" 1.145 + ); 1.146 +} 1.147 + 1.148 +static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) 1.149 +{ 1.150 + return; 1.151 +} 1.152 + 1.153 +/* A run time test is not simple. If this file is compiled in 1.154 + * then we should install the functions 1.155 + */ 1.156 +int mm_flags = FF_MM_IWMMXT; /* multimedia extension flags */ 1.157 + 1.158 +void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) 1.159 +{ 1.160 + if (avctx->dsp_mask) { 1.161 + if (avctx->dsp_mask & FF_MM_FORCE) 1.162 + mm_flags |= (avctx->dsp_mask & 0xffff); 1.163 + else 1.164 + mm_flags &= ~(avctx->dsp_mask & 0xffff); 1.165 + } 1.166 + 1.167 + if (!(mm_flags & FF_MM_IWMMXT)) return; 1.168 + 1.169 + c->add_pixels_clamped = add_pixels_clamped_iwmmxt; 1.170 + 1.171 + c->clear_blocks = clear_blocks_iwmmxt; 1.172 + 1.173 + c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; 1.174 + c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; 1.175 + c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; 1.176 + c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; 1.177 + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; 1.178 + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; 1.179 + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; 1.180 + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; 1.181 + 1.182 + c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; 1.183 + c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; 1.184 + c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; 1.185 + c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; 1.186 + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; 1.187 + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; 1.188 + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; 1.189 + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; 1.190 + 1.191 + c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; 1.192 + c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; 1.193 + c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; 1.194 + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; 1.195 + c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; 1.196 + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; 1.197 + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; 1.198 + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; 1.199 + 1.200 + c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; 1.201 + c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; 1.202 + c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; 1.203 + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; 1.204 + c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt; 1.205 + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; 1.206 + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; 1.207 + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; 1.208 +}
