FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dsputil_alpha.c
Go to the documentation of this file.
1 /*
2  * Alpha optimized DSP utils
3  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 #include "libavcodec/dsputil.h"
24 #include "dsputil_alpha.h"
25 #include "asm.h"
26 
/* Global function pointers to the clamped "put" and "add" pixel routines.
   Both share one signature: a read-only block of int16_t values, a
   writable uint8_t pixel pointer and an int line size.
   NOTE(review): the code that assigns these pointers is not visible in
   this listing -- presumably they are set during DSP initialisation so
   that other code can call the selected implementation; confirm.
   NOTE(review): line_size is int here but ptrdiff_t in the disabled
   reference functions further down in this file. */
27 void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
28  int line_size);
29 void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
30  int line_size);
31 
32 #if 0
33 /* These functions were the base for the optimized assembler routines,
34  and remain here for documentation purposes. */
/*
 * Reference C version of the clamped "put" routine (compiled out by the
 * surrounding #if 0).
 *
 * Runs exactly 8 iterations.  Each iteration loads two 64-bit quantities
 * with ldq() -- from block and from block + 4, i.e. 8 int16_t values in
 * total -- passes each through maxsw4(.., 0) and minsw4(.., clampmask),
 * and stores the pkwb() result with stl() to pixels and pixels + 4.
 * It then advances pixels by line_size and block by 8 elements.
 *
 * NOTE(review): zap/ldq/maxsw4/minsw4/pkwb/stl are not defined in this
 * file (presumably wrappers for Alpha instructions provided by asm.h).
 * Reading maxsw4/minsw4 as per-16-bit-lane signed max/min and pkwb as
 * "pack four words into four bytes" is inferred from the names and the
 * clampmask constant -- confirm against asm.h.
 */
35 static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
36  ptrdiff_t line_size)
37 {
38  int i = 8; /* number of loop iterations remaining */
39  uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
40 
41  do {
42  uint64_t shorts0, shorts1;
43 
     /* First four values: lower bound 0, upper bound clampmask. */
44  shorts0 = ldq(block);
45  shorts0 = maxsw4(shorts0, 0);
46  shorts0 = minsw4(shorts0, clampmask);
47  stl(pkwb(shorts0), pixels);
48 
     /* Next four values, same treatment, stored 4 bytes further on. */
49  shorts1 = ldq(block + 4);
50  shorts1 = maxsw4(shorts1, 0);
51  shorts1 = minsw4(shorts1, clampmask);
52  stl(pkwb(shorts1), pixels + 4);
53 
54  pixels += line_size;
55  block += 8;
56  } while (--i);
57 }
58 
/*
 * Reference C version of the clamped "add" routine (compiled out by the
 * surrounding #if 0).
 *
 * Runs exactly 8 iterations.  Each iteration loads two 64-bit quantities
 * from block (block and block + 4), loads the existing data at pixels and
 * pixels + 4 with ldl() and widens it with unpkbw(), adds the two
 * lane-wise, limits the sums with maxsw4(.., 0) / minsw4(.., clampmask),
 * and writes the pkwb() results back to pixels and pixels + 4.  It then
 * advances pixels by line_size and block by 8 elements.
 *
 * The lane-wise add is done with a plain 64-bit addition: the top bit of
 * every 16-bit lane of the block operand is saved and cleared first, and
 * XORed back into the sum afterwards.
 * NOTE(review): the apparent purpose is to stop a carry out of one lane
 * from reaching the next; this relies on every lane of the unpkbw()
 * result fitting in its low 8 bits -- confirm against asm.h.
 *
 * NOTE(review): zap/ldq/ldl/unpkbw/maxsw4/minsw4/pkwb/stl are not defined
 * in this file (presumably Alpha instruction wrappers from asm.h); their
 * described meaning is inferred from names and usage.
 * NOTE(review): unlike put_pixels_clamped_mvi above, this function is not
 * declared static.
 */
59 void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
60  ptrdiff_t line_size)
61 {
62  int h = 8; /* number of loop iterations remaining */
63  /* Keep this function a leaf function by generating the constants
64  manually (mainly for the hack value ;-). */
65  uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
66  uint64_t signmask = zap(-1, 0x33);
67  signmask ^= signmask >> 1; /* 0x8000800080008000 */
68 
69  do {
70  uint64_t shorts0, pix0, signs0;
71  uint64_t shorts1, pix1, signs1;
72 
73  shorts0 = ldq(block);
74  shorts1 = ldq(block + 4);
75 
76  pix0 = unpkbw(ldl(pixels));
77  /* Signed subword add (MMX paddw). */
78  signs0 = shorts0 & signmask; /* save the top bit of each lane */
79  shorts0 &= ~signmask; /* clear it before the 64-bit add */
80  shorts0 += pix0;
81  shorts0 ^= signs0; /* fold the saved top bits back in */
82  /* Clamp. */
83  shorts0 = maxsw4(shorts0, 0);
84  shorts0 = minsw4(shorts0, clampmask);
85 
86  /* Next 4. */
87  pix1 = unpkbw(ldl(pixels + 4));
88  signs1 = shorts1 & signmask;
89  shorts1 &= ~signmask;
90  shorts1 += pix1;
91  shorts1 ^= signs1;
92  shorts1 = maxsw4(shorts1, 0);
93  shorts1 = minsw4(shorts1, clampmask);
94 
     /* Both halves are written only after both have been computed. */
95  stl(pkwb(shorts0), pixels);
96  stl(pkwb(shorts1), pixels + 4);
97 
98  pixels += line_size;
99  block += 8;
100  } while (--h);
101 }
102 #endif
103 
/**
 * Zero 6 * 64 consecutive int16_t elements starting at @p blocks
 * (768 bytes in total).
 *
 * The stores go through the array's own element type.  An earlier version
 * wrote through a cast uint64_t pointer, which violates the C
 * effective-type (strict aliasing) rule and silently required @p blocks to
 * be 8-byte aligned; this form has neither problem and leaves the same
 * bytes zeroed.
 *
 * @param blocks  start of the storage to clear; must have room for at
 *                least 6 * 64 int16_t elements
 */
static void clear_blocks_axp(int16_t *blocks) {
    enum { BLOCK_COUNT = 6, COEFFS_PER_BLOCK = 64 };
    int i;

    for (i = 0; i < BLOCK_COUNT * COEFFS_PER_BLOCK; i++) {
        blocks[i] = 0;
    }
}
121 
123 {
124  const int high_bit_depth = avctx->bits_per_raw_sample > 8;
125 
126  if (!high_bit_depth) {
128  }
129 
130  /* amask clears all bits that correspond to present features. */
131  if (amask(AMASK_MVI) == 0) {
134 
135  if (!high_bit_depth)
138  c->sad[0] = pix_abs16x16_mvi_asm;
139  c->sad[1] = pix_abs8x8_mvi;
140  c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
141  c->pix_abs[1][0] = pix_abs8x8_mvi;
142  c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
143  c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
144  c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
145  }
146 
149 
150  if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
151  (avctx->idct_algo == FF_IDCT_AUTO ||
152  avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
156  }
157 }