]> gcc.gnu.org Git - gcc.git/blob - gcc/testsuite/gcc.target/aarch64/pr98772.c
AArch64: Undo vec_widen_<sur>shiftl optabs [PR106346]
[gcc.git] / gcc / testsuite / gcc.target / aarch64 / pr98772.c
1 /* { dg-do run } */
2 /* { dg-options "-O3 -save-temps" } */
3
4 #pragma GCC target "+nosve"
5
6 #include <stdint.h>
7 #include <string.h>
8
/* Number of uint16_t results each kernel writes: one 4x4 tile.  */
9 #define DSIZE 16
/* Number of bytes in each source pixel buffer: the kernels read 4 rows and
   advance the source pointers by 16 bytes per row.  */
10 #define PIXSIZE 64
11
/* Widening add kernel: for each of 4 rows, store pix1[x] + pix2[x]
   (x = 0..3) into the uint16_t output D at index x + y*4, then advance
   both source pointers by 16 bytes.  The sum is computed after the usual
   integer promotions and converted to uint16_t on the store.

   This version carries no optimize attribute, so it is built with the
   file-level dg-options.  NOTE(review): presumably this is the function
   the "uaddl" scan-assembler-times directive at the end of the file is
   aimed at -- keep the loop shape unchanged.  */
12 extern void
13 wplus (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
14 {
15 for (int y = 0; y < 4; y++ )
16 {
17 for (int x = 0; x < 4; x++ )
18 d[x + y*4] = pix1[x] + pix2[x];
/* Step to the next source row (rows are 16 bytes apart).  */
19 pix1 += 16;
20 pix2 += 16;
21 }
22 }
/* Reference widening add, compiled with optimize (0).  Produces the same
   4x4 tile of pix1[x] + pix2[x] sums as wplus so the two outputs can be
   compared: output row R starts at d[R * 4] and reads source bytes starting
   at offset R * 16.  */
extern void __attribute__((optimize (0)))
wplus_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
{
  for (int row = 0; row < 4; row++)
    {
      const uint8_t *lhs = pix1 + row * 16;
      const uint8_t *rhs = pix2 + row * 16;
      uint16_t *out = d + row * 4;

      for (int col = 0; col < 4; col++)
        out[col] = lhs[col] + rhs[col];
    }
}
34
/* Widening subtract kernel: for each of 4 rows, store pix1[x] - pix2[x]
   (x = 0..3) into the uint16_t output D at index x + y*4, then advance
   both source pointers by 16 bytes.  The difference is computed after the
   usual integer promotions, so a negative result wraps when converted to
   uint16_t on the store.

   This version carries no optimize attribute, so it is built with the
   file-level dg-options.  NOTE(review): presumably this is the function
   the "usubl" scan-assembler-times directive at the end of the file is
   aimed at -- keep the loop shape unchanged.  */
35 extern void
36 wminus (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
37 {
38 for (int y = 0; y < 4; y++ )
39 {
40 for (int x = 0; x < 4; x++ )
41 d[x + y*4] = pix1[x] - pix2[x];
/* Step to the next source row (rows are 16 bytes apart).  */
42 pix1 += 16;
43 pix2 += 16;
44 }
45 }
/* Reference widening subtract, compiled with optimize (0).  Produces the
   same 4x4 tile of pix1[x] - pix2[x] differences as wminus so the two
   outputs can be compared: output row R starts at d[R * 4] and reads source
   bytes starting at offset R * 16.  */
extern void __attribute__((optimize (0)))
wminus_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
{
  for (int row = 0; row < 4; row++)
    {
      const uint8_t *lhs = pix1 + row * 16;
      const uint8_t *rhs = pix2 + row * 16;
      uint16_t *out = d + row * 4;

      for (int col = 0; col < 4; col++)
        out[col] = lhs[col] - rhs[col];
    }
}
57
/* Widening multiply kernel: for each of 4 rows, store pix1[x] * pix2[x]
   (x = 0..3) into the uint16_t output D at index x + y*4, then advance
   both source pointers by 16 bytes.  The product of two uint8_t values is
   at most 255 * 255, so it always fits in the uint16_t destination.

   This version carries no optimize attribute, so it is built with the
   file-level dg-options.  NOTE(review): presumably this is the function
   the "umull" scan-assembler-times directive at the end of the file is
   aimed at -- keep the loop shape unchanged.  */
58 extern void
59 wmult (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
60 {
61 for (int y = 0; y < 4; y++ )
62 {
63 for (int x = 0; x < 4; x++ )
64 d[x + y*4] = pix1[x] * pix2[x];
/* Step to the next source row (rows are 16 bytes apart).  */
65 pix1 += 16;
66 pix2 += 16;
67 }
68 }
/* Reference widening multiply, compiled with optimize (0).  Produces the
   same 4x4 tile of pix1[x] * pix2[x] products as wmult so the two outputs
   can be compared: output row R starts at d[R * 4] and reads source bytes
   starting at offset R * 16.  */
extern void __attribute__((optimize (0)))
wmult_no_opt (uint16_t *d, uint8_t *restrict pix1, uint8_t *restrict pix2 )
{
  for (int row = 0; row < 4; row++)
    {
      const uint8_t *lhs = pix1 + row * 16;
      const uint8_t *rhs = pix2 + row * 16;
      uint16_t *out = d + row * 4;

      for (int col = 0; col < 4; col++)
        out[col] = lhs[col] * rhs[col];
    }
}
80
/* Widening left-shift kernel: for each of 4 rows, store pix1[x] << 8
   (x = 0..3) into the uint16_t output D at index x + y*4, then advance the
   source pointer by 16 bytes.  A uint8_t shifted left by 8 is at most
   0xFF00, so it always fits in the uint16_t destination.

   This version carries no optimize attribute, so it is built with the
   file-level dg-options.  NOTE(review): presumably this is the function
   the "shll" scan-assembler-times directive at the end of the file is
   aimed at -- keep the loop shape unchanged.  */
81 extern void
82 wlshift (uint16_t *d, uint8_t *restrict pix1)
83
84 {
85 for (int y = 0; y < 4; y++ )
86 {
87 for (int x = 0; x < 4; x++ )
88 d[x + y*4] = pix1[x] << 8;
/* Step to the next source row (rows are 16 bytes apart).  */
89 pix1 += 16;
90 }
91 }
/* Reference widening left shift, compiled with optimize (0).  Produces the
   same 4x4 tile of pix1[x] << 8 values as wlshift so the two outputs can be
   compared: output row R starts at d[R * 4] and reads source bytes starting
   at offset R * 16.  */
extern void __attribute__((optimize (0)))
wlshift_no_opt (uint16_t *d, uint8_t *restrict pix1)
{
  for (int row = 0; row < 4; row++)
    {
      const uint8_t *src = pix1 + row * 16;
      uint16_t *out = d + row * 4;

      for (int col = 0; col < 4; col++)
        out[col] = src[col] << 8;
    }
}
103
104 void __attribute__((optimize (0)))
105 init_arrays (uint16_t *d_a, uint16_t *d_b, uint8_t *pix1, uint8_t *pix2)
106 {
107 for (int i = 0; i < DSIZE; i++)
108 {
109 d_a[i] = (1074 * i)%17;
110 d_b[i] = (1074 * i)%17;
111 }
112 for (int i = 0; i < PIXSIZE; i++)
113 {
114 pix1[i] = (1024 * i)%17;
115 pix2[i] = (1024 * i)%17;
116 }
117 }
118
119 /* Don't optimize main so we don't get confused over where the vector
120 instructions are generated. */
121 __attribute__((optimize (0)))
122 int main ()
123 {
124 uint16_t d_a[DSIZE];
125 uint16_t d_b[DSIZE];
126 uint8_t pix1[PIXSIZE];
127 uint8_t pix2[PIXSIZE];
128
129 init_arrays (d_a, d_b, pix1, pix2);
130 wplus (d_a, pix1, pix2);
131 wplus_no_opt (d_b, pix1, pix2);
132 if (memcmp (d_a,d_b, DSIZE) != 0)
133 return 1;
134
135 init_arrays (d_a, d_b, pix1, pix2);
136 wminus (d_a, pix1, pix2);
137 wminus_no_opt (d_b, pix1, pix2);
138 if (memcmp (d_a,d_b, DSIZE) != 0)
139 return 2;
140
141 init_arrays (d_a, d_b, pix1, pix2);
142 wmult (d_a, pix1, pix2);
143 wmult_no_opt (d_b, pix1, pix2);
144 if (memcmp (d_a,d_b, DSIZE) != 0)
145 return 3;
146
147 init_arrays (d_a, d_b, pix1, pix2);
148 wlshift (d_a, pix1);
149 wlshift_no_opt (d_b, pix1);
150 if (memcmp (d_a,d_b, DSIZE) != 0)
151 return 4;
152
153 }
154
155 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
156 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
157 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
158 /* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
This page took 0.042407 seconds and 5 git commands to generate.