]>
gcc.gnu.org Git - gcc.git/blob - gcc/testsuite/gcc.target/aarch64/pr98772.c
2 /* { dg-options "-O3 -save-temps" } */
4 #pragma GCC target "+nosve"
/* wplus: widening add kernel.
   For every y and x in [0, 4), stores pix1[x] + pix2[x] into the
   uint16_t destination element d[x + y*4].
   d     - output buffer; indices 0..15 are written by the visible loops.
   pix1  - first uint8_t input, restrict-qualified.
   pix2  - second uint8_t input, restrict-qualified.
   NOTE(review): this listing carries embedded source line numbers that
   skip values, so the return type, braces and any statements between
   the inner loop and the end of the outer loop are not visible here --
   confirm against the real file before relying on this description.  */
13 wplus (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
15 for (int y
= 0; y
< 4; y
++ )
17 for (int x
= 0; x
< 4; x
++ )
18 d
[x
+ y
*4] = pix1
[x
] + pix2
[x
];
/* wplus_no_opt: same visible loop body as wplus (d[x + y*4] = pix1[x] +
   pix2[x] for y, x in [0, 4)), but declared with
   __attribute__((optimize (0))).  The caller in this file runs it on
   d_b and compares the result with wplus run on d_a.
   NOTE(review): embedded line numbers skip values, so braces and any
   statements after the inner loop are not visible in this listing.  */
23 extern void __attribute__((optimize (0)))
24 wplus_no_opt (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
26 for (int y
= 0; y
< 4; y
++ )
28 for (int x
= 0; x
< 4; x
++ )
29 d
[x
+ y
*4] = pix1
[x
] + pix2
[x
];
/* wminus: widening subtract kernel.
   For every y and x in [0, 4), stores pix1[x] - pix2[x] into the
   uint16_t destination element d[x + y*4].
   d     - output buffer; indices 0..15 are written by the visible loops.
   pix1  - minuend bytes, restrict-qualified.
   pix2  - subtrahend bytes, restrict-qualified.
   NOTE(review): embedded line numbers skip values, so the return type,
   braces and any statements after the inner loop are not visible in
   this listing -- confirm against the real file.  */
36 wminus (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
38 for (int y
= 0; y
< 4; y
++ )
40 for (int x
= 0; x
< 4; x
++ )
41 d
[x
+ y
*4] = pix1
[x
] - pix2
[x
];
/* wminus_no_opt: same visible loop body as wminus (d[x + y*4] = pix1[x]
   - pix2[x] for y, x in [0, 4)), but declared with
   __attribute__((optimize (0))).  The caller in this file runs it on
   d_b and compares the result with wminus run on d_a.
   NOTE(review): embedded line numbers skip values, so braces and any
   statements after the inner loop are not visible in this listing.  */
46 extern void __attribute__((optimize (0)))
47 wminus_no_opt (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
49 for (int y
= 0; y
< 4; y
++ )
51 for (int x
= 0; x
< 4; x
++ )
52 d
[x
+ y
*4] = pix1
[x
] - pix2
[x
];
/* wmult: widening multiply kernel.
   For every y and x in [0, 4), stores pix1[x] * pix2[x] into the
   uint16_t destination element d[x + y*4].
   d     - output buffer; indices 0..15 are written by the visible loops.
   pix1  - first uint8_t factor, restrict-qualified.
   pix2  - second uint8_t factor, restrict-qualified.
   NOTE(review): embedded line numbers skip values, so the return type,
   braces and any statements after the inner loop are not visible in
   this listing -- confirm against the real file.  */
59 wmult (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
61 for (int y
= 0; y
< 4; y
++ )
63 for (int x
= 0; x
< 4; x
++ )
64 d
[x
+ y
*4] = pix1
[x
] * pix2
[x
];
/* wmult_no_opt: same visible loop body as wmult (d[x + y*4] = pix1[x] *
   pix2[x] for y, x in [0, 4)), but declared with
   __attribute__((optimize (0))).  The caller in this file runs it on
   d_b and compares the result with wmult run on d_a.
   NOTE(review): embedded line numbers skip values, so braces and any
   statements after the inner loop are not visible in this listing.  */
69 extern void __attribute__((optimize (0)))
70 wmult_no_opt (uint16_t *d
, uint8_t *restrict pix1
, uint8_t *restrict pix2
)
72 for (int y
= 0; y
< 4; y
++ )
74 for (int x
= 0; x
< 4; x
++ )
75 d
[x
+ y
*4] = pix1
[x
] * pix2
[x
];
/* wlshift: widening left-shift kernel.
   For every y and x in [0, 4), stores pix1[x] << 8 into the uint16_t
   destination element d[x + y*4].
   d     - output buffer; indices 0..15 are written by the visible loops.
   pix1  - uint8_t input, restrict-qualified.
   NOTE(review): embedded line numbers skip values, so the return type,
   braces and any statements after the inner loop are not visible in
   this listing -- confirm against the real file.  */
82 wlshift (uint16_t *d
, uint8_t *restrict pix1
)
85 for (int y
= 0; y
< 4; y
++ )
87 for (int x
= 0; x
< 4; x
++ )
88 d
[x
+ y
*4] = pix1
[x
] << 8;
/* wlshift_no_opt: same visible loop body as wlshift (d[x + y*4] =
   pix1[x] << 8 for y, x in [0, 4)), but declared with
   __attribute__((optimize (0))).  The caller in this file runs it on
   d_b before comparing d_a with d_b.
   NOTE(review): embedded line numbers skip values, so braces and any
   statements after the inner loop are not visible in this listing.  */
92 extern void __attribute__((optimize (0)))
93 wlshift_no_opt (uint16_t *d
, uint8_t *restrict pix1
)
96 for (int y
= 0; y
< 4; y
++ )
98 for (int x
= 0; x
< 4; x
++ )
99 d
[x
+ y
*4] = pix1
[x
] << 8;
/* init_arrays: fill the test buffers with a fixed pattern.
   d_a, d_b   - each element i in [0, DSIZE) is set to (1074 * i) % 17,
                so both destination buffers start out identical.
   pix1, pix2 - each element i in [0, PIXSIZE) is set to (1024 * i) % 17,
                so both input buffers start out identical.
   Declared with __attribute__((optimize (0))).
   NOTE(review): the definitions of DSIZE and PIXSIZE and the braces
   around the loop bodies are not visible in this listing.  */
104 void __attribute__((optimize (0)))
105 init_arrays (uint16_t *d_a
, uint16_t *d_b
, uint8_t *pix1
, uint8_t *pix2
)
107 for (int i
= 0; i
< DSIZE
; i
++)
109 d_a
[i
] = (1074 * i
)%17;
110 d_b
[i
] = (1074 * i
)%17;
112 for (int i
= 0; i
< PIXSIZE
; i
++)
114 pix1
[i
] = (1024 * i
)%17;
115 pix2
[i
] = (1024 * i
)%17;
/* Test driver.  For each kernel it re-initialises the buffers with
   init_arrays, runs the kernel on d_a and its optimize (0) twin on d_b,
   then compares d_a with d_b using memcmp.
   NOTE(review): the function header, the d_a/d_b declarations and the
   statements guarded by each `if` are not visible in this listing (the
   embedded line numbers skip values) -- confirm against the real file.  */
119 /* Don't optimize main so we don't get confused over where the vector
120 instructions are generated. */
121 __attribute__((optimize (0)))
126 uint8_t pix1
[PIXSIZE
];
127 uint8_t pix2
[PIXSIZE
];
/* Addition: wplus vs. wplus_no_opt.  */
129 init_arrays (d_a
, d_b
, pix1
, pix2
);
130 wplus (d_a
, pix1
, pix2
);
131 wplus_no_opt (d_b
, pix1
, pix2
);
/* NOTE(review): memcmp's third argument is a byte count.  d_a and d_b
   are passed to init_arrays as uint16_t *, so if they hold DSIZE
   uint16_t elements this compares only the first half of each buffer
   (DSIZE * sizeof (uint16_t) would cover all of it).  The declarations
   are not visible here -- confirm.  The same applies to every memcmp
   call below.  */
132 if (memcmp (d_a
,d_b
, DSIZE
) != 0)
/* Subtraction: wminus vs. wminus_no_opt.  */
135 init_arrays (d_a
, d_b
, pix1
, pix2
);
136 wminus (d_a
, pix1
, pix2
);
137 wminus_no_opt (d_b
, pix1
, pix2
);
138 if (memcmp (d_a
,d_b
, DSIZE
) != 0)
/* Multiplication: wmult vs. wmult_no_opt.  */
141 init_arrays (d_a
, d_b
, pix1
, pix2
);
142 wmult (d_a
, pix1
, pix2
);
143 wmult_no_opt (d_b
, pix1
, pix2
);
144 if (memcmp (d_a
,d_b
, DSIZE
) != 0)
/* Left shift.
   NOTE(review): no wlshift (d_a, pix1) call is visible between
   init_arrays and wlshift_no_opt; embedded line 148 is absent from this
   listing.  If the call is really missing, the comparison below checks
   d_a as left by init_arrays rather than the optimised kernel's output
   -- confirm against the real file.  */
147 init_arrays (d_a
, d_b
, pix1
, pix2
);
149 wlshift_no_opt (d_b
, pix1
);
150 if (memcmp (d_a
,d_b
, DSIZE
) != 0)
155 /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
156 /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
157 /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
158 /* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
This page took 0.042407 seconds and 5 git commands to generate.