Created attachment 33710 [details] assembly When compile the code shown below using GCC 5.0 for ARM with the following options: -O2 -ftree-vectorize -march=armv7-a -mfpu=neon -funroll-loops --param=max-completely-peeled-insns=400 // The code: typedef struct { unsigned char map[256]; int i; } A, *AP; void* calloc(int, int); AP foo(int n) { AP b = calloc(1, sizeof(A)); int i; for (i = n; i < 256; i++) b->map[i] = i; return b; } A instruction vst1.64 {d0-d1}, [r2:64] is generated, which is an aligned store with 8 bytes alignment requirement. However this requirement cannot be satisfied as the loop is not peeled for alignment, and the start address on the array is unknown at compile time. I have attached the generated assembly code here.
The problem is in 116t.vect pass. ... 67 vector(16) unsigned char * vectp_b.15; 68 <var_decl 0x7f1361c15e10 vectp_b.15 69 type <pointer_type 0x7f1361b56e70 type <vector_type 0x7f1361b56540> 70 unsigned SI 71 size <integer_cst 0x7f1361c0beb8 constant 32> 72 unit size <integer_cst 0x7f1361c0bed0 constant 4> 73 align 32 symtab 0 alias set -1 canonical type 0x7f1361b56e70> 74 used unsigned ignored SI file t3.c line 8 col 8 75 size <integer_cst 0x7f1361c0beb8 constant 32> 76 unit size <integer_cst 0x7f1361c0bed0 constant 4> 77 align 32 context <function_decl 0x7f1361b40300 foo>> 78 vector(16) unsigned char * vectp_b.14; 79 <var_decl 0x7f1361c15d80 vectp_b.14 80 type <pointer_type 0x7f1361b56e70 type <vector_type 0x7f1361b56540> 81 unsigned SI 82 size <integer_cst 0x7f1361c0beb8 constant 32> 83 unit size <integer_cst 0x7f1361c0bed0 constant 4> 84 align 32 symtab 0 alias set -1 canonical type 0x7f1361b56e70> 85 used unsigned ignored SI file t3.c line 8 col 8 86 size <integer_cst 0x7f1361c0beb8 constant 32> 87 unit size <integer_cst 0x7f1361c0bed0 constant 4> 88 align 32 context <function_decl 0x7f1361b40300 foo>> ... 100 vector(16) unsigned char vect__7.12; 101 <var_decl 0x7f1361c15c60 vect__7.12 102 type <vector_type 0x7f1361b56540 type <integer_type 0x7f1361c0f3f0 unsigned char> 103 unsigned V16QI 104 size <integer_cst 0x7f1361c1d270 constant 128> 105 unit size <integer_cst 0x7f1361c1d288 constant 16> 106 align 64 symtab 0 alias set 0 canonical type 0x7f1361b56540 nunits 16 107 pointer_to_this <pointer_type 0x7f1361b56e70>> 108 used unsigned ignored V16QI file t3.c line 8 col 8 109 size <integer_cst 0x7f1361c1d270 constant 128> 110 unit size <integer_cst 0x7f1361c1d288 constant 16> 111 align 64 context <function_decl 0x7f1361b40300 foo>> ... 223 unsigned char _7; 224 unsigned int _11; 225 unsigned int _15; 226 unsigned int _16; 227 unsigned char _20; 228 unsigned int _23; 229 unsigned int _24; 230 int _32; 231 sizetype _48; 232 unsigned int ivtmp_56; 233 unsigned int ivtmp_57; 234 235 <bb 2>: 236 b_5 = calloc (1, 260); 237 if (n_6(D) <= 255) 238 goto <bb 4>; 239 else 240 goto <bb 3>; 241 242 <bb 3>: 243 return b_5; 244 245 <bb 4>: 246 _11 = (unsigned int) n_6(D); 247 niters.3_2 = 256 - _11; 248 _15 = niters.3_2 + 4294967280; 249 _16 = _15 >> 4; 250 bnd.4_10 = _16 + 1; 251 ratio_mult_vf.5_17 = bnd.4_10 << 4; 252 _23 = (unsigned int) n_6(D); 253 _24 = 255 - _23; 254 if (_24 <= 14) 255 goto <bb 10>; 256 else 257 goto <bb 5>; 258 259 <bb 5>: 260 stmp_var_.7_33 = n_6(D) + 1; 261 stmp_var_.7_34 = stmp_var_.7_33 + 1; 262 stmp_var_.7_35 = stmp_var_.7_34 + 1; 263 vect_cst_.8_36 = {n_6(D), stmp_var_.7_33, stmp_var_.7_34, stmp_var_.7_35}; 264 vect_cst_.9_37 = { 16, 16, 16, 16 }; 265 vect_cst_.11_40 = { 4, 4, 4, 4 }; 266 _48 = (sizetype) n_6(D); 267 vectp_b.15_47 = b_5 + _48; 268 vect_cst_.17_8 = { 1, 1, 1, 1 }; 269 270 <bb 6>: 271 # n_12 = PHI <n_6(D)(5), n_9(13)> 272 # vect_vec_iv_.10_38 = PHI <vect_cst_.8_36(5), vect_vec_iv_.10_39(13)> 273 # vectp_b.14_49 = PHI <vectp_b.15_47(5), vectp_b.14_50(13)> 274 # ivtmp_56 = PHI <0(5), ivtmp_57(13)> 275 vect_vec_iv_.10_39 = vect_vec_iv_.10_38 + vect_cst_.9_37; 276 vect_vec_iv_.10_41 = vect_vec_iv_.10_38 + vect_cst_.11_40; 277 vect_vec_iv_.10_42 = vect_vec_iv_.10_41 + vect_cst_.11_40; 278 vect_vec_iv_.10_43 = vect_vec_iv_.10_42 + vect_cst_.11_40; 279 vect__7.13_44 = VEC_PACK_TRUNC_EXPR <vect_vec_iv_.10_38, vect_vec_iv_.10_41>; 280 vect__7.13_45 = VEC_PACK_TRUNC_EXPR <vect_vec_iv_.10_42, vect_vec_iv_.10_43>; 281 vect__7.12_46 = VEC_PACK_TRUNC_EXPR <vect__7.13_44, vect__7.13_45>; 282 _7 = (unsigned char) n_12; 283 MEM[(unsigned char[256] *)vectp_b.14_49] = vect__7.12_46; 284 vect_n_9.16_52 = vect_vec_iv_.10_38 + vect_cst_.17_8; 285 vect_n_9.16_53 = vect_vec_iv_.10_41 + vect_cst_.17_8; 286 vect_n_9.16_54 = vect_vec_iv_.10_42 + vect_cst_.17_8; 287 vect_n_9.16_55 = vect_vec_iv_.10_43 + vect_cst_.17_8; 288 n_9 = n_12 + 1; 289 vectp_b.14_50 = vectp_b.14_49 + 16; 290 ivtmp_57 = ivtmp_56 + 1; 291 if (ivtmp_57 < bnd.4_10) 292 goto <bb 13>; 293 else 294 goto <bb 9>; 295 296 <bb 7>: 297 # n_18 = PHI <n_22(8), n_25(10)> 298 _20 = (unsigned char) n_18; 299 b_5->map[n_18] = _20; 300 n_22 = n_18 + 1; 301 if (n_22 <= 255) 302 goto <bb 8>; 303 else 304 goto <bb 11>; 305 306 <bb 8>: 307 goto <bb 7>; 308 309 <bb 9>: 310 # n_26 = PHI <n_9(6)> 311 _32 = (int) ratio_mult_vf.5_17; 312 tmp.6_31 = n_6(D) + _32; 313 if (niters.3_2 == ratio_mult_vf.5_17) 314 goto <bb 12>; 315 else 316 goto <bb 10>; 317 318 <bb 10>: 319 # n_25 = PHI <tmp.6_31(9), n_6(D)(4)> 320 goto <bb 7>; 321 322 <bb 11>: 323 324 <bb 12>: 325 goto <bb 3>; 326 327 <bb 13>: 328 goto <bb 6>; 329 330 } vectp_b is assigned a value a line 267, 267 vectp_b.15_47 = b_5 + _48; _48 is a passed in parameter, so the pointer value of vectp_b should be unaligned. But the declaration of vectp_b is a pointer to type <vector_type 0x7f1361b56540>, that is 64bit aligned at line 106. So the backend generates a 64bit aligned memory access instruction accordingly.
This issue can also be reproduced on x86_64. Compile the following code with options (assume the file name is t.c): -O2 -ftree-vectorize t.c -fdump-tree-all-alias #include <stdlib.h> typedef struct { unsigned char map[256]; int i; } A, *AP; AP foo(int n) { AP b = malloc(sizeof(A)); int i; for (i = n; i < 256; i++) b->map[i] = i; return b; } The from t.c.116t.vect we can find such a statement: # ALIGN = 8, MISALIGN = 0 vectp_b.15_47 = b_5 + _48; Here b_5 is obtained from malloc which can be 8 bytes aligned, but _48 is from input parameter n, and the alignment of vectp_b.15_47 should be unknown instead of 8 here. I suspect the ptr_info_def object of vectp_b.15_47 is just copied from that of b_5, which is incorrect.
It turns out that when function vect_create_addr_base_for_vector_ref create a new pointer, it doesn't set the correct alignment of pointer value, so the default alignment of the point_to type is used. We should use the alignment information from DR_MISALIGNMENT (dr). I'm testing following patch Index: tree-vect-data-refs.c =================================================================== --- tree-vect-data-refs.c (revision 216341) +++ tree-vect-data-refs.c (working copy) @@ -3957,8 +3957,12 @@ && TREE_CODE (addr_base) == SSA_NAME) { duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr)); - if (offset) + unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info)); + unsigned misalign = DR_MISALIGNMENT (dr); + if (offset || (misalign == -1)) mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base)); + else if (misalign) + set_ptr_info_alignment (SSA_NAME_PTR_INFO (addr_base), align, misalign); } if (dump_enabled_p ())
Author: carrot Date: Wed Oct 22 15:56:59 2014 New Revision: 216562 URL: https://gcc.gnu.org/viewcvs?rev=216562&root=gcc&view=rev Log: PR tree-optimization/63530 tree-vect-data-refs.c (vect_create_addr_base_for_vector_ref): Set pointer alignment according to DR_MISALIGNMENT. gcc.dg/vect/pr63530.c: New testcase. Added: trunk/gcc/testsuite/gcc.dg/vect/pr63530.c Modified: trunk/gcc/ChangeLog trunk/gcc/testsuite/ChangeLog trunk/gcc/tree-vect-data-refs.c
Author: carrot Date: Tue Oct 28 00:20:13 2014 New Revision: 216770 URL: https://gcc.gnu.org/viewcvs?rev=216770&root=gcc&view=rev Log: PR tree-optimization/63530 tree-vect-data-refs.c (vect_create_addr_base_for_vector_ref): Set pointer alignment according to DR_MISALIGNMENT. gcc.dg/vect/pr63530.c: New test. Added: branches/gcc-4_9-branch/gcc/testsuite/gcc.dg/vect/pr63530.c Modified: branches/gcc-4_9-branch/gcc/ChangeLog branches/gcc-4_9-branch/gcc/testsuite/ChangeLog branches/gcc-4_9-branch/gcc/tree-vect-data-refs.c
Fixed then ?
(In reply to Ramana Radhakrishnan from comment #6) > Fixed then ? Yes, thanks for closing it.