Following testcase is miscompiled e.g. with -g -Og -fno-strict-aliasing starting with r0-92313-g5006671f1aaa63cd - this is a self-contained testcase from https://gcc.gnu.org/pipermail/gcc-help/2021-December/141021.html With current trunk and -g -Og -fno-strict-aliasing (-Og chosen as something that does just a few optimizations), I see on current gcc trunk the fre1 pass optimizing: - _69 = start.prev; # DEBUG list_ => NULL - last_49 = _69 + 18446744073709551552; - # DEBUG last => last_49 + # DEBUG last => &MEM <struct ovs_list> [(void *)&start + -64B] # DEBUG BEGIN_STMT - printf ("first: %p \nlast: %p\n", first_47, last_49); + printf ("first: %p \nlast: %p\n", first_47, &MEM <struct ovs_list> [(void *)&start + -64B]); We have earlier: start.prev = &start; start.next = &start; and .prev stores in between are: MEM[(struct ovs_list *)member_59 + 64B].prev = _48; ... MEM[(struct ovs_list *)pos_32 + 64B].prev = _15; I bet the alias oracle assumes that pos_32, being an struct member pointer, can't overwrite start.prev where start is much smaller than that (has just struct ovs_list type). That MEM[(struct ovs_list *)pos_32 + 64B].prev = _15; is actually what overwrites start.prev. # pos_32 = PHI <pos_61(8), pos_62(10)> and _6 = start.next; _7 = (long unsigned int) _6; _8 = _7 + 18446744073709551552; pos_61 = (struct member *) _8; and _11 = pos_32->elem.next; _12 = (long unsigned int) _11; _13 = _12 + 18446744073709551552; pos_62 = (struct member *) _13; If it wouldn't use uintptr_t in there, I'd say it is clearly UB, doing pointer arithmetics out of bounds of the start object. With uintptr_t it just materializes a pointer known to point outside of the start object. For -fstrict-aliasing, I think it is just fine to treat it as UB, for -fno-strict-aliasing I don't know. I'm afraid Linux kernel and various other projects that copied such questionable code from it use it heavily. /* How to build: >> gcc -O2 -g -o example2 example2.c ^C >> ./example2 start: 0x7ffc13ba2800 first: 0x1ba32f0 last: 0x7ffc13ba27c0 list is broken! Start: 0x7ffc13ba2800. start->next: 0x1ba3330, start->next->next: 0x1ba3390, start->prev: 0x1ba3390 >> gcc -g -o example2 example2.c >> ./example2 start: 0x7ffd84d91660 first: 0x23b52f0 last: 0x23b5350 Same for clang. */ #include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> ////////////////// from include/openvswitch/util.h ////////////////// #define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \ ((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER)) #define OVS_TYPEOF(OBJECT) typeof(OBJECT) #define OBJECT_OFFSETOF(OBJECT, MEMBER) offsetof(typeof(*(OBJECT)), MEMBER) #define OBJECT_CONTAINING(POINTER, OBJECT, MEMBER) \ ((OVS_TYPEOF(OBJECT))( \ void*)((char*)(POINTER)-OBJECT_OFFSETOF(OBJECT, MEMBER))) #define OBJECT_MEMBER(POINTER, OBJECT, MEMBER) \ ((OVS_TYPEOF(&POINTER->MEMBER)) \ ((uintptr_t) POINTER + OBJECT_OFFSETOF(POINTER, MEMBER))) #define ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER) \ ((OBJECT) = OBJECT_CONTAINING(POINTER, OBJECT, MEMBER), (void)0) #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void)0) #define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \ for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || \ ((NODE = NULL), 0); \ ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) #define LIST_FOR_EACH(ITER, MEMBER, LIST) \ for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ &(ITER)->MEMBER != (LIST); \ ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) ////////////// from lib/list.c / include/openvswitch/list.h ///////////////// struct ovs_list { struct ovs_list* prev; /* Previous list element. */ struct ovs_list* next; /* Next list element. */ }; static inline void ovs_list_init(struct ovs_list* list) { list->next = list->prev = list; } static inline void ovs_list_insert(struct ovs_list* before, struct ovs_list* elem) { elem->prev = before->prev; elem->next = before; before->prev->next = elem; before->prev = elem; } #define CONTAINER_OF(POINTER, STRUCT, MEMBER) \ ((STRUCT*)(void*)((char*)(POINTER)-offsetof(STRUCT, MEMBER))) #define CONST_CAST(TYPE, POINTER) ((TYPE)(POINTER)) static inline struct ovs_list* ovs_list_front(const struct ovs_list* list_) { struct ovs_list* list = CONST_CAST(struct ovs_list*, list_); // ovs_assert(!ovs_list_is_empty(list)); return list->next; } /* Returns the back element in 'list_'. Undefined behavior if 'list_' is empty. */ static inline struct ovs_list* ovs_list_back(const struct ovs_list* list_) { struct ovs_list* list = CONST_CAST(struct ovs_list*, list_); // ovs_assert(!ovs_list_is_empty(list)); return list->prev; } //////////////////////////////////////////////////////////////////////////////// struct member { int padding[14]; int order; struct ovs_list elem; }; int main() { int i, ret = 0; struct member *member, *members[2]; /* TESTED: If i create the struct ovs_list in the heap and change the rest of the program to use start instead of &start, the problem disappears struct ovs_list *start = malloc(sizeof (struct ovs_list)); if (start) { memset(start, 0, sizeof (struct bond)); } TESTED: If I create an entire struct member and I use it's embedded ovs_list (ensuring we have enough stack space so that any pointer arithmetics will always fall into addressable area), the problem dissapears struct member sstart = {}; struct ovs_list *start = &sstart.elem; */ struct ovs_list start = {0}; // Boilerplate initialization for (i = 0; i < 2; i++) { members[i] = malloc(sizeof(struct member)); if (members[i]) { memset(members[i], 0, sizeof(struct member)); } } // Set arbitrary order members[0]->order = 3; members[1]->order = 2; printf("start: %p\n", &start); ovs_list_init(&start); /* Original code. HMAP_FOR_EACH (member, hmap_node, &bond->members) { struct member *pos; LIST_FOR_EACH (pos, elem, &start) { if (member->order > pos->order) { break; } } //printf("Inserting member: %p\n", member); ovs_list_insert(&pos->elem, &member->elem); } What follows is the same code with the macros expanded. */ for (i = 0; i < 2; i++) { struct member* pos; member = members[i]; /* LIST_FOR_EACH (pos, elem, &start) { * 'char *' casts replaced with 'uintptr_t' to make clang also fail. */ for (((pos) = ((void*)0), ((pos) = ((typeof(pos))(void*)((uintptr_t)((&start)->next) - __builtin_offsetof(struct member, elem))))); &(pos)->elem != (&start); // Alternative ways to check: //(uintptr_t) pos + __builtin_offsetof(struct member, elem) != (uintptr_t)(&start); //OBJECT_MEMBER(pos, struct member, elem) != (&start); ((pos) = ((typeof(pos))(void*)((uintptr_t)((pos)->elem.next) - __builtin_offsetof(struct member, elem))))) { if (member->order > pos->order) { break; } } // TESTED: If I add this printf, the problem disappears //printf("pos: %p\n", pos); // Original version of the code. Doesn't work: ovs_list_insert(&pos->elem, &member->elem); // TESTED: This works: //ovs_list_insert((void *)((uintptr_t)pos + __builtin_offsetof(struct member,elem)), &member->elem); // TESTED: This doesn't work: //ovs_list_insert((void *)((char * )pos + __builtin_offsetof(struct member,elem)), &member->elem); // TESTED: This also work: //ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); // TESTED: This doesn't: //ovs_list_insert((void *)((char * )pos + 64), &member->elem); // Just a prettier wrapper for __builtin_offsetof math. Works: //ovs_list_insert(OBJECT_MEMBER(pos, struct member, elem), &member->elem); } struct member* first = CONTAINER_OF(ovs_list_front(&start), struct member, elem); struct member* last = CONTAINER_OF(ovs_list_back(&start), struct member, elem); printf("first: %p \nlast: %p\n", first, last); /* I've inserted two members into the 'start' list. * first and last have to be either member1 or member2 * */ if ((first != members[0] && first != members[1]) || (last != members[0] && last != members[1])) { printf("list is broken!\n"); printf("Start: %p. start->next: %p, start->next->next: %p, " "start->prev: %p\n", &start, start.next, start.next->next, start.prev); ret = 1; goto out; } out: // cleanup free(members[0]); free(members[1]); return ret; }
The kernel version of the macro is called list_for_each_entry. There's a Stackoverflow question about this issue: Does Linux kernel list implementation cause UB? https://stackoverflow.com/questions/64859526/does-linux-kernel-list-implementation-cause-ub
*** Bug 103965 has been marked as a duplicate of this bug. ***
Yes, the oracle assumes that for MEM[(struct ovs_list *)pos_32 + 64B] pos_32 needs to still point to some valid object (even if it's not of type ovs_list) and a pointer to 'start' cannot be constructed from this without invoking UB (and the pointer offsetting rules are not part of TBAA). The MEMs appear in forwprop: <bb 12> : _15 = &member_59->elem; _16 = &pos_32->elem; - _48 = _16->prev; - _15->prev = _48; - _15->next = _16; + _48 = MEM[(struct ovs_list *)pos_32 + 64B].prev; + MEM[(struct ovs_list *)member_59 + 64B].prev = _48; + MEM[(struct ovs_list *)member_59 + 64B].next = _16; but there it's already pointer arithmetic. In .original we have pos = 0B;, pos = (struct member *) ((long unsigned int) start.next + 18446744073709551552);; goto <D.3776>; <D.3775>:; if (member->order > pos->order) { goto <D.3773>; } pos = (struct member *) ((long unsigned int) pos->elem.next + 18446744073709551552); <D.3776>:; if (&pos->elem != &start) goto <D.3775>; else goto <D.3773>; <D.3773>:; ovs_list_insert (&pos->elem, &member->elem); where I think passing &pos->elem and &member->elem to ovs_list_insert is already wrong since 'pos' doesn't point to a valid object if the ultimate written destination is 'start'. Doing the 'pos' initialization with uintptr_t isn't enough - you need to do this all the way up to the &pos->elem computation as you say: // TESTED: This works: //ovs_list_insert((void *)((uintptr_t)pos + __builtin_offsetof(struct member,elem)), &member->elem); so yes, it's UB. And UB that's not sanctioned with -fno-strict-aliasing.
And for the curious it's indeed static bool indirect_ref_may_alias_decl_p (tree ref1... ... /* If only one reference is based on a variable, they cannot alias if the pointer access is beyond the extent of the variable access. (the pointer base cannot validly point to an offset less than zero of the variable). ??? IVOPTs creates bases that do not honor this restriction, so do not apply this optimization for TARGET_MEM_REFs. */ if (TREE_CODE (base1) != TARGET_MEM_REF && !ranges_maybe_overlap_p (offset1 + moff, -1, offset2, max_size2)) return false; the IVOPTs reference is likely due to the fact that while IVOPTs uses uintptrs to create the base pointer the TARGET_MEM_REF contained arithmetic itself is still considered pointer arithmetic (like also here the embedded MEM_REF pointer offsetting) and the base "pointer" cannot be a non-pointer to disable that behavior. Not a bug btw. There's no flag to make violating the C pointer offsetting rules well-defined.
(In reply to Richard Biener from comment #4) > the IVOPTs reference is likely due to the fact that while IVOPTs uses > uintptrs to create the base pointer the TARGET_MEM_REF contained arithmetic > itself is still considered pointer arithmetic (like also here the embedded > MEM_REF pointer offsetting) and the base "pointer" cannot be a non-pointer > to disable that behavior. Does this mean that this is UB and the GCC itself relies on a certain result of it? Shouldn't this mean that this should not be an UB in the end, so other users can rely on that behavior as it is beneficial for certain code patterns? Or at least be configurable? Is that correct that dynamically allocated memory is never a subject for this kind of a pointer check, i.e. we can safely offset pointers to the dynamically allocated memory? If so, can we be sure that this check will always be there for dynamically allocated memory, so we can avoid unnecessary fixes for this kind of use cases? (Allocating ovs_list dynamically indeed fixes our test case.) We have a bit less than a million lines of code heavily using discussed data structures and without automated ways to find the problematic places it will be not an easy job to fix (UBsan doesn't flag anything and I doubt someone will actually sit and re-check the whole code base manually). (In reply to Richard Biener from comment #3) > where I think passing &pos->elem and &member->elem to ovs_list_insert is > already wrong since 'pos' doesn't point to a valid object if the > ultimate written destination is 'start'. > Doing the 'pos' initialization with uintptr_t isn't enough - you need to > do this all the way up to the &pos->elem computation Maybe there is a way to not treat a &pos->elem as a pointer arithmetic? Maybe there should be one? I mean, compilers allows users to perform computations with offsets of structure fields where the base pointer is NULL, and NULL obviously doesn't point to any valid object. I'm not sure if it's an UB or not, but constructions like '&((struct s *)NULL)->field' are very common.
(In reply to Ilya Maximets from comment #5) > (In reply to Richard Biener from comment #4) > > the IVOPTs reference is likely due to the fact that while IVOPTs uses > > uintptrs to create the base pointer the TARGET_MEM_REF contained arithmetic > > itself is still considered pointer arithmetic (like also here the embedded > > MEM_REF pointer offsetting) and the base "pointer" cannot be a non-pointer > > to disable that behavior. > > Does this mean that this is UB and the GCC itself relies on a certain result > of it? If GCC through optimizations introduces UB in a code which wasn't there in the user's code, then it would be a GCC bug and something the compiler needs to fix. > Maybe there is a way to not treat a &pos->elem as a pointer arithmetic? > Maybe there should be one? I mean, compilers allows users to perform > computations with offsets of structure fields where the base pointer > is NULL, and NULL obviously doesn't point to any valid object. I'm not > sure if it's an UB or not, but constructions like '&((struct s > *)NULL)->field' > are very common. &((struct s *)NULL)->field is not valid in C or C++, but for many years the offsetof macro which is valid in those has been defined like that and various projects occassionally still use the above, so GCC supports those as an extension (poor man's offsetof). See e.g. spots with comments like Cope with user tricks that amount to offsetof. etc. in GCC sources. That doesn't change anything about this case, the poor man's offsetof is folded into a constant very early (well, with variable offsets in array refs in there could also into an expression, but still integral expression, the pointer arithmetics is gone from there). What is the reason why OVS (and kernel) doesn't use 2 variables, one for the iterator that is a pointer to the prev/next structure only and one assigned e.g. in the condition from the iterator that is used only when it isn't the start? At least if targetting C99 and above (or C++) one can declare one of those iterators in the for loop init expression...
(In reply to Jakub Jelinek from comment #6) > What is the reason why OVS (and kernel) doesn't use 2 variables, one for the > iterator that is a pointer to the prev/next structure only and one assigned > e.g. in the condition from the iterator that is used only when it isn't the > start? > At least if targetting C99 and above (or C++) one can declare one of those > iterators in the for loop init expression... The problem is that we need 2 variables and one of them need to be accessible outside of the loop. And I don't think it is possible to declare one variable and only initialize another one. So, we'll have to ask users of the FOR_EACH macro to declare an extra dummy variable, AFAICT. And we'll also need to have an extra check outside of the loop before calling ovs_list_insert in our example. So, basically, both variables has to be accessible outside of the loop. Or do you know how to avoid this? One thing that is not clear to me is if the following code has an UB or not: struct member* pos; struct ovs_list start; pos = (struct member *)(void*)((uintptr_t)(&start) - 64); ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); ? This code works fine. Basically, the question is: can we cast and store the random (aligned) integer to a pointer type, if we're not going to perform any kind of pointer arithmetic (using the integer arithmetic for the ovs_list_insert) nor dereference it, unless it points to the actual valid object? If we can do that, then we can maybe avoid introduction of dummy variables by re-working the loop to only use uintptr_t operations. Inside the loop the pointer is always valid, so that should not be a problem (will it?). Outside of the loop we'll have to use the uintptr_t arithmetic if it's possible to have a pointer to a non-object (e.g. 'start' with an offset), but programmer should know that. We will still have to manually re-check a lot of existing code. For now I don't see the solution that will allow us to avoid manual re-checking, unfortunately.
(In reply to Ilya Maximets from comment #7) > One thing that is not clear to me is if the following code has an UB or not: > > struct member* pos; > struct ovs_list start; > > pos = (struct member *)(void*)((uintptr_t)(&start) - 64); > ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); > > ? > > This code works fine. Basically, the question is: can we cast and store > the random (aligned) integer to a pointer type, if we're not going to > perform any kind of pointer arithmetic (using the integer arithmetic for > the ovs_list_insert) nor dereference it, unless it points to the actual > valid object? That should be OK, yes.
(In reply to Ilya Maximets from comment #7) > (In reply to Jakub Jelinek from comment #6) > > What is the reason why OVS (and kernel) doesn't use 2 variables, one for the > > iterator that is a pointer to the prev/next structure only and one assigned > > e.g. in the condition from the iterator that is used only when it isn't the > > start? > > At least if targetting C99 and above (or C++) one can declare one of those > > iterators in the for loop init expression... > > The problem is that we need 2 variables and one of them need to be accessible > outside of the loop. And I don't think it is possible to declare one > variable > and only initialize another one. If you only need to use one variable outside of the loop and not the other one, that should be doable. If you use one in some spots and another in other spots but never both, you could have different macros for those 2 versions. E.g. the one where the list iterator is used inside of the loop only and the var pointing to the containing objects could look like: for (struct ovs_list *iter = (pos = NULL, &start); iter != (&start) && (((pos) = ((typeof(pos))(void*)((uintptr_t)((iter->next) - __builtin_offsetof(struct member, elem))))), 1); iter = iter->next, pos = NULL) which would get you after the loop pos = NULL if break; wasn't done and pos non-NULL otherwise. But in your testcase you actually need to use the other var after the loop, so it would need to be done the other way around, but then the user variable would need to be struct ovs_list * and the macro would need to be told in a different way what type the var declared in the loop should have. > One thing that is not clear to me is if the following code has an UB or not: > > struct member* pos; > struct ovs_list start; > > pos = (struct member *)(void*)((uintptr_t)(&start) - 64); > ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); It still creates a pointer out of something that doesn't point to such an object or points to some unrelated one, doesn't necessarily have sufficient alignment etc., so I think it is UB too, but perhaps the compiler at least now will handle it the way you expect. A lot of programs including e.g. POSIX rely on at least (void *) -1 and similar pointer constants to be usable in equality comparisons (e.g. MAP_FAILED macro).
(In reply to Jakub Jelinek from comment #9) > (In reply to Ilya Maximets from comment #7) > > (In reply to Jakub Jelinek from comment #6) > > > What is the reason why OVS (and kernel) doesn't use 2 variables, one for the > > > iterator that is a pointer to the prev/next structure only and one assigned > > > e.g. in the condition from the iterator that is used only when it isn't the > > > start? > > > At least if targetting C99 and above (or C++) one can declare one of those > > > iterators in the for loop init expression... > > > > The problem is that we need 2 variables and one of them need to be accessible > > outside of the loop. And I don't think it is possible to declare one > > variable > > and only initialize another one. > > If you only need to use one variable outside of the loop and not the other > one, > that should be doable. If you use one in some spots and another in other > spots but never both, you could have different macros for those 2 versions. > > E.g. the one where the list iterator is used inside of the loop only and the > var pointing to the containing objects could look like: > for (struct ovs_list *iter = (pos = NULL, &start); iter != (&start) > && (((pos) = ((typeof(pos))(void*)((uintptr_t)((iter->next) - > __builtin_offsetof(struct member, > elem))))), 1); > iter = iter->next, pos = NULL) > which would get you after the loop pos = NULL if break; wasn't done and > pos non-NULL otherwise. Hmm. I missed the possibility of a comma trick in the initialization part. I think, we can try to re-write our macros this way. Thanks! > But in your testcase you actually need to use the other var after the loop, > so it would need to be done the other way around, but then the user variable > would need to be struct ovs_list * and the macro would need to be told in a > different way what type the var declared in the loop should have. Doesn't sound very intuitive. I guess, it's easier to just add a NULL pointer check after the loop, i.e. ovs_list_insert(pos ? &pos->elem : &start, ...). Assuming our unit tests will catch all NULL-pointer dereferences (they will likely not, but still), that could be a semi-automated way to find all the problematic code parts. Not very friendly for dependent projects though. > > > One thing that is not clear to me is if the following code has an UB or not: > > > > struct member* pos; > > struct ovs_list start; > > > > pos = (struct member *)(void*)((uintptr_t)(&start) - 64); > > ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); > > It still creates a pointer out of something that doesn't point to such an > object or points to some unrelated one, doesn't necessarily have sufficient > alignment etc., so I think it is UB too, but perhaps the compiler at least > now will handle it the way you expect. A lot of programs including e.g. > POSIX rely on at least > (void *) -1 and similar pointer constants to be usable in equality > comparisons (e.g. MAP_FAILED macro). We also have this kind of stuff: static const struct ovs_list OVS_LIST_POISON = { (struct ovs_list *) (UINTPTR_MAX / 0xf * 0xc), (struct ovs_list *) (UINTPTR_MAX / 0xf * 0xc) };
(In reply to Ilya Maximets from comment #10) > Doesn't sound very intuitive. I guess, it's easier to just add a NULL > pointer > check after the loop, i.e. ovs_list_insert(pos ? &pos->elem : &start, ...). That would be certainly cleaner.
> (In reply to Ilya Maximets from comment #7) > > One thing that is not clear to me is if the following code has an UB or not: > > > > struct member* pos; > > struct ovs_list start; > > > > pos = (struct member *)(void*)((uintptr_t)(&start) - 64); > > ovs_list_insert((void *)((uintptr_t)pos + 64), &member->elem); > > > > ? > > > > This code works fine. Basically, the question is: can we cast and store > > the random (aligned) integer to a pointer type, if we're not going to > > perform any kind of pointer arithmetic (using the integer arithmetic for > > the ovs_list_insert) nor dereference it, unless it points to the actual > > valid object? > (In reply to Richard Biener from comment #8) > That should be OK, yes. (In reply to Jakub Jelinek from comment #9) > It still creates a pointer out of something that doesn't point to such an > object or points to some unrelated one, doesn't necessarily have sufficient > alignment etc., so I think it is UB too, but perhaps the compiler at least > now will handle it the way you expect. I think, we'll try to re-write the loops to have 2 distinct variables, but for the case we'll need the uintptr_t solution too as a backup plan, I have one more question. How bad is this: struct member* pos; struct ovs_list start; pos = (struct member *)(void*)((uintptr_t)(&start) - 64); struct ovs_list *list; list = (typeof(&pos->elem))(void *)((uintptr_t)pos + 64); ? Basically, how much undefined is the 'typeof(&pos->elem)' construction? (I just tried to prototype the part of the solution and I found that I need a correct cast inside the macro, so the code will not look way too ugly.) In general, I think, typeof() should not care about the actual value of a pointer, but as we concluded the '&pos->elem' evaluation itself can be harmful, so I don't know.
typeof (and sizeof and alignof and decltype in C++) should be well defined, it is unevaluated context, it doesn't evaluate it in any way (ok, sizeof on VLAs is an exception), just cares about the type (size, alignment) of it.