Since r11-5160-g9fc9573f9a5e9432e53c7d the following testcase distilled from gstreamer orc's orc_test.c is miscompiled on aarch64-linux with -O2 -ftree-vectorize: typedef struct _OrcExecutor { void *pad1; int n; int pad2, pad3, pad4; void *arrays[64]; int params[64]; int accumulators[4]; } OrcExecutor; typedef union { int i; float f; signed short x2[2]; signed char x4[4]; } orc_union32; __attribute__((noipa)) void _backup_orc_sad_nxm_u8 (OrcExecutor * restrict ex) { int i; int j; int n = ex->n; int m = ex->params[12]; const signed char * restrict ptr4; const signed char * restrict ptr5; orc_union32 var12 = { 0 }; signed char var32; signed char var33; for (j = 0; j < m; j++) { ptr4 = ((void *)(((unsigned char *)(ex->arrays[4])) + (ex->params[4] * j))); ptr5 = ((void *)(((unsigned char *)(ex->arrays[5])) + (ex->params[5] * j))); for (i = 0; i < n; i++) { var32 = ptr4[i]; var33 = ptr5[i]; var12.i = var12.i + (((int)(unsigned char)var32 - (int)(unsigned char)var33)<0 ? -((int)(unsigned char)var32 - (int)(unsigned char)var33) : ((int)(unsigned char)var32 - (int)(unsigned char)var33)); } } ex->accumulators[0] = var12.i; } int main () { if (__CHAR_BIT__ != 8 || __SIZEOF_INT__ != 4) return 0; OrcExecutor ex; int i, j; typedef signed char A[20 * 384 * 2 + 1]; __attribute__((aligned (128))) A buf; signed char a[79 * 20] = { 122, 23, -89, 127, 24, 52, 92, 72, 18, -84, 59, -54, 33, 118, 37, -108, -93, -59, -92, -4, 21, -51, -57, -53, 81, 85, -64, -88, -120, 1, 62, 69, -1, 93, -117, 75, -118, -87, 101, 6, -55, 103, -98, -48, 36, -91, 114, 63, -121, 10, -84, 14, -35, -93, 119, -52, 78, 107, 5, 70, 57, -100, -31, -74, -16, -72, 21, -88, 54, 86, -5, -80, 121, -83, -28, -51, -48, -30, 106, -85, 119, 33, -1, -68, -52, 34, -108, 51, -12, -101, 78, -14, -16, -43, 4, -1, 120, 67, -43, -1, -54, 91, -47, 115, 53, 56, 104, -76, 100, -15, -109, -81, 107, -73, -60, 97, -109, -79, -114, 17, -26, -105, 20, 81, 121, 16, 51, -78, 124, -52, 111, 74, 2, 97, -98, -25, -111, -26, 86, 16, 105, 82, 
24, -65, -107, -110, 57, -31, -77, 108, -108, -119, -126, 22, -15, -8, 48, -107, 19, 66, 43, 99, 7, 127, 103, -13, 108, 8, 106, 20, 75, -28, -27, -25, 83, 86, 49, 81, 79, -94, 47, -86, 107, -96, -32, 42, 38, 2, -61, 20, 22, 100, -14, -96, 95, 24, -5, -87, -15, -81, 127, 27, -123, 72, -115, 26, 85, -104, -68, -13, -98, -67, 10, -61, -42, -62, 119, -23, -83, -15, 97, 45, -58, -36, -98, -83, 50, 42, 91, -117, -79, 34, -73, 88, -89, -7, 31, -78, 120, -61, -87, -7, 77, 43, 100, -68, -26, -88, 30, 43, 84, 83, 61, -99, 95, 110, 111, 7, 85, 65, 86, 58, -107, -74, -16, -32, -37, -76, -56, 52, 73, 60, -34, -124, 57, 68, 87, 104, -60, 86, -27, -64, 18, -24, -12, 111, 96, 122, -102, -39, -46, 114, 59, 29, -2, -73, -1, 18, 52, 15, -11, 3, -113, 57, 4, 43, -67, -54, -106, -17, -115, -56, 2, -35, 61, 8, -121, 25, -21, -45, -110, 54, 110, -23, 34, -112, 85, 106, -110, 63, 32, 6, -34, -109, -5, 49, 38, -28, -109, -43, -31, 22, -68, -71, -32, -18, -60, -81, 9, 24, -108, -128, -47, -12, -116, -104, 118, 36, 28, 114, 2, -42, 35, -63, 0, -46, 8, 99, -5, 67, -103, 7, -70, 69, -90, -104, 62, 50, 92, 112, 120, -84, 99, 62, -54, 106, -73, -77, 68, -60, 71, -17, -101, -4, 79, -108, 37, -94, -33, -46, 58, 48, -127, -96, 5, 113, -6, -128, -92, 77, 65, 23, -14, -42, -107, -85, 77, 36, 39, 94, 34, -16, -124, 40, 61, -13, -55, -106, -46, -4, -123, 68, 74, -44, 108, 27, -49, -46, -63, 75, -106, 60, -127, 7, -17, 100, 80, 3, 14, 127, 37, -81, -105, 68, -76, 48, -21, 73, -97, 35, 53, 28, 91, 17, 51, 36, -29, -80, -59, 54, 32, -33, 55, -10, -40, -100, -25, -49, -71, 89, 113, 44, 53, 68, -4, -51, -11, -123, -40, -16, 70, 125, 0, -99, -104, -111, -13, 76, -78, -71, 84, 83, -74, -57, 65, 22, -39, 120, 115, -19, -34, 60, 26, -45, 87, -59, 118, -44, -101, 81, 53, 53, -122, -72, -112, 96, -13, 53, 36, -106, -6, -12, -112, 83, -22, 95, -44, -128, 5, 40, 100, 91, -2, 22, 15, 38, 51, -111, -109, -109, -86, -20, -4, 83, 126, -68, -95, 71, -109, 49, -23, -32, 79, -10, -30, -16, -31, -62, 18, 109, 50, 121, 72, 
-20, -80, 2, 67, -35, -38, -26, 37, -116, -116, -82, -106, -45, 8, -7, -73, -96, -56, 95, -51, 42, 6, 9, -86, -14, 7, 13, -37, 124, 43, -118, -109, -54, -74, -121, 90, -99, 92, -116, 117, 107, -114, -2, 20, 31, -121, 59, 62, -109, 21, 122, 125, -51, 40, 96, 73, -3, 28, 97, 5, -53, 32, 87, 124, -24, 112, -70, -93, 36, 95, 59, 115, -73, 12, -89, -118, 18, 66, -107, -90, -86, 80, 14, -32, -121, -88, -7, -88, -117, 58, 73, -124, -125, -114, -27, -120, -17, -93, 90, -108, -110, -117, 67, 80, -72, -61, 116, -24, 74, 84, -23, -120, -110, -86, 113, -106, -49, 55, 26, -77, 120, -57, 91, 111, 80, -94, 4, -50, 47, 99, -26, 127, -118, -101, 66, -42, -57, 37, -18, -125, 18, -17, 88, 88, 46, -98, 125, -59, 55, -42, 125, 9, -76, -81, -97, 29, 89, -122, -56, -128, -36, -84, -71, -65, -103, -47, 11, -58, -27, 47, -20, -23, 50, 118, -105, -78, -25, 122, -107, 104, -4, -97, -75, 100, 89, 0, -52, 19, -20, 98, 45, -54, -116, -37, -25, 77, -111, 66, -63, 29, 47, 85, -10, -123, -105, -83, -79, 99, -52, 98, 71, -73, 69, 126, 54, -19, 72, 14, -90, -108, 107, -37, -29, 107, -107, 109, 49, 126, -75, 14, -43, -69, 42, -86, 83, -65, 26, 80, 48, -88, 29, 105, -127, -72, -84, -74, 60, -81, 72, 11, 59, -27, 106, -51, 15, 41, 45, 45, -118, 92, -67, 33, 11, -51, 127, 39, -121, 84, -2, -71, 92, 45, -104, -113, -42, -102, -19, -105, 125, 55, 3, -75, 36, 27, 57, 58, -77, -77, -14, 38, -81, 95, -3, 30, -114, -55, 100, 52, -92, -55, 5, -118, 39, -97, 117, 103, 51, -42, 38, 29, 34, 23, -83, -76, 116, 80, 64, -69, -77, 97, -15, -49, 7, -119, -89, -19, -104, -109, -31, 119, -90, -64, 43, 7, 106, 115, 92, -9, -84, -97, -108, -83, 33, -15, -7, -89, -118, -67, 44, 74, -115, -87, -115, 106, 82, 27, 52, -104, -35, 99, -86, 47, 46, -88, 107, 29, 26, -108, 28, 114, -25, -65, -116, -86, -86, -14, 45, 68, 11, -42, 75, 98, -69, -108, 105, 88, 59, -60, -32, -25, 120, -90, 65, 121, 42, -105, 33, -4, 120, 79, -88, -124, 106, -32, -40, -97, 18, 110, -98, 119, 122, -81, -101, -85, 76, 34, -81, -22, 68, 14, 73, 83, 86, 
65, 113, -112, -98, -122, -110, -84, 12, -29, 9, -72, -52, -105, 110, 34, 118, -42, -91, -48, -6, 66, -93, -1, 73, 41, 0, -33, -60, -127, 65, -22, -73, -111, 115, 47, 78, 39, 28, -21, -55, 53, -20, -30, -22, 29, 11, 76, -105, -8, -52, -90, 84, 98, -123, 98, -124, 12, 49, 78, 95, -1, -58, -60, -78, -79, 87, -105, 90, -55, -22, -72, 126, -78, 23, -84, -112, -31, 101, -82, 1, -25, 100, -93, -57, 54, -118, -62, -113, -77, -50, 0, -62, -1, 6, 60, 58, -123, 1, 47, -63, 67, 100, 17, 77, 89, 69, 12, -2, 84, -119, -77, 96, 48, 55, 109, 30, 85, 46, 22, -51, -17, -51, -105, -42, -113, -109, 126, -38, 110, -58, 121, 126, 77, 31, -112, -6, 12, 52, 38, 97, 12, -87, -82, -82, 70, 22, 44, -110, 40, -73, 114, 0, 51, -41, 24, 116, 86, -7, 56, 7, 8, -53, -18, 52, 39, -53, -109, 104, -109, -74, -76, 75, -119, -75, -44, -47, -63, 71, -26, -1, 71, -19, -119, 125, 27, -105, 1, -31, -121, -26, -102, 125, 33, 51, 48, 13, -124, 122, 48, 97, -6, -53, 10, 113, 77, 93, -25, 95, -119, 5, 118, 83, -81, 116, 116, 23, 30, 51, -2, 113, -63, 96, -106, 68, -7, 38, -9, 101, 73, -3, -104, 62, -60, -89, 2, 37, 55, -26, -46, 122, 22, -3, -115, -125, 39, -19, 40, 7, -16, 108, 82, -84, -30, -91, -121, 16, 66, -77, -43, -86, -37, -37, 109, 95, 101, -117, -23, -28, 80, 63, 89, -17, -96, 93, 68, -20, -12, 93, 66, 107, -122, -72, -77, 122, 50, 31, -97, -18, -9, 72, -103, 17, 46, 121, 80, 11, -120, -43, -11, 12, -55, 105, 81, 63, 70, -10, -94, 110, 31, 124, 57, 117, 61, -117, -44, 104, 10, 73, -68, -124, -13, 66, -98, -42, -97, -13, 42, -83, -20, 20, 109, -63, -82, -87, -17, 32, 38, -57, -116, -36, 68, 39, 18, 118, -99, 108, -96, -15, 79, -61, -97, -76, -91, -6, 104, -122, 86, -110, 31, -56, -30, 83, 25, -1, -126, 47, -9, 81, -11, 30, 71, -65, 9, -71, -6, 35, -24, -104, -118, -23, 40, 109, -24, -115, -112, -35, 38, -40, -85, 34, -23, -38, 15, 97, -116, -90, 106, -52, 21, -48, 81, 119, -40, -89, -55, -5, -19, 51, -111, -51, 83, -19, 53, 92, -31, 65, -44, -20, -16, -60, -84, 53, -54, 53, 106, -70, -56, -54, -67, 
51, 111, 8, 47, -9, 47, 25, 81, 21, -112, -122, 82, 44, -62, 16, 13, -94, 17, 125, -31, 80, 40, 35, 81, -19, 6, 54, 90, 59, -64, -47, -104, -13, -5, -59, 23, 113, 40, 37, -6, 32, 126, -114, -50, -93, -71, 55, -85, -55, -108, -68, 16, 46, 7, -86, 124, -80, 64, -55, -52, -104, 99, 45, 88, -123, 2, -5, -52, 24, 91, -4, -58, 97, 84, -29, 52, -41, -80, 69, -87, -47, -122, -29, -103, -18, -88, 127, 79, -32, -90, -79, -78, 125, -54, 58, -15, 57, -32, -28, 44, 103, -55, 23, -29, 47, -65, 59, 31, 78, 101, -47, 98, -111, -106, 81, 79, 15, -22, -63, 26, 41, 8, 26, -82, 66, -82, 90, -97, 63, -127, 104, -42, -67, -107, -84, -10, 21, -68, 106, -112, 17, -113, -29, 12, 107, 62, 42, 82, 9, 23, -57, 59, -5, -41, 13 }; signed char b[79 * 20] = { -13, -94, 73, 51, 103, 99, -63, -80, -90, -3, -30, 114, -88, 82, 59, -41, -99, -21, 34, 92, -68, -39, 96, -36, -52, 77, -100, -122, 111, -9, -120, -97, 49, 31, -51, -3, 84, -124, -73, 63, 89, -9, -96, -51, 78, -73, 30, -109, -37, -117, -20, 124, 10, -94, -103, -80, -39, 42, -14, -114, -126, -78, 48, 104, -123, 64, -30, 1, 121, 50, -103, -57, -104, -43, 85, -47, 3, -57, -78, -13, 61, 105, -9, -97, 73, -83, -14, -113, 125, 6, 100, -63, 34, 75, 83, -16, -85, -101, 102, 89, -114, 6, 29, 75, -26, -4, -47, 113, -54, 118, 111, -32, 106, -26, -12, -33, -17, -33, -88, 120, 17, -124, 26, -62, 117, -78, -33, -20, -52, -19, 9, 10, -84, 11, 105, -97, 44, -94, -123, -17, 66, 51, 88, 1, -35, 19, -52, 118, -62, 29, -8, 13, 40, 25, 25, -15, 18, -21, 94, 103, 109, 98, -14, -82, -84, 74, 66, 108, 108, -36, 86, 81, -50, -57, 110, -71, -73, -41, -46, -127, 127, 53, 86, -83, 94, 100, -81, -68, -105, -10, 121, -59, 98, 18, 4, 90, -102, -88, -30, 25, 43, 110, 49, 17, 57, -95, -87, -37, 17, 125, 100, 117, -48, 120, 112, -35, -43, 67, 23, 114, -24, 126, -23, 10, 39, 61, -114, -46, -76, 63, 77, -23, 32, -24, 91, -69, -106, 52, 31, 83, 116, 96, -110, -67, -70, -63, -54, 22, -36, 101, -32, -60, 49, 24, 22, 117, -82, 37, -76, -53, 36, -20, -92, -60, 61, 40, -51, -21, 53, 
-3, 47, 64, 88, -60, -61, -24, 62, -105, 16, -107, -55, -101, -115, -77, -37, 16, -70, 88, -40, 26, -3, -24, -33, 76, 61, -117, -57, -6, 64, 80, -37, 59, -59, 30, 86, -64, 96, 95, -99, 88, 25, 106, 63, 63, -60, -76, 117, 103, 29, 45, -22, -11, -30, 65, 10, -105, 2, 99, -16, -51, 45, -109, -20, -62, -41, -19, 83, 38, -22, -21, 13, -55, 45, 93, -36, -4, 114, 42, 108, -88, -126, -60, 32, 43, -55, 108, -105, -10, -62, 43, 0, -25, 32, -116, 34, 103, -63, 26, 42, 67, 84, -99, 51, 76, -60, 19, -104, -83, 107, 112, -7, 15, -8, 21, 14, 110, 28, 18, 46, -71, -9, 28, -98, 44, 70, 33, 61, -55, -115, -114, -41, 26, -34, 31, -45, 50, 113, -10, 25, 8, -21, -21, -67, -37, -110, 92, -42, -36, -120, -72, 30, -104, -59, -54, 75, 44, -42, -57, 48, 39, 87, 22, -110, 61, 49, -31, -53, -79, -108, 96, 33, -128, -77, -32, -28, 78, 67, -106, 73, 18, -79, 37, 9, -128, 38, -15, -71, 57, 59, 33, -24, -75, 124, 118, -6, -20, -98, -49, -57, -57, -128, 120, -13, 74, 56, -93, 89, -108, 77, 56, 101, -109, -1, 126, -92, 6, 118, -59, 87, 59, 21, 8, -125, -97, 64, -58, -96, -4, -17, -94, 7, 115, -12, -71, 25, 57, 12, -57, 47, 17, -89, -7, 68, -115, 119, -113, 100, 4, -69, 58, -45, 82, 72, 47, -2, 83, 92, 14, 35, 29, 34, 118, -65, -1, 5, 100, -69, 61, -75, 80, 85, -119, -6, 6, -49, 65, 59, 69, -38, 108, -51, 25, -63, -70, -104, -53, 66, 39, 24, -62, 95, -87, -105, 90, 34, 114, -49, 25, -127, 106, -95, -50, 19, 0, -11, 56, 72, -79, 84, 121, 105, 89, 70, 57, 83, 60, 109, -103, -13, -45, 94, -34, -128, 88, 67, -38, 123, 16, 49, 26, 104, 92, 74, -47, 73, 125, -110, -121, -33, 42, -110, -127, -96, -44, -81, -17, 119, -77, 71, -40, 62, -104, -111, -58, -4, 114, 49, 49, -34, -99, -98, -84, -89, 27, 33, -105, 28, 119, 51, 65, 31, 32, 83, -60, -23, 56, 78, -65, -22, -119, -69, -19, -102, 98, -93, 106, -3, 54, -19, -91, -7, -14, -116, 36, 88, 34, 103, -5, -101, -62, 11, 10, -124, -27, 19, -7, 97, 8, 83, -11, 26, 51, -57, 39, -6, -43, 62, -7, 61, 93, -58, 8, 94, -104, -42, -25, 29, -51, 62, -103, 82, -102, -49, 
-51, 116, -115, -54, -41, -24, -126, 9, -1, 46, 89, -109, 44, 85, -73, 59, 95, 24, -114, -18, -38, 1, -10, -13, -107, -64, 76, 47, 15, 15, -31, 99, -43, 70, 101, 41, -45, -67, 18, -84, 94, -27, -104, 42, -3, 37, -53, 43, -5, 118, 15, 126, -83, -67, 127, -8, -125, -20, -32, -20, -47, -41, 72, 105, 91, -41, -114, 54, -93, 32, 112, -114, -15, -30, -121, -100, 39, 35, -21, -75, 65, 87, -90, 88, -82, -60, -39, -62, -49, 117, 54, 87, -78, 78, 76, 107, 127, 13, -97, 71, 40, 17, 31, 108, 71, 127, -40, 95, -5, -14, 51, 56, -122, -32, -13, -23, -89, -54, -21, 79, -72, 43, -128, -6, -120, 30, -29, -67, -10, -51, 10, -12, 82, -98, 115, -104, 115, 28, -104, 4, -104, -91, 49, -124, -1, -39, 44, -93, 71, -66, 109, 49, -121, -77, 69, 22, 42, 26, -76, -38, -111, -59, 95, -16, 102, 29, -15, 0, 115, -128, -99, -114, -8, 115, 66, 85, -50, -41, 46, -81, -98, 65, 109, 28, 86, -25, -118, 37, -42, -37, 38, -12, 93, 5, 58, -80, -74, -31, 43, -102, 34, -2, -109, -23, 4, 50, -55, 101, 79, -98, 54, -58, -80, 101, 87, -54, 122, 43, -111, 24, 86, 79, -72, -41, -89, -23, 94, 13, -66, -126, -37, -70, -62, -106, 33, -79, 93, -54, 61, 57, 39, -19, -84, 103, -45, -118, -64, 85, 113, -2, -23, -76, -78, -29, -102, -45, -106, -111, -108, -97, 90, -103, 64, -48, 94, -111, -113, -115, 89, -98, -110, -34, 93, -58, -40, -43, -113, -13, -117, 124, -94, -58, 73, 125, 24, 33, -60, -108, 56, 115, 24, -75, -50, -62, -17, -4, -91, 122, 87, 89, -59, -9, 94, -8, 108, -44, 115, 113, -49, -15, 124, -76, -115, 70, -74, -78, -9, 107, 80, -52, 3, -29, -57, -4, -114, 14, 25, 9, 122, -61, -124, 8, 80, 78, 26, -37, 111, -40, 19, 107, -16, -77, -20, 60, 114, 56, 23, -82, -94, 94, 98, -4, -116, 31, 39, -13, -91, -117, 57, 109, -108, -87, -119, -107, -60, 17, -100, -39, -85, 74, -83, -83, -41, -28, -95, 6, -67, 85, -81, -111, 65, -108, -53, -73, 5, 119, -41, 82, 114, -51, -119, 44, -121, 59, 26, 60, 100, -41, -76, -97, -107, -122, -103, -3, -20, -63, 72, 17, 28, 103, 116, -121, 113, -91, -88, -57, 124, -6, 114, 114, 118, 
-84, -128, 46, 31, 116, -37, 49, 90, -24, -29, -114, -123, -84, 14, 66, -102, 88, 96, -2, 7, 40, -87, -78, 77, 103, -127, -34, 90, -104, 4, -104, -89, 105, 78, 91, 17, 88, -75, -106, 121, 34, -55, -15, 26, -16, 12, 93, -83, 65, 113, -30, 62, -106, -105, 40, -52, -33, -62, -62, -92, -88, -41, 89, 75, 47, -61, -12, 93, -8, -118, 3, 114, -4, -101, 126, 84, 113, 31, -121, 15, 44, -59, 70, -92, -34, 75, -123, 114, 16, -53, 10, 100, -32, -53, -72, -37, 121, 2, 100, 90, 83, -21, 120, -12, 48, 12, 74, 118, -46, 3, 58, 84, 125, 85, -115, 84, 47, -10, 12, -4, -94, -104, 92, -67, -2, -76, 28, -36, 94, 89, -63, 36, 9, -73, 49, 77, 109, 106, 32, 42, 28, 84, -71, -68, -112, 75, 101, -109, 88, 4, -125, -41, -22, -33, 33, 102, -8, -25, 20, 62, 55, -84, -90, -72, -17, -34, -1, -123, 67, -82, 79, 105, 62, -27, -63, -71, -95, -64, 31, -107, 111, -110, 69, -26, 52, 24, 71, 22, 25, -101, 98, -110, -87, -56, -93, 1, 97, 79, 89, 65, -6, 62, 60, 38, 117, 26, 90, -15, -22, -62, -59, -110, 17, 4, -2, -98, -115, -4, 32, -52, -33, 9, -16, 120, 33, -127, 68, -16, 116, -42, 57, -77, -96, 96, 83, -5, -106, -67, -72, -124, 80, 124, 106, 31, 103, -114, 64, -11, 113, 109, -87, -100, 32, 10, 32, -36, 28, 119, 110, 62, -101, 89, -93, 88, -58, 11, -46, -37, -52, -109, -113, 89, -62, 69, 42, -41, 65, 57, -88, 23, -66, 110, -123, -94, -89, -69, 44, -105, 67, 121, 50, 54, 71, -14, 86, 43, 55, -120, -25, -8, 61, -53, 123, 118, -80, 13, 84, 83, 109, 34, -8, -65, 19, 54, 41, -107, 8, -103, -83, 52, 37, -14, -38, 30, 33, 67, -29, 83, -66, -96, -27, 107, -75, -81, -79, 118, 98, -34, 87, -108, 48, 103, 119, 83, -62, 5, 71, -53, -26, 115, 99, 123, 59, -87, 88, -30, -100, -42, -17, -35, 83, 28, 57, -61, -1, 13, 95, 66, 65, -23, 38, -8, 41, -106, 69, -47, 99, 109, -95, 83, 77, -102, 74, -14, -19, -126, -95, 71, -111, -73, 61, 57, 17, -126, -30, -9, -32, -80, 87, -95, 81, 31, 5, -73, -116, 12, 84, 66, 78, 26, -59, -60, 100, 33, -49, -32, -27, -68, 123, -54, 50, -111, -66, 109, -81, 95, -102, 93, 62, -55, -13, 
-125, -128, -28, 50, -126, -54, -126, -101, -21, -23, 60, 111, 38, -106, 65, -63, 27, -72, -31, 24, -93, 92, -39, -54, -78 }; ex.n = 79; ex.params[4] = 384; ex.params[5] = 384; ex.params[12] = 20; ex.arrays[4] = &buf[0]; ex.arrays[5] = &buf[20 * 384 + 1]; for (i = 0; i < 20; ++i) { for (j = 0; j < 79; ++j) { buf[i * 384 + j] = a[i * 79 + j]; buf[(i + 20) * 384 + j + 1] = b[i * 79 + j]; } } _backup_orc_sad_nxm_u8 (&ex); if (ex.accumulators[0] != 136665) __builtin_abort (); return 0; } Instead of computing 136665 it computes 135989. The assembly difference r11-5159 to r11-5160 is: --- orc_test.s_ 2023-02-07 12:18:43.029183179 +0100 +++ orc_test.s 2023-02-07 12:19:07.704817488 +0100 @@ -44,9 +44,9 @@ _backup_orc_sad_nxm_u8: ldr q2, [x6, x1] ldr q3, [x5, x1] add x1, x1, 16 - uabdl2 v0.8h, v2.16b, v3.16b - uabal v0.8h, v2.8b, v3.8b - uadalp v1.4s, v0.8h + sabdl2 v0.8h, v2.16b, v3.16b + sabal v0.8h, v2.8b, v3.8b + sadalp v1.4s, v0.8h cmp x1, x8 bne .L5 addv s1, v1.4s
Before the r11-5160 change, the innermost loop was an unsigned char x 16 -> int x 4 SAD_EXPR: <bb 7> [local count: 567644343]: # vect_var12_i_49.13_90 = PHI <vect_patt_46.22_103(7), { 0, 0, 0, 0 }(6)> # ivtmp.57_149 = PHI <ivtmp.57_150(7), 0(6)> vect_var32_32.16_95 = MEM <const vector(16) signed char> [(const signed char *)vectp.15_91 + ivtmp.57_149 * 1]; vect_var33_33.19_100 = MEM <const vector(16) signed char> [(const signed char *)vectp.18_96 + ivtmp.57_149 * 1]; vect_var32.20_101 = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vect_var32_32.16_95); vect_var33.21_102 = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vect_var33_33.19_100); vect_patt_46.22_103 = SAD_EXPR <vect_var32.20_101, vect_var33.21_102, vect_var12_i_49.13_90>; ivtmp.57_150 = ivtmp.57_149 + 16; if (ivtmp.57_150 != _166) goto <bb 7>; [83.33%] else goto <bb 10>; [16.67%] with vect_patt_46.22_103/vect_var12_i_49.13_90 type being vector(4) int. After the change it is signed char x 16 -> int x 4 SAD_EXPR instead: <bb 7> [local count: 567644343]: # vect_var12_i_49.13_99 = PHI <vect_patt_39.20_110(7), { 0, 0, 0, 0 }(6)> # ivtmp.55_156 = PHI <ivtmp.55_157(7), 0(6)> vect_var32_32.16_104 = MEM <const vector(16) signed char> [(const signed char *)vectp.15_100 + ivtmp.55_156 * 1]; vect_var33_33.19_109 = MEM <const vector(16) signed char> [(const signed char *)vectp.18_105 + ivtmp.55_156 * 1]; vect_patt_39.20_110 = SAD_EXPR <vect_var32_32.16_104, vect_var33_33.19_109, vect_var12_i_49.13_99>; ivtmp.55_157 = ivtmp.55_156 + 16; if (ivtmp.55_157 != _173) goto <bb 7>; [83.33%] else goto <bb 10>; [16.67%]
So, with a simplified testcase compiled with -O2 -ftree-vectorize on trunk: int foo (signed char *x, signed char *y, int n) { int i, r = 0; signed char a, b; for (i = 0; i < n; i++) { a = x[i]; b = y[i]; int c = (unsigned char) a - (unsigned char) b; r = r + (c < 0 ? -c : c); } return r; } everything looks ok to me until vect_recog_sad_pattern is called. The interesting part of the loop in question is: <bb 3> [local count: 955630225]: # i_22 = PHI <i_20(6), 0(5)> # r_23 = PHI <r_19(6), 0(5)> ... a.0_5 = (unsigned char) a_15; _6 = (int) a.0_5; b.1_7 = (unsigned char) b_17; _8 = (int) b.1_7; c_18 = _6 - _8; _9 = ABS_EXPR <c_18>; r_19 = _9 + r_23; with SSA_NAMEs 15/17 having signed char type, 5/7 unsigned char and everything else int. Now, when vect_recog_sad_pattern is called, it sees as diff_stmt_vinfo->stmt patt_34 = (a.0_5) w- (b.1_7); which is reasonable, abs_stmt_vinfo was patt_30 = ABS_EXPR <patt_33>; where patt_30 is signed short, patt_33 is also signed short and set to (signed short) patt_34, and patt_34 is unsigned short. Still, the widening subtraction is done with zero extensions from unsigned char operands to unsigned short. But vect_recog_sad_pattern calls 1325 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR, 1326 false, 2, unprom, &half_type)) 1327 return NULL; and instead of returning a.0_5 and b.1_7 as the unpromoted operands and unsigned char as half_type, it returns a_15 and b_17 as the unpromoted operands and signed char as half_type. I'd think that if, in vect_widened_op_tree after the early checks, rhs_code != code, we shouldn't look through further promotions and should just accept what we have.
Created attachment 54419 [details] gcc13-pr108692.patch Untested fix.
The master branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>: https://gcc.gnu.org/g:6ad1c1027628f094260037536f6b6fcdb63b5add commit r13-5742-g6ad1c1027628f094260037536f6b6fcdb63b5add Author: Jakub Jelinek <jakub@redhat.com> Date: Wed Feb 8 18:41:21 2023 +0100 vect-patterns: Fix up vect_widened_op_tree [PR108692] The following testcase is miscompiled on aarch64-linux since r11-5160. Given <bb 3> [local count: 955630225]: # i_22 = PHI <i_20(6), 0(5)> # r_23 = PHI <r_19(6), 0(5)> ... a.0_5 = (unsigned char) a_15; _6 = (int) a.0_5; b.1_7 = (unsigned char) b_17; _8 = (int) b.1_7; c_18 = _6 - _8; _9 = ABS_EXPR <c_18>; r_19 = _9 + r_23; ... where SSA_NAMEs 15/17 have signed char, 5/7 unsigned char and rest is int we first pattern recognize c_18 as patt_34 = (a.0_5) w- (b.1_7); which is still correct, 5/7 are unsigned char subtracted in wider type, but then vect_recog_sad_pattern turns it into SAD_EXPR <a_15, b_17, r_23> which is incorrect, because 15/17 are signed char and so it is sum of absolute signed differences rather than unsigned sum of absolute unsigned differences. The reason why this happens is that vect_recog_sad_pattern calls vect_widened_op_tree with MINUS_EXPR, WIDEN_MINUS_EXPR on the patt_34 = (a.0_5) w- (b.1_7); statement's vinfo and vect_widened_op_tree calls vect_look_through_possible_promotion on the operands of the WIDEN_MINUS_EXPR, which looks through the further casts. vect_look_through_possible_promotion has careful code to stop when there would be nested casts that need to be preserved, but the problem here is that the WIDEN_*_EXPR operation itself has an implicit cast on the operands already - in this case of WIDEN_MINUS_EXPR the unsigned char 5/7 SSA_NAMEs are widened to unsigned short before the subtraction, and vect_look_through_possible_promotion obviously isn't told about that. 
Now, I think when we see those WIDEN_{MULT,MINUS,PLUS}_EXPR codes, we had to look through possible promotions already when creating those and so vect_look_through_possible_promotion again isn't really needed, all we need to do is arrange what that function will do if the operand isn't result of any cast. Other option would be let vect_look_through_possible_promotion know about the implicit promotion from the WIDEN_*_EXPR, but I'm afraid that would be much harder. 2023-02-08 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/108692 * tree-vect-patterns.cc (vect_widened_op_tree): If rhs_code is widened_code which is different from code, don't call vect_look_through_possible_promotion but instead just check op is SSA_NAME with integral type for which vect_is_simple_use is true and call set_op on this_unprom. * gcc.dg/pr108692.c: New test.
Fixed on the trunk so far.
The releases/gcc-12 branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>: https://gcc.gnu.org/g:00136f439e2849af2bfd9934d79a8297ab09a1d9 commit r12-9152-g00136f439e2849af2bfd9934d79a8297ab09a1d9 Author: Jakub Jelinek <jakub@redhat.com> Date: Wed Feb 8 18:41:21 2023 +0100 vect-patterns: Fix up vect_widened_op_tree [PR108692] The following testcase is miscompiled on aarch64-linux since r11-5160. Given <bb 3> [local count: 955630225]: # i_22 = PHI <i_20(6), 0(5)> # r_23 = PHI <r_19(6), 0(5)> ... a.0_5 = (unsigned char) a_15; _6 = (int) a.0_5; b.1_7 = (unsigned char) b_17; _8 = (int) b.1_7; c_18 = _6 - _8; _9 = ABS_EXPR <c_18>; r_19 = _9 + r_23; ... where SSA_NAMEs 15/17 have signed char, 5/7 unsigned char and rest is int we first pattern recognize c_18 as patt_34 = (a.0_5) w- (b.1_7); which is still correct, 5/7 are unsigned char subtracted in wider type, but then vect_recog_sad_pattern turns it into SAD_EXPR <a_15, b_17, r_23> which is incorrect, because 15/17 are signed char and so it is sum of absolute signed differences rather than unsigned sum of absolute unsigned differences. The reason why this happens is that vect_recog_sad_pattern calls vect_widened_op_tree with MINUS_EXPR, WIDEN_MINUS_EXPR on the patt_34 = (a.0_5) w- (b.1_7); statement's vinfo and vect_widened_op_tree calls vect_look_through_possible_promotion on the operands of the WIDEN_MINUS_EXPR, which looks through the further casts. vect_look_through_possible_promotion has careful code to stop when there would be nested casts that need to be preserved, but the problem here is that the WIDEN_*_EXPR operation itself has an implicit cast on the operands already - in this case of WIDEN_MINUS_EXPR the unsigned char 5/7 SSA_NAMEs are widened to unsigned short before the subtraction, and vect_look_through_possible_promotion obviously isn't told about that. 
Now, I think when we see those WIDEN_{MULT,MINUS,PLUS}_EXPR codes, we had to look through possible promotions already when creating those and so vect_look_through_possible_promotion again isn't really needed, all we need to do is arrange what that function will do if the operand isn't result of any cast. Other option would be let vect_look_through_possible_promotion know about the implicit promotion from the WIDEN_*_EXPR, but I'm afraid that would be much harder. 2023-02-08 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/108692 * tree-vect-patterns.cc (vect_widened_op_tree): If rhs_code is widened_code which is different from code, don't call vect_look_through_possible_promotion but instead just check op is SSA_NAME with integral type for which vect_is_simple_use is true and call set_op on this_unprom. * gcc.dg/pr108692.c: New test. (cherry picked from commit 6ad1c1027628f094260037536f6b6fcdb63b5add)
Fixed for gcc 12.3 too.
The releases/gcc-11 branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>: https://gcc.gnu.org/g:eadc8fd8d7fd509b001fcdbe160e2d6862c745a4 commit r11-10709-geadc8fd8d7fd509b001fcdbe160e2d6862c745a4 Author: Jakub Jelinek <jakub@redhat.com> Date: Wed Feb 8 18:41:21 2023 +0100 vect-patterns: Fix up vect_widened_op_tree [PR108692] The following testcase is miscompiled on aarch64-linux since r11-5160. Given <bb 3> [local count: 955630225]: # i_22 = PHI <i_20(6), 0(5)> # r_23 = PHI <r_19(6), 0(5)> ... a.0_5 = (unsigned char) a_15; _6 = (int) a.0_5; b.1_7 = (unsigned char) b_17; _8 = (int) b.1_7; c_18 = _6 - _8; _9 = ABS_EXPR <c_18>; r_19 = _9 + r_23; ... where SSA_NAMEs 15/17 have signed char, 5/7 unsigned char and rest is int we first pattern recognize c_18 as patt_34 = (a.0_5) w- (b.1_7); which is still correct, 5/7 are unsigned char subtracted in wider type, but then vect_recog_sad_pattern turns it into SAD_EXPR <a_15, b_17, r_23> which is incorrect, because 15/17 are signed char and so it is sum of absolute signed differences rather than unsigned sum of absolute unsigned differences. The reason why this happens is that vect_recog_sad_pattern calls vect_widened_op_tree with MINUS_EXPR, WIDEN_MINUS_EXPR on the patt_34 = (a.0_5) w- (b.1_7); statement's vinfo and vect_widened_op_tree calls vect_look_through_possible_promotion on the operands of the WIDEN_MINUS_EXPR, which looks through the further casts. vect_look_through_possible_promotion has careful code to stop when there would be nested casts that need to be preserved, but the problem here is that the WIDEN_*_EXPR operation itself has an implicit cast on the operands already - in this case of WIDEN_MINUS_EXPR the unsigned char 5/7 SSA_NAMEs are widened to unsigned short before the subtraction, and vect_look_through_possible_promotion obviously isn't told about that. 
Now, I think when we see those WIDEN_{MULT,MINUS,PLUS}_EXPR codes, we had to look through possible promotions already when creating those and so vect_look_through_possible_promotion again isn't really needed, all we need to do is arrange what that function will do if the operand isn't result of any cast. Other option would be let vect_look_through_possible_promotion know about the implicit promotion from the WIDEN_*_EXPR, but I'm afraid that would be much harder. 2023-02-08 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/108692 * tree-vect-patterns.c (vect_widened_op_tree): If rhs_code is widened_code which is different from code, don't call vect_look_through_possible_promotion but instead just check op is SSA_NAME with integral type for which vect_is_simple_use is true and call set_op on this_unprom. * gcc.dg/pr108692.c: New test. (cherry picked from commit 6ad1c1027628f094260037536f6b6fcdb63b5add)
Fixed for 11.4 as well.