// Excerpt of the GCC-generated IA-64 code for dot_product__: the unrolled
// inner loop (.L28) followed by the store of the accumulated result (.L9).
// The "..." lines and the bracketed labels are reproduced exactly as they
// appear in the listing; they look like listing elisions rather than
// assembler input.
//
// Register roles visible in this excerpt:
//   ar.lc              loop counter, consumed by br.cloop
//   f14                running sum, carried from one iteration to the next
//   r14, r8, r3, r35   addresses of the operands of the 1st and 2nd product
//   r33, r31           addresses of the operands of the 3rd product
//   r29, r28           addresses of the operands of the 4th product
//   r32                address the final sum is stored to
	mov ar.lc = r16			// br.cloop repeats while ar.lc != 0
.L28:
[.L3:] [.L2:]
	...
	// The loop is unrolled four times and the address givs (induction
	// variables) have been split: r14, r8, r3, r35, r33, r31, r28 and r29
	// each hold the address of one array element.
	// NOTE(review): no update of r14, r8, r3, r35, r34, r30, r23 or r24 is
	// visible in this excerpt -- presumably elided from the listing; confirm
	// against the full compiler output.
	.mmi
	ldfd f38 = [r14]		// operands of product 1
	ldfd f39 = [r8]
	add r31 = r34, r23		// address for f34 (product 3)
	.mmi
	ldfd f35 = [r3]			// operands of product 2
	ldfd f37 = [r35]
	add r33 = r34, r24		// address for f33 (product 3)
	;;
	.mmb
	ldfd f33 = [r33]		// operands of product 3
	ldfd f34 = [r31]
	nop 0
	.mmi
	add r28 = r30, r23		// address for f15 (product 4)
	add r29 = r30, r24		// address for f32 (product 4)
	adds r22 = 4, r22		// r22 += 4; NOTE(review): not read in this excerpt
	;;
	// The four multiply-adds form one serial chain through
	// f14 -> f36 -> f13 -> f12 -> f14, so each sits in its own
	// instruction group (separated by ;;).
	.mmf
	ldfd f32 = [r29]		// operands of product 4
	ldfd f15 = [r28]
	fma.d f36 = f38, f39, f14	// f36 = f38 * f39 + running sum
	;;
	.mmf
	nop 0
	nop 0
	fma.d f13 = f35, f37, f36	// f13 = f35 * f37 + f36
	;;
	.mmf
	nop 0
	nop 0
	fma.d f12 = f33, f34, f13	// f12 = f33 * f34 + f13
	;;
	.mfb
	nop 0
	fma.d f14 = f32, f15, f12	// new running sum
	br.cloop.sptk.few .L28		// next iteration while ar.lc != 0
	;;
.L9:
	.mfb
	stfd [r32] = f14		// write the accumulated sum to [r32]
	nop 0
	nop 0
	...
	.endp dot_product__#
	.ident "GCC: (GNU) 3.5-tree-ssa 20031221 (CCRG)"