GCC Bugzilla – Attachment 50597 Details for
Bug 100089
[11 Regression] 30% performance regression for denbench/mp2decoddata2 with -O3
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
denbench_mp2decoddata2.cpp
test.cpp (text/x-csrc), 2.08 KB, created by
Hongtao.liu
on 2021-04-15 06:35:57 UTC
(
hide
)
Description:
denbench_mp2decoddata2.cpp
Filename:
MIME Type:
Creator:
Hongtao.liu
Created:
2021-04-15 06:35:57 UTC
Size:
2.08 KB
patch
obsolete
// Stand-alone timing harness for the row pass of an integer 8x8 IDCT.
// Each iteration builds a 64-coefficient block, times only the call to
// Fast_IDCT, and prints the accumulated time at the end.

#include <stdint.h>
#include <chrono>
#include <iostream>
#include <cstring>
#include <stdlib.h>

typedef signed short e_s16;
// NOTE(review): despite the name, `signed long` is 64 bits wide on LP64
// targets. Left unchanged because the width affects the code generated for
// the kernel being measured.
typedef signed long e_s32;

// Timing state shared by pause_timer() / unpause_timer().
std::chrono::high_resolution_clock::time_point start, end;
std::chrono::duration<double> elapsed;  // zero-initialised (static storage)
std::chrono::high_resolution_clock timer;
// Named `timer_paused` rather than `pause` so it cannot clash with the POSIX
// function pause() declared in <unistd.h>.
bool timer_paused = false;

// Stops the current measurement and adds the time elapsed since the last
// unpause_timer() call to `elapsed`.
void pause_timer()
{
  end = timer.now();
  elapsed += (end - start);
  timer_paused = true;
}

// Starts a new measurement interval.
void unpause_timer()
{
  timer_paused = false;
  start = timer.now();
}

// Releases a block that was allocated with `new e_s16[...]`.
//
// The buffer comes from an array new-expression in main(), so it must be
// released with delete[]; handing it to free() is undefined behaviour.
// noipa keeps the call opaque to GCC's interprocedural optimisations.
void
__attribute__ ((noipa))
foo (e_s16* a)
{
  delete[] a;
}

// In-place integer IDCT of one row of eight coefficients.
//
// blk: pointer to 8 consecutive e_s16 values; all eight are overwritten.
//
// NOTE(review): the multipliers (565, 2841, 2408, 1609, 1108, 2676, 181)
// look like the fixed-point cosine table used by the MPEG-2 reference
// decoder's fast IDCT -- confirm against that source.
static inline void idctrow(e_s16 *blk)
{
  e_s32 x0, x1, x2, x3, x4, x5, x6, x7, x8;

  // Shortcut: when every coefficient except blk[0] is zero, the whole row
  // becomes blk[0] << 3. The loads into x1..x7 happen as a side effect of
  // evaluating the condition and are reused below.
  if (!((x1 = blk[4]<<11) | (x2 = blk[6]) | (x3 = blk[2]) |
        (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3])))
  {
    blk[0]=blk[1]=blk[2]=blk[3]=blk[4]=blk[5]=blk[6]=blk[7]=(e_s16)blk[0]<<3;
    return;
  }

  // +128 is the rounding term for the final >> 8.
  x0 = (blk[0]<<11) + 128;

  // First stage.
  x8 = 565*(x4+x5);
  x4 = x8 + (2841 -565)*x4;
  x5 = x8 - (2841 +565)*x5;
  x8 = 2408*(x6+x7);
  x6 = x8 - (2408 -1609)*x6;
  x7 = x8 - (2408 +1609)*x7;

  // Second stage.
  x8 = x0 + x1;
  x0 -= x1;
  x1 = 1108*(x3+x2);
  x2 = x1 - (2676 +1108)*x2;
  x3 = x1 + (2676 -1108)*x3;
  x1 = x4 + x6;
  x4 -= x6;
  x6 = x5 + x7;
  x5 -= x7;

  // Third stage.
  x7 = x8 + x3;
  x8 -= x3;
  x3 = x0 + x2;
  x0 -= x2;
  x2 = (181*(x4+x5)+128)>>8;
  x4 = (181*(x4-x5)+128)>>8;

  // Fourth stage: scale back down and store.
  blk[0] = (e_s16)((x7+x1)>>8);
  blk[1] = (e_s16)((x3+x2)>>8);
  blk[2] = (e_s16)((x0+x4)>>8);
  blk[3] = (e_s16)((x8+x6)>>8);
  blk[4] = (e_s16)((x8-x6)>>8);
  blk[5] = (e_s16)((x0-x4)>>8);
  blk[6] = (e_s16)((x3-x2)>>8);
  blk[7] = (e_s16)((x7-x1)>>8);
}

// Applies idctrow() to each of the eight rows of a 64-element block, in
// place. Only the row pass is performed here. Always returns 1.
// noipa stops GCC from inlining or specialising this function into main().
int
__attribute__ ((noipa))
Fast_IDCT(e_s16 *block)
{
  e_s32 i;

  for (i=0; i<8; i++)
    idctrow(block+8*i);

  return 1;
}

// Runs 30000000 iterations, timing only the Fast_IDCT call in each one, then
// prints the accumulated time.
int main ()
{
  int i = 0;
  while (i++ != 30000000)
    {
      e_s16* p = new e_s16[64];
      for (int j = 0; j != 64; j++)
        p[j] = j*j - 3*j + 2;
      unpause_timer ();
      Fast_IDCT (p);
      pause_timer ();
      foo (p);  // releases p with delete[]
    }

  std::cout << "elapsed time: "<< elapsed.count() <<" seconds for Fast_IDCT with 30000000 iterations"<<std::endl;
  // NOTE(review): the process exits with status 1 even on success; kept as
  // in the original so existing scripts see the same exit code.
  return 1;
}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 100089
: 50597 |
51350