This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

gcc -O1 performs better than gcc -O2


While optimizing a crc16 routine, I noticed that the code built with
gcc -O1 ran noticeably faster than the same code built with gcc -O2:

# > gcc -O1  CRC16.c ;./a.out
crc1:f532 crc2:f532
crc16 tv_res:0 :12768
CRC16 tv_res:0 :10795
# > gcc -O2  CRC16.c ;./a.out
crc1:f532 crc2:f532
crc16 tv_res:0 :17092
CRC16 tv_res:0 :11581

#> gcc --version
gcc (Gentoo 4.3.4 p1.0, pie-10.1.5) 4.3.4

cpu:
vendor_id	: GenuineIntel
cpu family	: 6
model		: 23
model name	: Intel(R) Core(TM)2 Duo CPU     E8500  @ 3.16GHz
stepping	: 10
cpu MHz	: 3159.236

Here is the CRC16.c:

/*
 * Shorthand names for the unsigned integer types used in this file.
 * NOTE(review): u32 expands to unsigned long, which is wider than 32 bits
 * on LP64 targets - confirm whether a true 32-bit type is intended.
 */
#define u32 unsigned long
#define u16 unsigned short
#define u8 unsigned char

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/time.h>

/**
 * CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1).
 *
 * 256 entries, one per byte value. crc16_byte() indexes it with the low
 * byte of (crc ^ data).
 */
u16 const crc16_table[256] = {
	0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
	0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
	0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
	0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
	0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
	0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
	0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
	0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
	0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
	0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
	0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
	0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
	0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
	0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
	0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
	0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
	0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
	0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
	0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
	0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
	0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
	0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
	0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
	0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
	0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
	0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
	0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
	0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
	0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
	0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
	0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
	0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
};
#include <asm/byteorder.h>
/*
 * tole(x) wraps each table constant in __constant_cpu_to_le16() from
 * <asm/byteorder.h> (presumably a compile-time host-to-little-endian
 * conversion - confirm against that header).
 */
#define tole(x) __constant_cpu_to_le16(x)
/*
 * Second CRC-16 lookup table: lists the same constants as crc16_table,
 * each passed through tole(). Used by CRC16_byte() and CRC16().
 */
u16 const crc16_table_le[256] = {
	tole(0x0000), tole(0xC0C1), tole(0xC181), tole(0x0140), tole(0xC301),
	tole(0x03C0), tole(0x0280), tole(0xC241), tole(0xC601), tole(0x06C0),
	tole(0x0780), tole(0xC741), tole(0x0500), tole(0xC5C1), tole(0xC481),
	tole(0x0440), tole(0xCC01), tole(0x0CC0), tole(0x0D80), tole(0xCD41),
	tole(0x0F00), tole(0xCFC1), tole(0xCE81), tole(0x0E40),	tole(0x0A00),
	tole(0xCAC1), tole(0xCB81), tole(0x0B40), tole(0xC901), tole(0x09C0),
	tole(0x0880), tole(0xC841), tole(0xD801), tole(0x18C0), tole(0x1980),
	tole(0xD941), tole(0x1B00), tole(0xDBC1), tole(0xDA81), tole(0x1A40),
	tole(0x1E00), tole(0xDEC1), tole(0xDF81), tole(0x1F40), tole(0xDD01),
	tole(0x1DC0), tole(0x1C80), tole(0xDC41), tole(0x1400), tole(0xD4C1),
	tole(0xD581), tole(0x1540), tole(0xD701), tole(0x17C0), tole(0x1680),
	tole(0xD641), tole(0xD201), tole(0x12C0), tole(0x1380), tole(0xD341),
	tole(0x1100), tole(0xD1C1), tole(0xD081), tole(0x1040),	tole(0xF001),
	tole(0x30C0), tole(0x3180), tole(0xF141), tole(0x3300), tole(0xF3C1),
	tole(0xF281), tole(0x3240), tole(0x3600), tole(0xF6C1), tole(0xF781),
	tole(0x3740), tole(0xF501), tole(0x35C0), tole(0x3480), tole(0xF441),
	tole(0x3C00), tole(0xFCC1), tole(0xFD81), tole(0x3D40), tole(0xFF01),
	tole(0x3FC0), tole(0x3E80), tole(0xFE41), tole(0xFA01), tole(0x3AC0),
	tole(0x3B80), tole(0xFB41), tole(0x3900), tole(0xF9C1), tole(0xF881),
	tole(0x3840), tole(0x2800), tole(0xE8C1), tole(0xE981), tole(0x2940),
	tole(0xEB01), tole(0x2BC0), tole(0x2A80), tole(0xEA41),	tole(0xEE01),
	tole(0x2EC0), tole(0x2F80), tole(0xEF41), tole(0x2D00), tole(0xEDC1),
	tole(0xEC81), tole(0x2C40), tole(0xE401), tole(0x24C0), tole(0x2580),
	tole(0xE541), tole(0x2700), tole(0xE7C1), tole(0xE681), tole(0x2640),
	tole(0x2200), tole(0xE2C1), tole(0xE381), tole(0x2340), tole(0xE101),
	tole(0x21C0), tole(0x2080), tole(0xE041), tole(0xA001), tole(0x60C0),
	tole(0x6180), tole(0xA141), tole(0x6300), tole(0xA3C1), tole(0xA281),
	tole(0x6240), tole(0x6600), tole(0xA6C1), tole(0xA781), tole(0x6740),
	tole(0xA501), tole(0x65C0), tole(0x6480), tole(0xA441),	tole(0x6C00),
	tole(0xACC1), tole(0xAD81), tole(0x6D40), tole(0xAF01), tole(0x6FC0),
	tole(0x6E80), tole(0xAE41), tole(0xAA01), tole(0x6AC0), tole(0x6B80),
	tole(0xAB41), tole(0x6900), tole(0xA9C1), tole(0xA881), tole(0x6840),
	tole(0x7800), tole(0xB8C1), tole(0xB981), tole(0x7940), tole(0xBB01),
	tole(0x7BC0), tole(0x7A80), tole(0xBA41), tole(0xBE01), tole(0x7EC0),
	tole(0x7F80), tole(0xBF41), tole(0x7D00), tole(0xBDC1), tole(0xBC81),
	tole(0x7C40), tole(0xB401), tole(0x74C0), tole(0x7580), tole(0xB541),
	tole(0x7700), tole(0xB7C1), tole(0xB681), tole(0x7640),	tole(0x7200),
	tole(0xB2C1), tole(0xB381), tole(0x7340), tole(0xB101), tole(0x71C0),
	tole(0x7080), tole(0xB041), tole(0x5000), tole(0x90C1), tole(0x9181),
	tole(0x5140), tole(0x9301), tole(0x53C0), tole(0x5280), tole(0x9241),
	tole(0x9601), tole(0x56C0), tole(0x5780), tole(0x9741), tole(0x5500),
	tole(0x95C1), tole(0x9481), tole(0x5440), tole(0x9C01), tole(0x5CC0),
	tole(0x5D80), tole(0x9D41), tole(0x5F00), tole(0x9FC1), tole(0x9E81),
	tole(0x5E40), tole(0x5A00), tole(0x9AC1), tole(0x9B81), tole(0x5B40),
	tole(0x9901), tole(0x59C0), tole(0x5880), tole(0x9841),	tole(0x8801),
	tole(0x48C0), tole(0x4980), tole(0x8941), tole(0x4B00), tole(0x8BC1),
	tole(0x8A81), tole(0x4A40), tole(0x4E00), tole(0x8EC1), tole(0x8F81),
	tole(0x4F40), tole(0x8D01), tole(0x4DC0), tole(0x4C80), tole(0x8C41),
	tole(0x4400), tole(0x84C1), tole(0x8581), tole(0x4540), tole(0x8701),
	tole(0x47C0), tole(0x4680), tole(0x8641), tole(0x8201), tole(0x42C0),
	tole(0x4380), tole(0x8341), tole(0x4100), tole(0x81C1), tole(0x8081),
	tole(0x4040)
};

/* Redeclaration of the lookup table already defined earlier in this file. */
extern u16 const crc16_table[256];

/* Prototype for the byte-at-a-time CRC-16 routine defined further down. */
extern u16 crc16(u16 crc, const u8 *buffer, u32 len);

/*
 * crc16_byte - fold a single data byte into a running CRC-16.
 * @crc:	previous CRC value
 * @data:	byte to add
 *
 * Returns the updated CRC value.
 */
static inline u16 crc16_byte(u16 crc, const u8 data)
{
	/* Table slot is selected by the low byte of crc XOR data. */
	const u8 slot = (crc ^ data) & 0xff;
	const u16 shifted = crc >> 8;

	return shifted ^ crc16_table[slot];
}

/**
 * crc16 - run the byte-wise CRC-16 over a buffer
 * @crc:	CRC value to start from
 * @buffer:	first byte of the data
 * @len:	how many bytes to process
 *
 * Feeds every byte of @buffer, in order, through crc16_byte().
 * Returns the resulting CRC value; @crc is returned unchanged when
 * @len is zero.
 */
u16 crc16(u16 crc, u8 const *buffer, u32 len)
{
	const u8 *cursor = buffer;
	u32 remaining;

	for (remaining = len; remaining != 0; remaining--)
		crc = crc16_byte(crc, *cursor++);

	return crc;
}
#include <endian.h>
/*
 * DO_CRC16(x) - fold the byte value x into the running CRC.
 *
 * Expands to an assignment to a variable named `crc` and reads a table
 * pointer named `tab`; both must be in scope where the macro is used.
 * When __BYTE_ORDER == __LITTLE_ENDIAN the table is indexed with the low
 * byte of (crc ^ x) and crc is shifted right by 8; otherwise the table is
 * indexed with (crc >> 8) ^ x and crc is shifted left by 8.
 */
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define DO_CRC16(x) crc = tab[(crc ^ (x)) & 255] ^ (crc >> 8)
# else
#  define DO_CRC16(x) crc = tab[((crc >> 8) ^ (x))] ^ (crc << 8)
# endif

/*
 * CRC16_byte - fold one data byte into a CRC using crc16_table_le.
 * @crc:	previous CRC value
 * @data:	byte to add
 *
 * The CRC is passed through __cpu_to_le16() before the table step and
 * through __le16_to_cpu() afterwards. Returns the updated CRC value.
 */
static inline u16 CRC16_byte(u16 crc, const u8 data)
{
	/* DO_CRC16 refers to the locals `tab` and `crc` by name. */
	const u16 *tab = crc16_table_le;

	crc = __cpu_to_le16(crc);
	DO_CRC16(data);
	return __le16_to_cpu(crc);
}

/**
 * CRC16 - compute the CRC-16 for the data buffer, two bytes per iteration
 * @crc:	previous CRC value
 * @buffer:	data pointer
 * @len:	number of bytes in the buffer
 *
 * Uses crc16_table_le and the DO_CRC16 macro. A leading byte is consumed
 * on its own when @buffer starts at an odd address, the bulk of the data
 * is then processed 16 bits at a time, and a trailing odd byte is handled
 * last.
 *
 * Returns the updated CRC value.
 */
u16 CRC16(u16 crc, u8 const *buffer, u32 len)
{
	/* DO_CRC16 refers to the locals `tab` and `crc` by name. */
	const u16 *tab = crc16_table_le;
	u32 loops;

	crc = __cpu_to_le16(crc);

	/* Consume a leading odd byte so the word loop starts 2-byte aligned. */
	if (((unsigned long)buffer & 1) && len) {
		DO_CRC16(*buffer++);
		--len;
	}

	for (loops = len >> 1; loops; --loops) {
		u16 word;

		/*
		 * Load the next two bytes with memcpy instead of dereferencing
		 * a u16 pointer cast from the byte buffer: that keeps the const
		 * qualifier, avoids the aliasing violation, and never forms a
		 * pointer before the start of the buffer.
		 */
		memcpy(&word, buffer, sizeof(word));
		buffer += sizeof(word);

		crc ^= word;
		DO_CRC16(0);
		DO_CRC16(0);
	}

	/* Odd trailing byte, if any. */
	if (len & 1) {
		DO_CRC16(*buffer);
	}

	return __le16_to_cpu(crc);
}
#define TST "1234567890"
#define BUF_SIZ 5*1024*1024
main()
{
	u16 crc1, crc2;
	char *buffer =  malloc(BUF_SIZ);
	struct timeval tv, tv2, tv3, tv_res, tv2_res;

	memset(buffer, -1, BUF_SIZ);
	memcpy (buffer, TST, sizeof(TST));
	/*warm up */
	crc1 = crc16(~0, buffer, BUF_SIZ);
	crc2 = CRC16(~0, buffer, BUF_SIZ);

	gettimeofday(&tv, NULL);
	crc1 = crc16(~0, buffer, BUF_SIZ);
	gettimeofday(&tv2, NULL);
	crc2 = CRC16(~0, buffer, BUF_SIZ);
	gettimeofday(&tv3, NULL);
	timersub(&tv2, &tv, &tv_res);
	printf("crc1:%x crc2:%x\n", crc1, crc2);
	printf("crc16 tv_res:%d :%d\n", (int)tv_res.tv_sec, (int)tv_res.tv_usec);
	timersub(&tv3, &tv2, &tv2_res);
	printf("CRC16 tv_res:%d :%d\n", (int)tv2_res.tv_sec, (int)tv2_res.tv_usec);
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]