For

long test(long a, long b)
{
  if (a > 65535 || a < 0)
    __builtin_unreachable ();
  if (b > 65535 || b < 0)
    __builtin_unreachable ();
  return a/b;
}

we produce:

test:
.LFB0:
        .cfi_startproc
        movq    %rdi, %rax
        cqto
        idivq   %rsi
        ret

while clang does:

test:                                   # @test
        .cfi_startproc
# %bb.0:
        movq    %rdi, %rax
                                        # kill: def $ax killed $ax killed $rax
        xorl    %edx, %edx
        divw    %si
        movzwl  %ax, %eax
        retq

Clang also by default adds a 32-bit divide path even when the value range is not known:

long test(long a, long b)
{
  return a/b;
}

compiles as:

test:                                   # @test
        .cfi_startproc
# %bb.0:
        movq    %rdi, %rax
        movq    %rdi, %rcx
        orq     %rsi, %rcx
        shrq    $32, %rcx
        je      .LBB0_1
# %bb.2:
        cqto
        idivq   %rsi
        retq
The former fold_stmt_using_ranges has code to narrow ops.