Tutorial part 5: Implementing an Ahead-of-Time compiler

If you have a pre-existing language frontend that’s compatible with libgccjit’s license, it’s possible to hook it up to libgccjit as a backend. In the previous example we showed how to do that for in-memory JIT-compilation, but libgccjit can also compile code directly to a file, allowing you to implement a more traditional ahead-of-time compiler (“JIT” is something of a misnomer for this use-case).

The essential difference is to compile the context using gcc_jit_context_compile_to_file() rather than gcc_jit_context_compile().

The “brainf” language

In this example we use libgccjit to construct an ahead-of-time compiler for an esoteric programming language that we shall refer to as “brainf”.

brainf scripts operate on an array of bytes, with a notional data pointer within the array.

brainf is hard for humans to read, but it’s trivial to write a parser for it, as there is no lexing; just a stream of bytes. The operations are:

Character

Meaning

>

idx += 1

<

idx -= 1

+

data[idx] += 1

-

data[idx] -= 1

.

output (data[idx])

,

data[idx] = input ()

[

loop until data[idx] == 0

]

end of loop

Anything else

ignored

Unlike the previous example, we’ll implement an ahead-of-time compiler, which reads .bf scripts and outputs executables (though it would be trivial to have it run them JIT-compiled in-process).

Here’s what a simple .bf script looks like:

[
  Emit the uppercase alphabet
]

cell 0 = 26
++++++++++++++++++++++++++

cell 1 = 65
>+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<

while cell#0 != 0
[
 >
 .      emit cell#1
 +      increment cell@1
 <-     decrement cell@0
]

Note

This example makes use of whitespace and comments for legibility, but could have been written as:

++++++++++++++++++++++++++
>+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<
[>.+<-]

It’s not a particularly useful language, except for providing compiler-writers with a test case that’s easy to parse. The point is that you can use gcc_jit_context_compile_to_file() to use libgccjit as a backend for a pre-existing language frontend (provided that the pre-existing frontend is compatible with libgccjit’s license).

Converting a brainf script to libgccjit IR

As before we write simple code to populate a gcc_jit_context*.


typedef struct bf_compiler
{
  const char *filename;
  int line;
  int column;

  gcc_jit_context *ctxt;

  gcc_jit_type *void_type;
  gcc_jit_type *int_type;
  gcc_jit_type *byte_type;
  gcc_jit_type *array_type;

  gcc_jit_function *func_getchar;
  gcc_jit_function *func_putchar;

  gcc_jit_function *func;
  gcc_jit_block *curblock;

  gcc_jit_rvalue *int_zero;
  gcc_jit_rvalue *int_one;
  gcc_jit_rvalue *byte_zero;
  gcc_jit_rvalue *byte_one;
  gcc_jit_lvalue *data_cells;
  gcc_jit_lvalue *idx;

  int num_open_parens;
  gcc_jit_block *paren_test[MAX_OPEN_PARENS];
  gcc_jit_block *paren_body[MAX_OPEN_PARENS];
  gcc_jit_block *paren_after[MAX_OPEN_PARENS];

} bf_compiler;

/* Bail out, with a message on stderr.  */

static void
fatal_error (bf_compiler *bfc, const char *msg)
{
  fprintf (stderr,
	   "%s:%i:%i: %s",
	   bfc->filename, bfc->line, bfc->column, msg);
  abort ();
}

/* Get "data_cells[idx]" as an lvalue.  */

static gcc_jit_lvalue *
bf_get_current_data (bf_compiler *bfc, gcc_jit_location *loc)
{
  return gcc_jit_context_new_array_access (
    bfc->ctxt,
    loc,
    gcc_jit_lvalue_as_rvalue (bfc->data_cells),
    gcc_jit_lvalue_as_rvalue (bfc->idx));
}

/* Get "data_cells[idx] == 0" as a boolean rvalue.  */

static gcc_jit_rvalue *
bf_current_data_is_zero (bf_compiler *bfc, gcc_jit_location *loc)
{
  return gcc_jit_context_new_comparison (
    bfc->ctxt,
    loc,
    GCC_JIT_COMPARISON_EQ,
    gcc_jit_lvalue_as_rvalue (bf_get_current_data (bfc, loc)),
    bfc->byte_zero);
}

/* Compile one bf character.  */

static void
bf_compile_char (bf_compiler *bfc,
		 unsigned char ch)
{
  gcc_jit_location *loc =
    gcc_jit_context_new_location (bfc->ctxt,
				  bfc->filename,
				  bfc->line,
				  bfc->column);

  /* Turn this on to trace execution, by injecting putchar ()
     of each source char. */
  if (0)
    {
      gcc_jit_rvalue *arg =
	gcc_jit_context_new_rvalue_from_int (
					     bfc->ctxt,
					     bfc->int_type,
					     ch);
      gcc_jit_rvalue *call =
	gcc_jit_context_new_call (bfc->ctxt,
				  loc,
				  bfc->func_putchar,
				  1, &arg);
      gcc_jit_block_add_eval (bfc->curblock,
			      loc,
			      call);
    }

  switch (ch)
    {
      case '>':
	gcc_jit_block_add_comment (bfc->curblock,
				   loc,
				   "'>': idx += 1;");
	gcc_jit_block_add_assignment_op (bfc->curblock,
					 loc,
					 bfc->idx,
					 GCC_JIT_BINARY_OP_PLUS,
					 bfc->int_one);
	break;

      case '<':
	gcc_jit_block_add_comment (bfc->curblock,
				   loc,
				   "'<': idx -= 1;");
	gcc_jit_block_add_assignment_op (bfc->curblock,
					 loc,
					 bfc->idx,
					 GCC_JIT_BINARY_OP_MINUS,
					 bfc->int_one);
	break;

      case '+':
	gcc_jit_block_add_comment (bfc->curblock,
				   loc,
				   "'+': data[idx] += 1;");
	gcc_jit_block_add_assignment_op (bfc->curblock,
					 loc,
					 bf_get_current_data (bfc, loc),
					 GCC_JIT_BINARY_OP_PLUS,
					 bfc->byte_one);
	break;

      case '-':
	gcc_jit_block_add_comment (bfc->curblock,
				   loc,
				   "'-': data[idx] -= 1;");
	gcc_jit_block_add_assignment_op (bfc->curblock,
					 loc,
					 bf_get_current_data (bfc, loc),
					 GCC_JIT_BINARY_OP_MINUS,
					 bfc->byte_one);
	break;

      case '.':
	{
	  gcc_jit_rvalue *arg =
	    gcc_jit_context_new_cast (
	      bfc->ctxt,
	      loc,
	      gcc_jit_lvalue_as_rvalue (bf_get_current_data (bfc, loc)),
	      bfc->int_type);
	  gcc_jit_rvalue *call =
	    gcc_jit_context_new_call (bfc->ctxt,
				      loc,
				      bfc->func_putchar,
				      1, &arg);
	  gcc_jit_block_add_comment (bfc->curblock,
				     loc,
				     "'.': putchar ((int)data[idx]);");
	  gcc_jit_block_add_eval (bfc->curblock,
				  loc,
				  call);
	}
	break;

      case ',':
	{
	  gcc_jit_rvalue *call =
	    gcc_jit_context_new_call (bfc->ctxt,
				      loc,
				      bfc->func_getchar,
				      0, NULL);
	  gcc_jit_block_add_comment (
	    bfc->curblock,
	    loc,
	    "',': data[idx] = (unsigned char)getchar ();");
	  gcc_jit_block_add_assignment (bfc->curblock,
					loc,
					bf_get_current_data (bfc, loc),
					gcc_jit_context_new_cast (
					  bfc->ctxt,
					  loc,
					  call,
					  bfc->byte_type));
	}
	break;

      case '[':
	{
	  gcc_jit_block *loop_test =
	    gcc_jit_function_new_block (bfc->func, NULL);
	  gcc_jit_block *on_zero =
	    gcc_jit_function_new_block (bfc->func, NULL);
	  gcc_jit_block *on_non_zero =
	    gcc_jit_function_new_block (bfc->func, NULL);

	  if (bfc->num_open_parens == MAX_OPEN_PARENS)
	    fatal_error (bfc, "too many open parens");

	  gcc_jit_block_end_with_jump (
	    bfc->curblock,
	    loc,
	    loop_test);

	  gcc_jit_block_add_comment (
	    loop_test,
	    loc,
	    "'['");
	  gcc_jit_block_end_with_conditional (
	    loop_test,
	    loc,
	    bf_current_data_is_zero (bfc, loc),
	    on_zero,
	    on_non_zero);
	  bfc->paren_test[bfc->num_open_parens] = loop_test;
	  bfc->paren_body[bfc->num_open_parens] = on_non_zero;
	  bfc->paren_after[bfc->num_open_parens] = on_zero;
	  bfc->num_open_parens += 1;
	  bfc->curblock = on_non_zero;
	}
	break;

      case ']':
	{
	  gcc_jit_block_add_comment (
	    bfc->curblock,
	    loc,
	    "']'");

	  if (bfc->num_open_parens == 0)
	    fatal_error (bfc, "mismatching parens");
	  bfc->num_open_parens -= 1;
	  gcc_jit_block_end_with_jump (
	    bfc->curblock,
	    loc,
	    bfc->paren_test[bfc->num_open_parens]);
	  bfc->curblock = bfc->paren_after[bfc->num_open_parens];
	}
	break;

    case '\n':
      bfc->line +=1;
      bfc->column = 0;
      break;
    }

  if (ch != '\n')
    bfc->column += 1;
}

/* Compile the given .bf file into a gcc_jit_context, containing a
   single "main" function suitable for compiling into an executable.  */

gcc_jit_context *
bf_compile (const char *filename)
{
  bf_compiler bfc;
  FILE *f_in;
  int ch;

  memset (&bfc, 0, sizeof (bfc));

  bfc.filename = filename;
  f_in = fopen (filename, "r");
  if (!f_in)
    fatal_error (&bfc, "unable to open file");
  bfc.line = 1;

  bfc.ctxt = gcc_jit_context_acquire ();

  gcc_jit_context_set_int_option (
    bfc.ctxt,
    GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL,
    3);
  gcc_jit_context_set_bool_option (
    bfc.ctxt,
    GCC_JIT_BOOL_OPTION_DUMP_INITIAL_GIMPLE,
    0);
  gcc_jit_context_set_bool_option (
    bfc.ctxt,
    GCC_JIT_BOOL_OPTION_DEBUGINFO,
    1);
  gcc_jit_context_set_bool_option (
    bfc.ctxt,
    GCC_JIT_BOOL_OPTION_DUMP_EVERYTHING,
    0);
  gcc_jit_context_set_bool_option (
    bfc.ctxt,
    GCC_JIT_BOOL_OPTION_KEEP_INTERMEDIATES,
    0);

  bfc.void_type =
    gcc_jit_context_get_type (bfc.ctxt, GCC_JIT_TYPE_VOID);
  bfc.int_type =
    gcc_jit_context_get_type (bfc.ctxt, GCC_JIT_TYPE_INT);
  bfc.byte_type =
    gcc_jit_context_get_type (bfc.ctxt, GCC_JIT_TYPE_UNSIGNED_CHAR);
  bfc.array_type =
    gcc_jit_context_new_array_type (bfc.ctxt,
				    NULL,
				    bfc.byte_type,
				    30000);

  bfc.func_getchar =
    gcc_jit_context_new_function (bfc.ctxt, NULL,
				  GCC_JIT_FUNCTION_IMPORTED,
				  bfc.int_type,
				  "getchar",
				  0, NULL,
				  0);

  gcc_jit_param *param_c =
    gcc_jit_context_new_param (bfc.ctxt, NULL, bfc.int_type, "c");
  bfc.func_putchar =
    gcc_jit_context_new_function (bfc.ctxt, NULL,
				  GCC_JIT_FUNCTION_IMPORTED,
				  bfc.void_type,
				  "putchar",
				  1, &param_c,
				  0);

  bfc.func = make_main (bfc.ctxt);
   bfc.curblock =
    gcc_jit_function_new_block (bfc.func, "initial");
  bfc.int_zero = gcc_jit_context_zero (bfc.ctxt, bfc.int_type);
  bfc.int_one = gcc_jit_context_one (bfc.ctxt, bfc.int_type);
  bfc.byte_zero = gcc_jit_context_zero (bfc.ctxt, bfc.byte_type);
  bfc.byte_one = gcc_jit_context_one (bfc.ctxt, bfc.byte_type);

  bfc.data_cells =
    gcc_jit_context_new_global (bfc.ctxt, NULL,
				 GCC_JIT_GLOBAL_INTERNAL,
				 bfc.array_type,
				 "data_cells");
  bfc.idx =
    gcc_jit_function_new_local (bfc.func, NULL,
				bfc.int_type,
				"idx");

  gcc_jit_block_add_comment (bfc.curblock,
			     NULL,
			     "idx = 0;");
  gcc_jit_block_add_assignment (bfc.curblock,
				NULL,
				bfc.idx,
				bfc.int_zero);

  bfc.num_open_parens = 0;

  while ( EOF != (ch = fgetc (f_in)))
    bf_compile_char (&bfc, (unsigned char)ch);

  gcc_jit_block_end_with_return (bfc.curblock, NULL, bfc.int_zero);

  fclose (f_in);

  return bfc.ctxt;
}

Compiling a context to a file

Unlike the previous tutorial, this time we’ll compile the context directly to an executable, using gcc_jit_context_compile_to_file():

gcc_jit_context_compile_to_file (ctxt,
                                 GCC_JIT_OUTPUT_KIND_EXECUTABLE,
                                 output_file);

Here’s the top-level of the compiler, which is what actually calls into gcc_jit_context_compile_to_file():


int
main (int argc, char **argv)
{
  const char *input_file;
  const char *output_file;
  gcc_jit_context *ctxt;
  const char *err;

  if (argc != 3)
    {
      fprintf (stderr, "%s: INPUT_FILE OUTPUT_FILE\n", argv[0]);
      return 1;
    }

  input_file = argv[1];
  output_file = argv[2];
  ctxt = bf_compile (input_file);

  gcc_jit_context_compile_to_file (ctxt,
				   GCC_JIT_OUTPUT_KIND_EXECUTABLE,
				   output_file);

  err = gcc_jit_context_get_first_error (ctxt);

  if (err)
    {
      gcc_jit_context_release (ctxt);
      return 1;
    }

  gcc_jit_context_release (ctxt);
  return 0;
}

Note how once the context is populated you could trivially instead compile it to memory using gcc_jit_context_compile() and run it in-process as in the previous tutorial.

To create an executable, we need to export a main function. Here’s how to create one from the JIT API:


/* Make "main" function:
     int
     main (int argc, char **argv)
     {
       ...
     }
*/
static gcc_jit_function *
make_main (gcc_jit_context *ctxt)
{
  gcc_jit_type *int_type =
    gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_INT);
  gcc_jit_param *param_argc =
    gcc_jit_context_new_param (ctxt, NULL, int_type, "argc");
  gcc_jit_type *char_ptr_ptr_type =
    gcc_jit_type_get_pointer (
      gcc_jit_type_get_pointer (
	gcc_jit_context_get_type (ctxt, GCC_JIT_TYPE_CHAR)));
  gcc_jit_param *param_argv =
    gcc_jit_context_new_param (ctxt, NULL, char_ptr_ptr_type, "argv");
  gcc_jit_param *params[2] = {param_argc, param_argv};
  gcc_jit_function *func_main =
    gcc_jit_context_new_function (ctxt, NULL,
				  GCC_JIT_FUNCTION_EXPORTED,
				  int_type,
				  "main",
				  2, params,
				  0);
  return func_main;
}

Note

The above implementation ignores argc and argv, but you could make use of them by exposing param_argc and param_argv to the caller.

Upon compiling this C code, we obtain a bf-to-machine-code compiler; let’s call it bfc:

$ gcc \
    tut05-bf.c \
    -o bfc \
    -lgccjit

We can now use bfc to compile .bf files into machine code executables:

$ ./bfc \
     emit-alphabet.bf \
     a.out

which we can run directly:

$ ./a.out
ABCDEFGHIJKLMNOPQRSTUVWXYZ

Success!

We can also inspect the generated executable using standard tools:

$ objdump -d a.out |less

which shows that libgccjit has managed to optimize the function somewhat (for example, the runs of 26 and 65 increment operations have become integer constants 0x1a and 0x41):

0000000000400620 <main>:
  400620:     80 3d 39 0a 20 00 00    cmpb   $0x0,0x200a39(%rip)        # 601060 <data
  400627:     74 07                   je     400630 <main
  400629:     eb fe                   jmp    400629 <main+0x9>
  40062b:     0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
  400630:     48 83 ec 08             sub    $0x8,%rsp
  400634:     0f b6 05 26 0a 20 00    movzbl 0x200a26(%rip),%eax        # 601061 <data_cells+0x1>
  40063b:     c6 05 1e 0a 20 00 1a    movb   $0x1a,0x200a1e(%rip)       # 601060 <data_cells>
  400642:     8d 78 41                lea    0x41(%rax),%edi
  400645:     40 88 3d 15 0a 20 00    mov    %dil,0x200a15(%rip)        # 601061 <data_cells+0x1>
  40064c:     0f 1f 40 00             nopl   0x0(%rax)
  400650:     40 0f b6 ff             movzbl %dil,%edi
  400654:     e8 87 fe ff ff          callq  4004e0 <putchar@plt>
  400659:     0f b6 05 01 0a 20 00    movzbl 0x200a01(%rip),%eax        # 601061 <data_cells+0x1>
  400660:     80 2d f9 09 20 00 01    subb   $0x1,0x2009f9(%rip)        # 601060 <data_cells>
  400667:     8d 78 01                lea    0x1(%rax),%edi
  40066a:     40 88 3d f0 09 20 00    mov    %dil,0x2009f0(%rip)        # 601061 <data_cells+0x1>
  400671:     75 dd                   jne    400650 <main+0x30>
  400673:     31 c0                   xor    %eax,%eax
  400675:     48 83 c4 08             add    $0x8,%rsp
  400679:     c3                      retq
  40067a:     66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

We also set up debugging information (via gcc_jit_context_new_location() and GCC_JIT_BOOL_OPTION_DEBUGINFO), so it’s possible to use gdb to singlestep through the generated binary and inspect the internal state idx and data_cells:

(gdb) break main
Breakpoint 1 at 0x400790
(gdb) run
Starting program: a.out

Breakpoint 1, 0x0000000000400790 in main (argc=1, argv=0x7fffffffe448)
(gdb) stepi
0x0000000000400797 in main (argc=1, argv=0x7fffffffe448)
(gdb) stepi
0x00000000004007a0 in main (argc=1, argv=0x7fffffffe448)
(gdb) stepi
9     >+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<
(gdb) list
4
5     cell 0 = 26
6     ++++++++++++++++++++++++++
7
8     cell 1 = 65
9     >+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<
10
11    while cell#0 != 0
12    [
13     >
(gdb) n
6     ++++++++++++++++++++++++++
(gdb) n
9     >+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<
(gdb) p idx
$1 = 1
(gdb) p data_cells
$2 = "\032", '\000' <repeats 29998 times>
(gdb) p data_cells[0]
$3 = 26 '\032'
(gdb) p data_cells[1]
$4 = 0 '\000'
(gdb) list
4
5     cell 0 = 26
6     ++++++++++++++++++++++++++
7
8     cell 1 = 65
9     >+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++<
10
11    while cell#0 != 0
12    [
13     >

Other forms of ahead-of-time-compilation

The above demonstrates compiling a gcc_jit_context* directly to an executable. It’s also possible to compile it to an object file, and to a dynamic library. See the documentation of gcc_jit_context_compile_to_file() for more information.