This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: named address space support (2/2: SPU backend)
- From: Ben Elliston <bje at au1 dot ibm dot com>
- To: Trevor_Smigiel at playstation dot sony dot com
- Cc: gcc-patches <gcc-patches at gcc dot gnu dot org>, andrew_pinski at playstation dot sony dot com, David Edelsohn <dje at watson dot ibm dot com>, "Joseph S. Myers" <joseph at codesourcery dot com>
- Date: Thu, 28 Aug 2008 16:10:37 +1000
- Subject: Re: PATCH: named address space support (2/2: SPU backend)
- References: <1219295660.17217.9.camel@helios> <20080828032054.GP27746@playstation.sony.com>
> Otherwise it looks ok, assuming there are no major changes after the
> first part of the patch is accepted.
There are no major changes, but here is a revised patch for reference.
Cheers, Ben
2008-08-28 Ben Elliston <bje@au.ibm.com>
* config.gcc (spu-*-elf*): Add spu_cache.h to extra_headers.
* config/spu/spu-c.c (spu_cpu_cpp_builtins): Define __EA32__ or
__EA64__, depending on the ea pointer size.
* config/spu/spu-elf.h (DRIVER_SELF_SPECS): Link the right
gcc_cache library depending on the -mcache-size and
-matomic-updates options given.
(LIB_SPEC): Link gcc_cachemgr library.
* config/spu/spu.c (struct spu_address_space): New.
(spu_address_spaces): New table.
(TARGET_ADDR_SPACE_POINTER_MODE): Define.
(TARGET_ADDR_SPACE_NAME): Likewise.
(TARGET_ADDR_SPACE_NUMBER): Likewise.
(TARGET_ADDR_SPACE_CONVERSION_RTL): Likewise.
(TARGET_SECTION_TYPE_FLAGS): Likewise.
(TARGET_VALID_POINTER_MODE): Likewise.
(TARGET_VALID_ADDR_SPACE): Likewise.
(TARGET_ASM_UNALIGNED_DI_OP): Remove.
(TARGET_ASM_ALIGNED_DI_OP): Define instead.
(ea_symbol_ref): New.
(spu_legitimate_constant_p): Reject __ea qualified references.
(spu_legitimate_address): Keep __ea references until reload.
(EAmode): Define.
(cache_fetch, cache_fetch_dirty, ea_alias_set): New variables.
(ea_load_store): New function.
(ea_load_store_inline): Likewise.
(expand_ea_mem): Likewise.
(spu_expand_mov): Handle __ea memory operands.
(spu_ea_pointer_mode): New function.
(spu_valid_pointer_mode): Likewise.
(spu_section_type_flags): Likewise.
(spu_addr_space_name): Likewise.
(spu_addr_space_conversion_rtl): Likewise.
(spu_valid_addr_space): Likewise.
(spu_addr_space_number): Likewise.
* config/spu/spu.h (ASM_OUTPUT_SYMBOL_REF): New macro.
(TEXT_SECTION_ASM_OP): Prepend a tab.
(DATA_SECTION_ASM_OP): Likewise.
* config/spu/spu.md (to_ea): New expander.
(from_ea): Likewise.
* config/spu/spu.opt (mea32, mea64): New options.
* config/spu/t-spu-elf (MULTILIB_OPTIONS): Add mea64.
(EXTRA_MULTILIB_PARTS): Add cache libraries.
(cachemgr.o, cachemgr_nonatomic.o): New targets.
(libgcc_%.a): Likewise.
(cache8k.o, cache16k.o, cache32k.o, etc): Likewise.
* config/spu/cache.S: New file.
* config/spu/cachemgr.c: Likewise.
* config/spu/spu_cache.h: Likewise.
* doc/invoke.texi (SPU Options): Document -mea32, -mea64,
-mcache-size and -matomic-updates options.
--- gcc-clean/gcc/config.gcc 2008-08-28 13:18:30.000000000 +1000
+++ gcc-nas/gcc/config.gcc 2008-08-27 12:04:25.000000000 +1000
@@ -2281,7 +2281,7 @@
spu-*-elf*)
tm_file="dbxelf.h elfos.h spu/spu-elf.h spu/spu.h"
tmake_file="spu/t-spu-elf"
- extra_headers="spu_intrinsics.h spu_internals.h vmx2spu.h spu_mfcio.h vec_types.h"
+ extra_headers="spu_intrinsics.h spu_internals.h vmx2spu.h spu_mfcio.h vec_types.h spu_cache.h"
extra_modes=spu/spu-modes.def
c_target_objs="${c_target_objs} spu-c.o"
cxx_target_objs="${cxx_target_objs} spu-c.o"
--- gcc-clean/gcc/config/spu/spu.c 2008-08-13 10:58:01.000000000 +1000
+++ gcc-nas/gcc/config/spu/spu.c 2008-08-27 11:55:29.000000000 +1000
@@ -61,6 +61,19 @@ struct spu_builtin_range
int low, high;
};
+struct spu_address_space
+{
+ const char *name;
+ rtx (*to_generic_insn) (rtx, rtx);
+ rtx (*from_generic_insn) (rtx, rtx);
+};
+
+static struct spu_address_space spu_address_spaces[] = {
+ {"generic", NULL, NULL },
+ {"__ea", gen_from_ea, gen_to_ea },
+ {NULL, NULL, NULL},
+};
+
static struct spu_builtin_range spu_builtin_range[] = {
{-0x40ll, 0x7fll}, /* SPU_BTI_7 */
{-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
@@ -189,6 +202,34 @@ tree spu_builtin_types[SPU_BTI_MAX];
/* TARGET overrides. */
+static enum machine_mode spu_ea_pointer_mode (int);
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE spu_ea_pointer_mode
+
+static const char *spu_addr_space_name (int);
+#undef TARGET_ADDR_SPACE_NAME
+#define TARGET_ADDR_SPACE_NAME spu_addr_space_name
+
+static unsigned char spu_addr_space_number (const_tree);
+#undef TARGET_ADDR_SPACE_NUMBER
+#define TARGET_ADDR_SPACE_NUMBER spu_addr_space_number
+
+static rtx (* spu_addr_space_conversion_rtl (int, int)) (rtx, rtx);
+#undef TARGET_ADDR_SPACE_CONVERSION_RTL
+#define TARGET_ADDR_SPACE_CONVERSION_RTL spu_addr_space_conversion_rtl
+
+static unsigned int spu_section_type_flags (tree, const char *, int);
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
+
+static bool spu_valid_pointer_mode (enum machine_mode mode);
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE spu_valid_pointer_mode
+
+static bool spu_valid_addr_space (const_tree);
+#undef TARGET_VALID_ADDR_SPACE
+#define TARGET_VALID_ADDR_SPACE spu_valid_addr_space
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
@@ -198,10 +239,8 @@ tree spu_builtin_types[SPU_BTI_MAX];
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
-/* The .8byte directive doesn't seem to work well for a 32 bit
- architecture. */
-#undef TARGET_ASM_UNALIGNED_DI_OP
-#define TARGET_ASM_UNALIGNED_DI_OP NULL
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs
@@ -2826,6 +2865,20 @@ arith_immediate_p (rtx op, enum machine_
return val >= low && val <= high;
}
+/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
+
+static int
+ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+ tree decl;
+
+ return (GET_CODE (x) == SYMBOL_REF
+ && (decl = SYMBOL_REF_DECL (x)) != 0
+ && TREE_CODE (decl) == VAR_DECL
+ && TYPE_ADDR_SPACE (strip_array_types (TREE_TYPE (decl))));
+}
+
/* We accept:
- any 32-bit constant (SImode, SFmode)
- any constant that can be generated with fsmbi (any mode)
@@ -2837,19 +2890,27 @@ spu_legitimate_constant_p (rtx x)
{
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
- /* V4SI with all identical symbols is valid. */
- if (!flag_pic
- && GET_MODE (x) == V4SImode
- && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
- || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
- || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
- return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
- && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
- && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
- if (GET_CODE (x) == CONST_VECTOR
- && !const_vector_immediate_p (x))
+ /* Reject any __ea qualified reference. These can't appear in
+ instructions but must be forced to the constant pool. */
+ if (for_each_rtx (&x, ea_symbol_ref, 0))
return 0;
+
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ /* V4SI with all identical symbols is valid. */
+ if (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
+ || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
+ || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST)
+ return (!flag_pic
+ && GET_MODE (x) == V4SImode
+ && CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
+ && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
+ && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3));
+
+ if (!const_vector_immediate_p (x))
+ return 0;
+ }
return 1;
}
@@ -2871,10 +2932,16 @@ spu_legitimate_address (enum machine_mod
x = XEXP (x, 0);
switch (GET_CODE (x))
{
- case SYMBOL_REF:
case LABEL_REF:
return !TARGET_LARGE_MEM;
+ case SYMBOL_REF:
+ /* Keep __ea references until reload so that spu_expand_mov
+ can see them in MEMs. */
+ if (ea_symbol_ref (&x, 0))
+ return !reload_in_progress && !reload_completed;
+ return !TARGET_LARGE_MEM;
+
case CONST:
if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
{
@@ -2884,6 +2951,10 @@ spu_legitimate_address (enum machine_mod
/* Accept any symbol_ref + constant, assuming it does not
wrap around the local store addressability limit. */
if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
+ {
+ if (ea_symbol_ref (&sym, 0))
+ return 0;
+ }
return 1;
}
return 0;
@@ -3491,6 +3562,229 @@ store_with_one_insn_p (rtx mem)
return 0;
}
+#define EAmode (spu_ea_model != 32 ? DImode : SImode)
+
+rtx cache_fetch;
+rtx cache_fetch_dirty;
+int ea_alias_set = -1;
+
+/* MEM is known to be an __ea qualified memory access. Emit a call to
+ fetch the ppu memory to local store, and return its address in local
+ store. */
+
+static void
+ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ if (is_store)
+ {
+ rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
+ if (!cache_fetch_dirty)
+ cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
+ emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
+ 2, ea_addr, EAmode, ndirty, SImode);
+ }
+ else
+ {
+ if (!cache_fetch)
+ cache_fetch = init_one_libfunc ("__cache_fetch");
+ emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
+ 1, ea_addr, EAmode);
+ }
+}
+
+/* Like ea_load_store, but do the cache tag comparison and, for stores,
+ dirty bit marking, inline.
+
+ The cache control data structure is an array of
+
+ struct __cache_tag_array
+ {
+ unsigned int tag_lo[4];
+ unsigned int tag_hi[4];
+ void *data_pointer[4];
+ int reserved[4];
+ vector unsigned short dirty_bits[4];
+ } */
+
+static void
+ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ rtx ea_addr_si;
+ HOST_WIDE_INT v;
+ rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
+ rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
+ rtx index_mask = gen_reg_rtx (SImode);
+ rtx tag_arr = gen_reg_rtx (Pmode);
+ rtx splat_mask = gen_reg_rtx (TImode);
+ rtx splat = gen_reg_rtx (V4SImode);
+ rtx splat_hi = NULL_RTX;
+ rtx tag_index = gen_reg_rtx (Pmode);
+ rtx block_off = gen_reg_rtx (SImode);
+ rtx tag_addr = gen_reg_rtx (Pmode);
+ rtx tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag_hi = NULL_RTX;
+ rtx cache_ptrs = gen_reg_rtx (TImode);
+ rtx cache_ptrs_si = gen_reg_rtx (SImode);
+ rtx tag_equal = gen_reg_rtx (V4SImode);
+ rtx tag_equal_hi = NULL_RTX;
+ rtx tag_eq_pack = gen_reg_rtx (V4SImode);
+ rtx tag_eq_pack_si = gen_reg_rtx (SImode);
+ rtx eq_index = gen_reg_rtx (SImode);
+ rtx bcomp, hit_label, hit_ref, cont_label, insn;
+
+ if (spu_ea_model != 32)
+ {
+ splat_hi = gen_reg_rtx (V4SImode);
+ cache_tag_hi = gen_reg_rtx (V4SImode);
+ tag_equal_hi = gen_reg_rtx (V4SImode);
+ }
+
+ emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
+ emit_move_insn (tag_arr, tag_arr_sym);
+ v = 0x0001020300010203LL;
+ emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
+ ea_addr_si = ea_addr;
+ if (spu_ea_model != 32)
+ ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
+
+ /* tag_index = ea_addr & (tag_array_size - 128) */
+ emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
+
+ /* splat ea_addr to all 4 slots. */
+ emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
+ /* Similarly for high 32 bits of ea_addr. */
+ if (spu_ea_model != 32)
+ emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
+
+ /* block_off = ea_addr & 127 */
+ emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
+
+ /* tag_addr = tag_arr + tag_index */
+ emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
+
+ /* Read cache tags. */
+ emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
+ if (spu_ea_model != 32)
+ emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
+ plus_constant (tag_addr, 16)));
+
+ /* tag = ea_addr & -128 */
+ emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
+
+ /* Read all four cache data pointers. */
+ emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
+ plus_constant (tag_addr, 32)));
+
+ /* Compare tags. */
+ emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
+ if (spu_ea_model != 32)
+ {
+ emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
+ emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
+ }
+
+ /* At most one of the tags compare equal, so tag_equal has one
+ 32-bit slot set to all 1's, with the other slots all zero.
+ gbb picks off low bit from each byte in the 128-bit registers,
+ so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
+ we have a hit. */
+ emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
+ emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
+
+ /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
+ emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
+
+ /* Allowing us to rotate the corresponding cache data pointer to slot0.
+ (rotating eq_index mod 16 bytes). */
+ emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
+ emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
+
+ /* Add block offset to form final data address. */
+ emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
+
+ /* Check that we did hit. */
+ hit_label = gen_label_rtx ();
+ hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
+ bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ hit_ref, pc_rtx)));
+ /* Say that this branch is very likely to happen. */
+ v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
+
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ cont_label = gen_label_rtx ();
+ emit_jump_insn (gen_jump (cont_label));
+ emit_barrier ();
+
+ emit_label (hit_label);
+
+ if (is_store)
+ {
+ HOST_WIDE_INT v_hi;
+ rtx dirty_bits = gen_reg_rtx (TImode);
+ rtx dirty_off = gen_reg_rtx (SImode);
+ rtx dirty_128 = gen_reg_rtx (TImode);
+ rtx neg_block_off = gen_reg_rtx (SImode);
+
+ /* Set up mask with one dirty bit per byte of the mem we are
+ writing, starting from top bit. */
+ v_hi = v = -1;
+ v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
+ if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
+ {
+ v_hi = v;
+ v = 0;
+ }
+ emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
+
+ /* Form index into cache dirty_bits. eq_index is one of
+ 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
+ 0x40, 0x50, 0x60 or 0x70 which just happens to be the
+ offset to each of the four dirty_bits elements. */
+ emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
+
+ emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
+
+ /* Rotate bit mask to proper bit. */
+ emit_insn (gen_negsi2 (neg_block_off, block_off));
+ emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
+ emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
+
+ /* Or in the new dirty bits. */
+ emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
+
+ /* Store. */
+ emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
+ }
+
+ emit_label (cont_label);
+}
+
+static rtx
+expand_ea_mem (rtx mem, bool is_store)
+{
+ rtx ea_addr;
+ rtx data_addr = gen_reg_rtx (Pmode);
+
+ ea_addr = force_reg (EAmode, XEXP (mem, 0));
+ if (optimize_size || optimize == 0)
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ else
+ ea_load_store_inline (mem, is_store, ea_addr, data_addr);
+
+ mem = change_address (mem, VOIDmode, data_addr);
+
+ if (ea_alias_set == -1)
+ ea_alias_set = new_alias_set ();
+ set_mem_alias_set (mem, 0);
+ set_mem_alias_set (mem, ea_alias_set);
+ return mem;
+}
+
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
@@ -3540,6 +3834,8 @@ spu_expand_mov (rtx * ops, enum machine_
{
if (GET_CODE (ops[0]) == MEM)
{
+ if (MEM_ADDR_SPACE (ops[0]))
+ ops[0] = expand_ea_mem (ops[0], true);
if (!spu_valid_move (ops))
{
emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
@@ -3549,6 +3845,8 @@ spu_expand_mov (rtx * ops, enum machine_
}
else if (GET_CODE (ops[1]) == MEM)
{
+ if (MEM_ADDR_SPACE (ops[1]))
+ ops[1] = expand_ea_mem (ops[1], false);
if (!spu_valid_move (ops))
{
emit_insn (gen_load
@@ -5543,6 +5841,34 @@ spu_vector_alignment_reachable (const_tr
return true;
}
+static enum machine_mode
+spu_ea_pointer_mode (int addrspace)
+{
+ switch (addrspace)
+ {
+ case 0:
+ return ptr_mode;
+ case 1:
+ return (spu_ea_model == 64 ? DImode : ptr_mode);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static bool
+spu_valid_pointer_mode (enum machine_mode mode)
+{
+ return (mode == ptr_mode || mode == Pmode || mode == spu_ea_pointer_mode (1));
+}
+
+static unsigned int
+spu_section_type_flags (tree decl, const char *name, int reloc)
+{
+ if (strcmp (name, "._ea") == 0)
+ return SECTION_WRITE | SECTION_DEBUG;
+ return default_section_type_flags (decl, name, reloc);
+}
+
/* Count the total number of instructions in each pipe and return the
maximum, which is used as the Minimum Iteration Interval (MII)
in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
@@ -5601,3 +5927,50 @@ spu_libgcc_shift_count_mode (void)
for shift counts. */
return SImode;
}
+
+const char *
+spu_addr_space_name (int addrspace)
+{
+ gcc_assert (addrspace > 0 && addrspace <= 1);
+ return (spu_address_spaces [addrspace].name);
+}
+
+static
+rtx (* spu_addr_space_conversion_rtl (int from, int to)) (rtx, rtx)
+{
+ gcc_assert ((from == 0 && to == 1) || (from == 1 && to == 0));
+
+ if (to == 0)
+ return spu_address_spaces[1].to_generic_insn;
+ else if (to == 1)
+ return spu_address_spaces[1].from_generic_insn;
+
+ return 0;
+}
+
+static
+bool spu_valid_addr_space (const_tree value)
+{
+ int i;
+ if (!value)
+ return false;
+
+ for (i = 0; spu_address_spaces[i].name; i++)
+ if (strcmp (IDENTIFIER_POINTER (value), spu_address_spaces[i].name) == 0)
+ return true;
+ return false;
+}
+
+static
+unsigned char spu_addr_space_number (const_tree ident)
+{
+ int i;
+ if (!ident)
+ return 0;
+
+ for (i = 0; spu_address_spaces[i].name; i++)
+ if (strcmp (IDENTIFIER_POINTER (ident), spu_address_spaces[i].name) == 0)
+ return i;
+
+ gcc_unreachable ();
+}
--- gcc-clean/gcc/config/spu/spu_cache.h 1970-01-01 10:00:00.000000000 +1000
+++ gcc-nas/gcc/config/spu/spu_cache.h 2008-08-20 14:14:36.000000000 +1000
@@ -0,0 +1,41 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this file; see the file COPYING. If not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source files
+ compiled by GCC, this header file does not by itself cause the resulting
+ executable to be covered by the GNU General Public License. This exception
+ does not however invalidate any other reasons why the executable file might be
+ covered by the GNU General Public License. */
+
+#ifndef SPU_CACHEH
+#define SPU_CACHEH
+
+void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty);
+void *__cache_fetch (__ea void *ea);
+void __cache_evict (__ea void *ea);
+void __cache_flush (void);
+void __cache_touch (__ea void *ea);
+
+#define cache_fetch_dirty(_ea, _n_bytes_dirty) \
+ __cache_fetch_dirty(_ea, _n_bytes_dirty)
+
+#define cache_fetch(_ea) __cache_fetch(_ea)
+#define cache_touch(_ea) __cache_touch(_ea)
+#define cache_evict(_ea) __cache_evict(_ea)
+#define cache_flush() __cache_flush()
+
+#endif
--- gcc-clean/gcc/config/spu/spu-c.c 2008-07-24 14:02:11.000000000 +1000
+++ gcc-nas/gcc/config/spu/spu-c.c 2008-07-25 10:26:58.000000000 +1000
@@ -198,6 +198,17 @@ spu_cpu_cpp_builtins (struct cpp_reader
if (spu_arch == PROCESSOR_CELLEDP)
builtin_define_std ("__SPU_EDP__");
builtin_define_std ("__vector=__attribute__((__spu_vector__))");
+ switch (spu_ea_model)
+ {
+ case 32:
+ builtin_define_std ("__EA32__");
+ break;
+ case 64:
+ builtin_define_std ("__EA64__");
+ break;
+ default:
+ gcc_unreachable ();
+ }
if (!flag_iso)
{
--- gcc-clean/gcc/config/spu/spu-elf.h 2008-04-03 15:13:48.000000000 +1100
+++ gcc-nas/gcc/config/spu/spu-elf.h 2008-06-16 16:14:08.000000000 +1000
@@ -49,10 +49,26 @@
#define EH_FRAME_IN_DATA_SECTION 1
+#define DRIVER_SELF_SPECS "\
+ %{mcache-size=128 : -lgcc_cache128k ; \
+ mcache-size=64 : -lgcc_cache64k ; \
+ mcache-size=32 : -lgcc_cache32k ; \
+ mcache-size=16 : -lgcc_cache16k ; \
+ mcache-size=8 : -lgcc_cache8k ; \
+ : -lgcc_cache64k } \
+ %<mcache-size=* \
+ %{mno-atomic-updates:-lgcc_cachemgr_nonatomic; :-lgcc_cachemgr} \
+ %<matomic-updates %<mno-atomic-updates"
+
#define LINK_SPEC "%{mlarge-mem: --defsym __stack=0xfffffff0 }"
-#define LIB_SPEC \
- "-( %{!shared:%{g*:-lg}} -lc -lgloss -)"
+/* Match each of the mutually exclusive cache<n>k libraries because
+ lgcc_cache* did not seem to work -- perhaps a bug in the specs
+ handling? */
+#define LIB_SPEC "-( %{!shared:%{g*:-lg}} -lc -lgloss -) \
+ %{lgcc_cachemgr*:-lgcc_cachemgr%*} \
+ %{lgcc_cache128k} %{lgcc_cache64k} %{lgcc_cache32k} \
+ %{lgcc_cache16k} %{lgcc_cache8k}"
/* Turn off warnings in the assembler too. */
#undef ASM_SPEC
--- gcc-clean/gcc/config/spu/spu.h 2008-08-27 07:16:42.000000000 +1000
+++ gcc-nas/gcc/config/spu/spu.h 2008-08-27 12:04:42.000000000 +1000
@@ -445,9 +445,9 @@ targetm.resolve_overloaded_builtin = spu
/* Sections */
-#define TEXT_SECTION_ASM_OP ".text"
+#define TEXT_SECTION_ASM_OP "\t.text"
-#define DATA_SECTION_ASM_OP ".data"
+#define DATA_SECTION_ASM_OP "\t.data"
#define JUMP_TABLES_IN_TEXT_SECTION 1
@@ -488,6 +488,17 @@ targetm.resolve_overloaded_builtin = spu
#define ASM_OUTPUT_LABELREF(FILE, NAME) \
asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME))
+#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \
+ do \
+ { \
+ tree decl; \
+ assemble_name (FILE, XSTR (X, 0)); \
+ if ((decl = SYMBOL_REF_DECL (X)) != 0 \
+ && TREE_CODE (decl) == VAR_DECL \
+ && TYPE_ADDR_SPACE (strip_array_types (TREE_TYPE (decl)))) \
+ fputs ("@ppu", FILE); \
+ } while (0)
+
/* Instruction Output */
#define REGISTER_NAMES \
--- gcc-clean/gcc/config/spu/spu.md 2008-08-26 19:48:17.000000000 +1000
+++ gcc-nas/gcc/config/spu/spu.md 2008-08-28 15:59:56.000000000 +1000
@@ -5229,4 +5229,45 @@ DONE;
DONE;
}")
+(define_expand "to_ea"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand 1 "" ""))]
+ ""
+{
+ rtx ls_mem, op0, op1;
+ enum machine_mode mode = (spu_ea_model == 32) ? Pmode : DImode;
+ ls_mem = gen_rtx_MEM (DImode, gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+
+ op0 = force_reg (mode, operands[0]);
+ op1 = force_reg (Pmode, operands[1]);
+
+ if (mode == Pmode)
+ emit_insn (gen_addsi3 (op0, op1, force_reg (mode, gen_lowpart (mode, ls_mem))));
+ else
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_move_insn (tmp, gen_rtx_ZERO_EXTEND (DImode, op1));
+ emit_insn (gen_adddi3 (op0, tmp, force_reg (mode, ls_mem)));
+ }
+ DONE;
+})
+
+(define_expand "from_ea"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand 1 "" ""))]
+ ""
+{
+ rtx ls_mem, ls, op0, op1, tmp;
+ enum machine_mode mode = (spu_ea_model == 32) ? Pmode : DImode;
+
+ ls_mem = gen_rtx_MEM (DImode, gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ ls = force_reg (Pmode, gen_lowpart (Pmode, ls_mem));
+
+ op0 = force_reg (Pmode, operands[0]);
+ op1 = force_reg (mode, operands[1]);
+ tmp = (mode == Pmode) ? op1 : force_reg (Pmode, gen_lowpart (Pmode, op1));
+
+ emit_insn (gen_subsi3 (op0, tmp, ls));
+ DONE;
+})
--- gcc-clean/gcc/config/spu/spu.opt 2008-04-03 15:13:48.000000000 +1100
+++ gcc-nas/gcc/config/spu/spu.opt 2008-08-22 15:48:24.000000000 +1000
@@ -62,3 +62,11 @@ Generate code for given CPU
mtune=
Target RejectNegative Joined Var(spu_tune_string)
Schedule code for given CPU
+
+mea32
+Target Report RejectNegative Var(spu_ea_model,32) Init(32)
+Access variables in 32-bit PPU objects
+
+mea64
+Target Report RejectNegative Var(spu_ea_model,64) VarExists
+Access variables in 64-bit PPU objects
--- gcc-clean/gcc/config/spu/t-spu-elf 2008-08-13 10:58:01.000000000 +1000
+++ gcc-nas/gcc/config/spu/t-spu-elf 2008-08-28 14:14:36.000000000 +1000
@@ -59,13 +59,40 @@ fp-bit.c: $(srcdir)/config/fp-bit.c $(sr
CRTSTUFF_T_CFLAGS =
#MULTILIB_OPTIONS=mlarge-mem/mtest-abi
+MULTILIB_OPTIONS=mea64
#MULTILIB_DIRNAMES=large-mem test-abi
#MULTILIB_MATCHES=
# Neither gcc or newlib seem to have a standard way to generate multiple
# crt*.o files. So we don't use the standard crt0.o name anymore.
-EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o libgcc_cachemgr.a libgcc_cachemgr_nonatomic.a \
+ libgcc_cache8k.a libgcc_cache16k.a libgcc_cache32k.a libgcc_cache64k.a libgcc_cache128k.a
+
+$(T)cachemgr.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -c $< -o $@
+
+# Specialised rule to add a -D flag.
+$(T)cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -DNONATOMIC -c $< -o $@
+
+$(T)libgcc_%.a: $(T)%.o
+ $(AR_FOR_TARGET) -rcs $@ $<
+
+$(T)cache8k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=8 -o $@ -c $<
+
+$(T)cache16k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=16 -o $@ -c $<
+
+$(T)cache32k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=32 -o $@ -c $<
+
+$(T)cache64k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=64 -o $@ -c $<
+
+$(T)cache128k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=128 -o $@ -c $<
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
--- gcc-clean/gcc/config/spu/cache.S 1970-01-01 10:00:00.000000000 +1000
+++ gcc-nas/gcc/config/spu/cache.S 2008-08-20 14:14:01.000000000 +1000
@@ -0,0 +1,47 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+.data
+.p2align 7
+.global __cache
+__cache:
+.rept __CACHE_SIZE__ * 8
+.fill 128
+.endr
+
+.p2align 7
+.global __cache_tag_array
+__cache_tag_array:
+.rept __CACHE_SIZE__ * 2
+.long 1, 1, 1, 1
+.fill 128-16
+.endr
+__end_cache_tag_array:
+
+.globl __cache_tag_array_size
+.set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array
--- gcc-clean/gcc/config/spu/cachemgr.c 1970-01-01 10:00:00.000000000 +1000
+++ gcc-nas/gcc/config/spu/cachemgr.c 2008-08-25 09:38:06.000000000 +1000
@@ -0,0 +1,459 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include <spu_mfcio.h>
+#include <spu_internals.h>
+#include <spu_intrinsics.h>
+#include <spu_cache.h>
+
+/* Effective address of the start of this SPU's local store; defined
+   elsewhere in the runtime.  */
+extern unsigned long long __ea_local_store;
+/* Absolute symbol from cache.S whose *address* is the byte size of
+   the tag array; see SET_MASK and CACHE_LINES below.  */
+extern char __cache_tag_array_size;
+
+/* Bytes per cache line.  */
+#define LINE_SIZE 128
+/* Mask of the within-line offset bits of an address.  */
+#define TAG_MASK (LINE_SIZE - 1)
+
+/* Associativity: ways per set.  */
+#define WAYS 4
+/* Mask selecting the set-index bits of an address.  */
+#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)
+
+/* Total number of cache lines (sets * WAYS).  */
+#define CACHE_LINES ((int) &__cache_tag_array_size / \
+                     sizeof (struct __cache_tag_array) * WAYS)
+
+/* One set descriptor of the 4-way cache.  Parallel arrays indexed by
+   way: tag_lo/tag_hi hold the (possibly split) effective-address tag,
+   base the local-store address of the cached line, reserved the LRU
+   counter (with a local-store-alias flag in the MSB -- see LS_FLAG),
+   and dirty_bits one bit per byte of the 128-byte line.  */
+struct __cache_tag_array
+{
+  unsigned int tag_lo[WAYS];
+  unsigned int tag_hi[WAYS];
+  void *base[WAYS];
+  int reserved[WAYS];
+  vector unsigned short dirty_bits[WAYS];
+};
+
+/* Storage defined in cache.S, sized by the cache-size selection.  */
+extern struct __cache_tag_array __cache_tag_array[];
+extern char __cache[];
+
+/* In order to make the code seem a little cleaner, and to avoid
+   having 64/32-bit ifdefs all over the place, we use macros.  */
+
+/* It may seem poor taste to define variables within a macro, but
+   it's C99 compliant. */
+
+#ifdef __EA64__
+/* 64-bit tags are split across tag_hi (upper word) and tag_lo.  */
+#define CHECK_TAG(_entry, _way, _tag) ((_entry->tag_lo[_way] == \
+        (_tag & 0xFFFFFFFF))&&(_entry->tag_hi[_way] == (_tag >> 32)))
+
+/* Declares a local variable `tag' holding the way's full tag.  */
+#define GET_TAG(_entry, _way) unsigned long long tag = _entry->tag_hi[_way]; \
+        tag = tag << 32; \
+        tag |= (_entry->tag_lo[_way]);
+
+#define SET_TAG(_entry, _way, _tag) \
+        _entry->tag_lo[_way] = (_tag & 0xFFFFFFFF); \
+        _entry->tag_hi[_way] = (_tag >> 32);
+
+/* NOTE(review): `addr' is a plain object-like macro, so it will also
+   rewrite any unrelated identifier spelled `addr' -- keep that name
+   out of this file.  */
+#define addr unsigned long long
+#define si_from_eavoid(_x) si_from_ullong (eavoid_to_eanum(_x))
+#else /*__EA32__*/
+/* 32-bit tags fit entirely in tag_lo; tag_hi is unused.  */
+#define CHECK_TAG(_entry, _way, _tag) (_entry->tag_lo[_way] == _tag)
+
+#define GET_TAG(_entry, _way) unsigned long tag = _entry->tag_lo[_way]
+
+#define SET_TAG(_entry, _way, _tag) \
+        _entry->tag_lo[_way] = _tag;
+
+#define addr unsigned long
+#define si_from_eavoid(_x) si_from_uint (eavoid_to_eanum(_x))
+#endif
+
+/* Map an effective address to its set's tag entry.  Only the low 32
+   bits participate -- the set index lives there -- so we deliberately
+   truncate the (addr)-wide value to unsigned int before masking with
+   SET_MASK and adding the tag array base.  Note: no trailing
+   semicolon, so the macro can be used as an expression (previously it
+   expanded to a spurious `;;' at every call site).  */
+#define GET_ENTRY(_addr) ((struct __cache_tag_array *) \
+                          si_to_ptr(si_a \
+                                    (si_and(si_from_uint((unsigned int) (addr) _addr), \
+                                            si_from_uint(SET_MASK)), \
+                          si_from_uint((unsigned int) __cache_tag_array))))
+
+/* Local-store address of the data line for (_addr, _way): lines of a
+   set are contiguous in __cache, WAYS lines per set.  Trailing
+   semicolon removed for the same reason as GET_ENTRY.  */
+#define GET_CACHE_LINE(_addr, _way) ((void *) (__cache + \
+                        (_addr & SET_MASK) * WAYS) + (_way * LINE_SIZE))
+
+/* Convert an __ea pointer to its integer effective address.  */
+#define eavoid_to_eanum(_ea) ((addr) _ea)
+
+/* A line is dirty iff any bit of its dirty-bit vector is set.  */
+#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) _vec)))
+/* A lo tag of 1 marks an empty way; real tags are line-aligned, so
+   their low bits are always zero and can never equal 1.  */
+#define SET_EMPTY(_entry, _way) (_entry->tag_lo[_way] = 1)
+#define CHECK_EMPTY(_entry, _way) (_entry->tag_lo[_way] == 1)
+
+/* The MSB of a way's reserved word flags a line that aliases the
+   local store; the remaining bits hold its LRU counter.  */
+#define LS_FLAG 0x80000000
+#define SET_IS_LS(_entry, _way) (_entry->reserved[_way] |= LS_FLAG)
+#define CHECK_IS_LS(_entry, _way) (_entry->reserved[_way] & LS_FLAG)
+#define GET_LRU(_entry, _way) (_entry->reserved[_way] & ~(LS_FLAG))
+
+/* Registered as a destructor so dirty lines are flushed at exit.  */
+static void __cache_flush_stub (void) __attribute__ ((destructor));
+/* MFC tag used for cache DMA; 32 means "not yet reserved".  */
+static int dma_tag = 32;
+
+/* Evict WAY of ENTRY: if the way holds dirty data and is not a
+   local-store alias, write the dirty bytes back to backing store;
+   then mark the way empty and clear its dirty bits.  */
+static void
+__cache_evict_entry (struct __cache_tag_array *entry, int way)
+{
+
+  /* Declares a local `tag' holding the way's full address tag.  */
+  GET_TAG (entry, way);
+
+  if ((CHECK_DIRTY (entry->dirty_bits[way])) && (!CHECK_IS_LS (entry, way)))
+    {
+#ifdef NONATOMIC
+      /* Non-atomic writes: DMA the whole line back regardless of
+         which bytes are dirty. */
+      unsigned int oldmask, mach_stat;
+      char *line = ((void *) 0);
+
+      /* Enter critical section (interrupts off). */
+      mach_stat = spu_readch (SPU_RdMachStat);
+      spu_idisable ();
+
+      /* Issue DMA request. */
+      line = GET_CACHE_LINE (entry->tag_lo[way], way);
+      mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+      /* Wait for DMA completion. */
+      oldmask = mfc_read_tag_mask ();
+      mfc_write_tag_mask (1 << dma_tag);
+      mfc_read_tag_status_all ();
+      mfc_write_tag_mask (oldmask);
+
+      /* Leave critical section: re-enable interrupts only if they
+         were enabled on entry. */
+      if (__builtin_expect (mach_stat & 1, 0))
+        spu_ienable ();
+#else
+      /* Allocate a buffer large enough that we know it has 128 bytes
+         that are 128 byte aligned (for DMA). */
+
+      char buffer[LINE_SIZE + 127];
+      qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
+      qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
+      qword bits;
+      unsigned int mach_stat;
+
+      /* Enter critical section. */
+      mach_stat = spu_readch (SPU_RdMachStat);
+      spu_idisable ();
+
+      do
+        {
+          /* We atomically read the current memory into a buffer,
+             modify the dirty bytes in the buffer, and write it
+             back. If writeback fails, loop and try again. */
+
+          mfc_getllar (buf_ptr, tag, 0, 0);
+          mfc_read_atomic_status ();
+
+          /* The method we're using to write 16 dirty bytes into
+             the buffer at a time uses fsmb which in turn uses
+             the least significant 16 bits of word 0, so we
+             load the bits and rotate so that the first bit of
+             the bitmap is in the first bit that fsmb will use. */
+
+          bits = (qword) entry->dirty_bits[way];
+          bits = si_rotqbyi (bits, -2);
+
+          /* Si_fsmb creates the mask of dirty bytes.
+             Use selb to nab the appropriate bits. */
+          buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));
+
+          /* Rotate to next 16 byte section of cache. */
+          bits = si_rotqbyi (bits, 2);
+
+          buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+          buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
+          bits = si_rotqbyi (bits, 2);
+
+          mfc_putllc (buf_ptr, tag, 0, 0);
+        }
+      while (mfc_read_atomic_status ());
+
+      /* Leave critical section. */
+      if (__builtin_expect (mach_stat & 1, 0))
+        spu_ienable ();
+#endif
+    }
+
+  /* In any case, mark the way empty by setting its lo tag to 1, and
+     clear the dirty bits. */
+  SET_EMPTY (entry, way);
+  entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
+}
+
+/* Evict EA's line from the cache if present, writing dirty data
+   back to backing store.  */
+void
+__cache_evict (__ea void *ea)
+{
+  struct __cache_tag_array *entry = GET_ENTRY (ea);
+  addr tag = eavoid_to_eanum (ea) & ~TAG_MASK;
+  int way;
+
+  /* The line could sit in any way of its set; evict every way whose
+     tag matches.  */
+  for (way = 0; way < WAYS; way++)
+    if (CHECK_TAG (entry, way, tag))
+      __cache_evict_entry (entry, way);
+}
+
+/* DMA the LINE_SIZE bytes of backing store at TAG into WAY's line
+   and return the line's local-store address.  Reserves the MFC DMA
+   tag on first use.  */
+static void *
+__cache_fill (int way, addr tag)
+{
+  unsigned int oldmask, mach_stat;
+  char *line = ((void *) 0);
+
+  /* Reserve our DMA tag. */
+  if (dma_tag == 32)
+    dma_tag = mfc_tag_reserve ();
+
+  /* Enter critical section (interrupts off). */
+  mach_stat = spu_readch (SPU_RdMachStat);
+  spu_idisable ();
+
+  /* Issue DMA request. */
+  line = GET_CACHE_LINE (tag, way);
+  mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+  /* Wait for DMA completion, preserving the caller's tag mask. */
+  oldmask = mfc_read_tag_mask ();
+  mfc_write_tag_mask (1 << dma_tag);
+  mfc_read_tag_status_all ();
+  mfc_write_tag_mask (oldmask);
+
+  /* Leave critical section: re-enable interrupts only if they were
+     enabled on entry. */
+  if (__builtin_expect (mach_stat & 1, 0))
+    spu_ienable ();
+
+  return (void *) line;
+}
+
+/* Handle a miss for EA in ENTRY.  WAY is the first empty way of the
+   set, or >= WAYS when the set is full, in which case the least
+   recently used way is evicted and reused.  The chosen way's tag is
+   set and its line filled, either by aliasing the local store
+   directly or by DMAing the data in.  */
+static void
+__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
+{
+  addr tag = (eavoid_to_eanum (ea) & ~(TAG_MASK));
+  unsigned int lru = 0;
+  int i = 0;
+  int idx = 0;
+
+  /* If WAY >= WAYS, then there are no empty slots, so we must evict
+     the least recently used entry.  (Was a literal `4'; use the
+     named constant.)  */
+  if (way >= WAYS)
+    {
+      for (i = 0; i < WAYS; i++)
+        {
+          if (GET_LRU (entry, i) > lru)
+            {
+              lru = GET_LRU (entry, i);
+              idx = i;
+            }
+        }
+      __cache_evict_entry (entry, idx);
+      way = idx;
+    }
+
+  /* Set the empty entry's tag and fill its cache line. */
+
+  SET_TAG (entry, way, tag);
+  entry->reserved[way] = 0;
+
+  /* Check if the address is just an effective address within the
+     SPU's local store. */
+
+  /* Because the LS is not 256k aligned, we can't do a nice and mask
+     here to compare, so we must check the whole range. */
+
+  if ((eavoid_to_eanum (ea) >= (addr) __ea_local_store) &&
+      (eavoid_to_eanum (ea) < (addr) (__ea_local_store + 0x40000)))
+    {
+      /* The line aliases our own local store: point straight at it,
+         no DMA and no writeback needed.  */
+      SET_IS_LS (entry, way);
+      entry->base[way] =
+        (void *) ((unsigned int) (eavoid_to_eanum (ea) -
+                                  (addr) __ea_local_store) & ~(0x7f));
+    }
+  else
+    {
+      entry->base[way] = __cache_fill (way, tag);
+    }
+}
+
+/* Return a local-store pointer through which EA can be accessed,
+   faulting the line into the cache on a miss.  If N_BYTES_DIRTY is
+   nonzero, the N_BYTES_DIRTY bytes starting at EA are also marked
+   dirty so a later eviction writes them back.  */
+void *
+__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
+{
+#ifdef __EA64__
+  unsigned int tag_hi;
+  qword etag_hi;
+#endif
+  unsigned int tag_lo;
+  struct __cache_tag_array *entry;
+
+  qword etag_lo;
+  qword equal;
+  qword bit_mask;
+  qword way;
+
+  /* This first chunk, we merely fill the pointer and tag. */
+
+  entry = GET_ENTRY (ea);
+
+#ifndef __EA64__
+  /* Shuffle the 32-bit address into word 0 and mask off the
+     within-line offset to form the tag. */
+  tag_lo =
+    si_to_uint (si_andc
+                (si_shufb
+                 (si_from_eavoid (ea), si_from_uint (0),
+                  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
+#else
+  /* 64-bit address: the low word forms tag_lo, the high word tag_hi. */
+  tag_lo =
+    si_to_uint (si_andc
+                (si_shufb
+                 (si_from_eavoid (ea), si_from_uint (0),
+                  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));
+
+  tag_hi =
+    si_to_uint (si_shufb
+                (si_from_eavoid (ea), si_from_uint (0),
+                 si_from_uint (0x00010203)));
+#endif
+
+  /* Increment LRU in reserved bytes. */
+  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
+           si_from_ptr (entry), 48);
+
+missreturn:
+  /* Check if the entry's lo_tag is equal to the address' lo_tag. */
+  etag_lo = si_lqd (si_from_ptr (entry), 0);
+  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
+#ifdef __EA64__
+  /* And the high tag too */
+  etag_hi = si_lqd (si_from_ptr (entry), 16);
+  equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
+#endif
+
+  if ((si_to_uint (si_orx (equal)) == 0))
+    goto misshandler;
+
+  if (n_bytes_dirty)
+    {
+      /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
+         offset of the appropriate dirty bits. */
+      way = si_shli (si_clz (si_gbb (equal)), 2);
+
+      /* To create the bit_mask, we set it to all 1s (uint -1), then we
+         shift it over (128 - n_bytes_dirty) times. */
+
+      bit_mask = si_from_uint (-1);
+
+      bit_mask =
+        si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));
+
+      bit_mask =
+        si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));
+
+      /* Rotate it around to the correct offset. */
+      bit_mask =
+        si_rotqby (bit_mask,
+                   si_from_uint (-1 * (eavoid_to_eanum (ea) & TAG_MASK) / 8));
+
+      bit_mask =
+        si_rotqbi (bit_mask,
+                   si_from_uint (-1 * (eavoid_to_eanum (ea) & TAG_MASK) % 8));
+
+      /* Update the dirty bits. */
+      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
+               si_from_ptr (entry), way);
+    };
+
+  /* We've definitely found the right entry, set LRU (reserved) to 0
+     maintaining the LS flag (MSB). */
+
+  si_stqd (si_andc
+           (si_lqd (si_from_ptr (entry), 48),
+            si_and (equal, si_from_uint (~(LS_FLAG)))),
+           si_from_ptr (entry), 48);
+
+  /* Return the matching way's base pointer plus EA's within-line
+     offset. */
+  return (void *)
+    si_to_ptr (si_a
+               (si_orx
+                (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
+                si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));
+
+misshandler:
+  /* The first way whose lo tag is 1 (empty) becomes the fill
+     candidate; if none is empty the computed index is WAYS, and
+     __cache_miss evicts the LRU way instead. */
+  equal = si_ceqi (etag_lo, 1);
+  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
+  goto missreturn;
+}
+
+/* Read-only variant of __cache_fetch_dirty: return a local-store
+   pointer for EA without marking any bytes dirty.  */
+void *
+__cache_fetch (__ea void *ea)
+{
+  return __cache_fetch_dirty (ea, 0);
+}
+
+/* Hint that EA will be accessed soon.  */
+void
+__cache_touch (__ea void *ea __attribute__ ((unused)))
+{
+  /* NO-OP for now. */
+}
+
+/* Destructor trampoline (declared with __attribute__ ((destructor))
+   above): flush the cache at program exit so dirty lines reach
+   backing store.  */
+static void
+__cache_flush_stub (void)
+{
+  __cache_flush ();
+}
+
+/* Write all dirty lines back to backing store and leave every way
+   of every set empty.  */
+void
+__cache_flush (void)
+{
+  unsigned int set;
+  int way;
+
+  /* Visit each set of the tag array and evict every way that is
+     currently in use.  */
+  for (set = 0; set < (CACHE_LINES / WAYS); set++)
+    for (way = 0; way < WAYS; way++)
+      if (!CHECK_EMPTY (&__cache_tag_array[set], way))
+        __cache_evict_entry (&__cache_tag_array[set], way);
+}
--- gcc-clean/gcc/doc/invoke.texi 2008-08-27 07:16:34.000000000 +1000
+++ gcc-nas/gcc/doc/invoke.texi 2008-08-27 11:59:03.000000000 +1000
@@ -779,7 +779,10 @@
-msafe-dma -munsafe-dma @gol
-mbranch-hints @gol
-msmall-mem -mlarge-mem -mstdmain @gol
--mfixed-range=@var{register-range}}
+-mfixed-range=@var{register-range} @gol
+-mea32 -mea64 @gol
+-mcache-size=@var{cache-size} @gol
+-matomic-updates -mno-atomic-updates}
@emph{System V Options}
@gccoptlist{-Qy -Qn -YP,@var{paths} -Ym,@var{dir}}
@@ -14606,6 +14609,34 @@
two registers separated by a dash. Multiple register ranges can be
specified separated by a comma.
+@item -mea32
+@itemx -mea64
+@opindex mea32
+@opindex mea64
+
+Compile code assuming that pointers to the __ea address space are either
+32 or 64 bits wide. The default is 32 bits. As this is an ABI-changing
+option, all object code in an executable must be compiled with the same
+option.
+
+@item -mcache-size=@var{cache-size}
+@opindex mcache-size
+
+This option controls the version of libgcc that the compiler links to an
+executable and selects software cache support with a particular software
+cache size. Possible options for @var{cache-size} are @samp{8},
+@samp{16}, @samp{32}, @samp{64} and @samp{128}. The default cache size
+is 64KB.
+
+@item -matomic-updates
+@itemx -mno-atomic-updates
+@opindex matomic-updates
+@opindex mno-atomic-updates
+
+This option controls the version of libgcc that the compiler links to an
+executable and selects whether atomic updates to the software cache are
+used. The default behavior is to use atomic updates.
+
@end table
@node System V Options