Index: gcc/config/s390/s390.c =================================================================== --- gcc/config/s390/s390.c.orig 2009-09-25 10:04:32.000000000 +0200 +++ gcc/config/s390/s390.c 2009-09-25 10:05:33.000000000 +0200 @@ -9571,6 +9571,104 @@ s390_optimize_prologue (void) } } + +/* Exchange the two operands of COND, and swap its mask so that the + semantics does not change. */ +static void +s390_swap_cmp (rtx cond) +{ + enum rtx_code code = swap_condition (GET_CODE (cond)); + rtx tmp = XEXP (cond, 0); + + XEXP (cond, 0) = XEXP (cond, 1); + XEXP (cond, 1) = tmp; + PUT_CODE (cond, code); +} + + +/* Returns 1 if INSN reads the value of REG for purposes not related + to addressing of memory, and 0 otherwise. */ +static int +s390_non_addr_reg_read_p (rtx reg, rtx insn) +{ + return reg_referenced_p (reg, PATTERN (insn)) + && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn)); +} + + +/* On z10, instructions of the compare-and-branch family have the + property to access the register occurring as second operand with + its bits complemented. If such a compare is grouped with a second + instruction that accesses the same register non-complemented, and + if that register's value is delivered via a bypass, then the + pipeline recycles, thereby causing significant performance decline. + This function locates such situations and exchanges the two + operands of the compare. */ +static void +s390_z10_optimize_cmp (void) +{ + rtx insn, prev_insn, next_insn; + int added_NOPs = 0; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn) || INSN_CODE (insn) <= 0) + continue; + + if (get_attr_z10prop (insn) == Z10PROP_Z10_COBRA) + { + rtx op0, op1, pattern, jump_expr, cond; + + /* Extract the comparison´s condition and its operands. */ + pattern = single_set (insn); + gcc_assert (GET_CODE (pattern) == SET); + jump_expr = XEXP (pattern, 1); + gcc_assert (GET_CODE (jump_expr) == IF_THEN_ELSE); + cond = XEXP (jump_expr, 0); + op0 = XEXP (cond, 0); + op1 = XEXP (cond, 1); + + /* Swap the COMPARE´s arguments and its mask if there is a + conflicting access in the previous insn. */ + prev_insn = PREV_INSN (insn); + if (prev_insn != NULL_RTX && INSN_P (prev_insn) + && reg_referenced_p (op1, PATTERN (prev_insn))) + { + s390_swap_cmp (cond); + op0 = XEXP (cond, 0); + op1 = XEXP (cond, 1); + } + + /* Check if there is a conflict with the next insn. If there + was no conflict with the previous insn, then swap the + COMPARE´s arguments and its mask. If we already swapped + the operands, or if swapping them would cause a conflict + with the previous insn, issue a NOP after the COMPARE in + order to separate the two instuctions. */ + next_insn = NEXT_INSN (insn); + if (next_insn != NULL_RTX && INSN_P (next_insn) + && s390_non_addr_reg_read_p (op1, next_insn)) + { + if (s390_non_addr_reg_read_p (op0, prev_insn)) + { + if (REGNO(op1) == 0) + emit_insn_after (gen_nop1 (), insn); + else + emit_insn_after (gen_nop (), insn); + added_NOPs = 1; + } + else + s390_swap_cmp (cond); + } + } + } + + /* Adjust branches if we added new instructions. */ + if (added_NOPs) + shorten_branches (get_insns ()); +} + + /* Perform machine-dependent processing. */ static void @@ -9683,6 +9781,11 @@ s390_reorg (void) /* Try to optimize prologue and epilogue further. */ s390_optimize_prologue (); + + /* Eliminate z10-specific pipeline recycles related to some compare + instructions. */ + if (TARGET_Z10) + s390_z10_optimize_cmp (); } Index: gcc/config/s390/s390.md =================================================================== --- gcc/config/s390/s390.md.orig 2009-09-25 10:04:47.000000000 +0200 +++ gcc/config/s390/s390.md 2009-09-25 10:04:48.000000000 +0200 @@ -8469,6 +8469,12 @@ [(set_attr "op_type" "RR") (set_attr "z10prop" "z10_fr_E1")]) +(define_insn "nop1" + [(const_int 1)] + "" + "lr\t1,1" + [(set_attr "op_type" "RR")]) + ; ; Special literal pool access instruction pattern(s).