| From: Richard Sandiford <richard.sandiford@arm.com> |
| Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue |
| Date: Tue, 12 Sep 2023 16:25:10 +0100 |
| |
| This series of patches fixes deficiencies in GCC's -fstack-protector |
| implementation for AArch64 when using dynamically allocated stack space. |
| This is CVE-2023-4039. See: |
| |
| https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64 |
| https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf |
| |
| for more details. |
| |
| The fix is to put the saved registers above the locals area when |
| -fstack-protector is used. |
| |
| The series also fixes a stack-clash problem that I found while working |
| on the CVE. In unpatched sources, the stack-clash problem would only |
| trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an |
| equivalent). But it would be a more significant issue with the new |
| -fstack-protector frame layout. It's therefore important that both |
| problems are fixed together. |
| |
| Some reorganisation of the code seemed necessary to fix the problems in a |
| cleanish way. The series is therefore quite long, but only a handful of |
| patches should have any effect on code generation. |
| |
| See the individual patches for a detailed description. |
| |
| Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches. |
| I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039. |
| |
| CVE: CVE-2023-4039 |
| Upstream-Status: Backport |
| Signed-off-by: Ross Burton <ross.burton@arm.com> |
| |
| |
| From 71a2aa2127283f450c623d3604dbcabe0e14a8d4 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:12 +0100 |
| Subject: [PATCH 01/19] aarch64: Use local frame vars in shrink-wrapping code |
| |
| aarch64_layout_frame uses a shorthand for referring to |
| cfun->machine->frame: |
| |
| aarch64_frame &frame = cfun->machine->frame; |
| |
| This patch does the same for some other heavy users of the structure. |
| No functional change intended. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use |
| a local shorthand for cfun->machine->frame. |
| (aarch64_restore_callee_saves, aarch64_get_separate_components): |
| (aarch64_process_components): Likewise. |
| (aarch64_allocate_and_probe_stack_space): Likewise. |
| (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. |
| (aarch64_layout_frame): Use existing shorthand for one more case. |
| --- |
| gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++---------------- |
| 1 file changed, 64 insertions(+), 59 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 822a2b49a46..5d473d161d9 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8612,7 +8612,7 @@ aarch64_layout_frame (void) |
| frame.is_scs_enabled |
| = (!crtl->calls_eh_return |
| && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) |
| - && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0)); |
| + && known_ge (frame.reg_offset[LR_REGNUM], 0)); |
| |
| /* When shadow call stack is enabled, the scs_pop in the epilogue will |
| restore x30, and we don't need to pop x30 again in the traditional |
| @@ -9078,6 +9078,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| unsigned start, unsigned limit, bool skip_wb, |
| bool hard_fp_valid_p) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| rtx_insn *insn; |
| unsigned regno; |
| unsigned regno2; |
| @@ -9092,8 +9093,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); |
| |
| if (skip_wb |
| - && (regno == cfun->machine->frame.wb_push_candidate1 |
| - || regno == cfun->machine->frame.wb_push_candidate2)) |
| + && (regno == frame.wb_push_candidate1 |
| + || regno == frame.wb_push_candidate2)) |
| continue; |
| |
| if (cfun->machine->reg_is_wrapped_separately[regno]) |
| @@ -9101,7 +9102,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = start_offset + cfun->machine->frame.reg_offset[regno]; |
| + offset = start_offset + frame.reg_offset[regno]; |
| rtx base_rtx = stack_pointer_rtx; |
| poly_int64 sp_offset = offset; |
| |
| @@ -9114,7 +9115,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| { |
| gcc_assert (known_eq (start_offset, 0)); |
| poly_int64 fp_offset |
| - = cfun->machine->frame.below_hard_fp_saved_regs_size; |
| + = frame.below_hard_fp_saved_regs_size; |
| if (hard_fp_valid_p) |
| base_rtx = hard_frame_pointer_rtx; |
| else |
| @@ -9136,8 +9137,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit |
| && !cfun->machine->reg_is_wrapped_separately[regno2] |
| && known_eq (GET_MODE_SIZE (mode), |
| - cfun->machine->frame.reg_offset[regno2] |
| - - cfun->machine->frame.reg_offset[regno])) |
| + frame.reg_offset[regno2] - frame.reg_offset[regno])) |
| { |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| rtx mem2; |
| @@ -9187,6 +9187,7 @@ static void |
| aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, |
| unsigned limit, bool skip_wb, rtx *cfi_ops) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| unsigned regno; |
| unsigned regno2; |
| poly_int64 offset; |
| @@ -9203,13 +9204,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, |
| rtx reg, mem; |
| |
| if (skip_wb |
| - && (regno == cfun->machine->frame.wb_pop_candidate1 |
| - || regno == cfun->machine->frame.wb_pop_candidate2)) |
| + && (regno == frame.wb_pop_candidate1 |
| + || regno == frame.wb_pop_candidate2)) |
| continue; |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = start_offset + cfun->machine->frame.reg_offset[regno]; |
| + offset = start_offset + frame.reg_offset[regno]; |
| rtx base_rtx = stack_pointer_rtx; |
| if (mode == VNx2DImode && BYTES_BIG_ENDIAN) |
| aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, |
| @@ -9220,8 +9221,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, |
| && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit |
| && !cfun->machine->reg_is_wrapped_separately[regno2] |
| && known_eq (GET_MODE_SIZE (mode), |
| - cfun->machine->frame.reg_offset[regno2] |
| - - cfun->machine->frame.reg_offset[regno])) |
| + frame.reg_offset[regno2] - frame.reg_offset[regno])) |
| { |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| rtx mem2; |
| @@ -9326,6 +9326,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) |
| static sbitmap |
| aarch64_get_separate_components (void) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); |
| bitmap_clear (components); |
| |
| @@ -9342,18 +9343,18 @@ aarch64_get_separate_components (void) |
| if (mode == VNx2DImode && BYTES_BIG_ENDIAN) |
| continue; |
| |
| - poly_int64 offset = cfun->machine->frame.reg_offset[regno]; |
| + poly_int64 offset = frame.reg_offset[regno]; |
| |
| /* If the register is saved in the first SVE save slot, we use |
| it as a stack probe for -fstack-clash-protection. */ |
| if (flag_stack_clash_protection |
| - && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) |
| + && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) |
| && known_eq (offset, 0)) |
| continue; |
| |
| /* Get the offset relative to the register we'll use. */ |
| if (frame_pointer_needed) |
| - offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; |
| + offset -= frame.below_hard_fp_saved_regs_size; |
| else |
| offset += crtl->outgoing_args_size; |
| |
| @@ -9372,11 +9373,11 @@ aarch64_get_separate_components (void) |
| /* If the spare predicate register used by big-endian SVE code |
| is call-preserved, it must be saved in the main prologue |
| before any saves that use it. */ |
| - if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) |
| - bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); |
| + if (frame.spare_pred_reg != INVALID_REGNUM) |
| + bitmap_clear_bit (components, frame.spare_pred_reg); |
| |
| - unsigned reg1 = cfun->machine->frame.wb_push_candidate1; |
| - unsigned reg2 = cfun->machine->frame.wb_push_candidate2; |
| + unsigned reg1 = frame.wb_push_candidate1; |
| + unsigned reg2 = frame.wb_push_candidate2; |
| /* If registers have been chosen to be stored/restored with |
| writeback don't interfere with them to avoid having to output explicit |
| stack adjustment instructions. */ |
| @@ -9485,6 +9486,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) |
| static void |
| aarch64_process_components (sbitmap components, bool prologue_p) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed |
| ? HARD_FRAME_POINTER_REGNUM |
| : STACK_POINTER_REGNUM); |
| @@ -9499,9 +9501,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| |
| rtx reg = gen_rtx_REG (mode, regno); |
| - poly_int64 offset = cfun->machine->frame.reg_offset[regno]; |
| + poly_int64 offset = frame.reg_offset[regno]; |
| if (frame_pointer_needed) |
| - offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; |
| + offset -= frame.below_hard_fp_saved_regs_size; |
| else |
| offset += crtl->outgoing_args_size; |
| |
| @@ -9526,14 +9528,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| break; |
| } |
| |
| - poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; |
| + poly_int64 offset2 = frame.reg_offset[regno2]; |
| /* The next register is not of the same class or its offset is not |
| mergeable with the current one into a pair. */ |
| if (aarch64_sve_mode_p (mode) |
| || !satisfies_constraint_Ump (mem) |
| || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) |
| || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) |
| - || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]), |
| + || maybe_ne ((offset2 - frame.reg_offset[regno]), |
| GET_MODE_SIZE (mode))) |
| { |
| insn = emit_insn (set); |
| @@ -9555,7 +9557,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| /* REGNO2 can be saved/restored in a pair with REGNO. */ |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| if (frame_pointer_needed) |
| - offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size; |
| + offset2 -= frame.below_hard_fp_saved_regs_size; |
| else |
| offset2 += crtl->outgoing_args_size; |
| rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); |
| @@ -9650,6 +9652,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| bool frame_related_p, |
| bool final_adjustment_p) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| HOST_WIDE_INT guard_size |
| = 1 << param_stack_clash_protection_guard_size; |
| HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; |
| @@ -9670,25 +9673,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| register as a probe. We can't assume that LR was saved at position 0 |
| though, so treat any space below it as unprobed. */ |
| if (final_adjustment_p |
| - && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)) |
| + && known_eq (frame.below_hard_fp_saved_regs_size, 0)) |
| { |
| - poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM]; |
| + poly_int64 lr_offset = frame.reg_offset[LR_REGNUM]; |
| if (known_ge (lr_offset, 0)) |
| min_probe_threshold -= lr_offset.to_constant (); |
| else |
| gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); |
| } |
| |
| - poly_int64 frame_size = cfun->machine->frame.frame_size; |
| + poly_int64 frame_size = frame.frame_size; |
| |
| /* We should always have a positive probe threshold. */ |
| gcc_assert (min_probe_threshold > 0); |
| |
| if (flag_stack_clash_protection && !final_adjustment_p) |
| { |
| - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; |
| - poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; |
| - poly_int64 final_adjust = cfun->machine->frame.final_adjust; |
| + poly_int64 initial_adjust = frame.initial_adjust; |
| + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| + poly_int64 final_adjust = frame.final_adjust; |
| |
| if (known_eq (frame_size, 0)) |
| { |
| @@ -9977,17 +9980,18 @@ aarch64_epilogue_uses (int regno) |
| void |
| aarch64_expand_prologue (void) |
| { |
| - poly_int64 frame_size = cfun->machine->frame.frame_size; |
| - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; |
| - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; |
| - poly_int64 final_adjust = cfun->machine->frame.final_adjust; |
| - poly_int64 callee_offset = cfun->machine->frame.callee_offset; |
| - poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; |
| + aarch64_frame &frame = cfun->machine->frame; |
| + poly_int64 frame_size = frame.frame_size; |
| + poly_int64 initial_adjust = frame.initial_adjust; |
| + HOST_WIDE_INT callee_adjust = frame.callee_adjust; |
| + poly_int64 final_adjust = frame.final_adjust; |
| + poly_int64 callee_offset = frame.callee_offset; |
| + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| poly_int64 below_hard_fp_saved_regs_size |
| - = cfun->machine->frame.below_hard_fp_saved_regs_size; |
| - unsigned reg1 = cfun->machine->frame.wb_push_candidate1; |
| - unsigned reg2 = cfun->machine->frame.wb_push_candidate2; |
| - bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; |
| + = frame.below_hard_fp_saved_regs_size; |
| + unsigned reg1 = frame.wb_push_candidate1; |
| + unsigned reg2 = frame.wb_push_candidate2; |
| + bool emit_frame_chain = frame.emit_frame_chain; |
| rtx_insn *insn; |
| |
| if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) |
| @@ -10018,7 +10022,7 @@ aarch64_expand_prologue (void) |
| } |
| |
| /* Push return address to shadow call stack. */ |
| - if (cfun->machine->frame.is_scs_enabled) |
| + if (frame.is_scs_enabled) |
| emit_insn (gen_scs_push ()); |
| |
| if (flag_stack_usage_info) |
| @@ -10057,7 +10061,7 @@ aarch64_expand_prologue (void) |
| |
| /* The offset of the frame chain record (if any) from the current SP. */ |
| poly_int64 chain_offset = (initial_adjust + callee_adjust |
| - - cfun->machine->frame.hard_fp_offset); |
| + - frame.hard_fp_offset); |
| gcc_assert (known_ge (chain_offset, 0)); |
| |
| /* The offset of the bottom of the save area from the current SP. */ |
| @@ -10160,16 +10164,17 @@ aarch64_use_return_insn_p (void) |
| void |
| aarch64_expand_epilogue (bool for_sibcall) |
| { |
| - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; |
| - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; |
| - poly_int64 final_adjust = cfun->machine->frame.final_adjust; |
| - poly_int64 callee_offset = cfun->machine->frame.callee_offset; |
| - poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; |
| + aarch64_frame &frame = cfun->machine->frame; |
| + poly_int64 initial_adjust = frame.initial_adjust; |
| + HOST_WIDE_INT callee_adjust = frame.callee_adjust; |
| + poly_int64 final_adjust = frame.final_adjust; |
| + poly_int64 callee_offset = frame.callee_offset; |
| + poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| poly_int64 below_hard_fp_saved_regs_size |
| - = cfun->machine->frame.below_hard_fp_saved_regs_size; |
| - unsigned reg1 = cfun->machine->frame.wb_pop_candidate1; |
| - unsigned reg2 = cfun->machine->frame.wb_pop_candidate2; |
| - unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled |
| + = frame.below_hard_fp_saved_regs_size; |
| + unsigned reg1 = frame.wb_pop_candidate1; |
| + unsigned reg2 = frame.wb_pop_candidate2; |
| + unsigned int last_gpr = (frame.is_scs_enabled |
| ? R29_REGNUM : R30_REGNUM); |
| rtx cfi_ops = NULL; |
| rtx_insn *insn; |
| @@ -10203,7 +10208,7 @@ aarch64_expand_epilogue (bool for_sibcall) |
| /* We need to add memory barrier to prevent read from deallocated stack. */ |
| bool need_barrier_p |
| = maybe_ne (get_frame_size () |
| - + cfun->machine->frame.saved_varargs_size, 0); |
| + + frame.saved_varargs_size, 0); |
| |
| /* Emit a barrier to prevent loads from a deallocated stack. */ |
| if (maybe_gt (final_adjust, crtl->outgoing_args_size) |
| @@ -10284,7 +10289,7 @@ aarch64_expand_epilogue (bool for_sibcall) |
| } |
| |
| /* Pop return address from shadow call stack. */ |
| - if (cfun->machine->frame.is_scs_enabled) |
| + if (frame.is_scs_enabled) |
| { |
| machine_mode mode = aarch64_reg_save_mode (R30_REGNUM); |
| rtx reg = gen_rtx_REG (mode, R30_REGNUM); |
| @@ -12740,24 +12745,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) |
| poly_int64 |
| aarch64_initial_elimination_offset (unsigned from, unsigned to) |
| { |
| + aarch64_frame &frame = cfun->machine->frame; |
| + |
| if (to == HARD_FRAME_POINTER_REGNUM) |
| { |
| if (from == ARG_POINTER_REGNUM) |
| - return cfun->machine->frame.hard_fp_offset; |
| + return frame.hard_fp_offset; |
| |
| if (from == FRAME_POINTER_REGNUM) |
| - return cfun->machine->frame.hard_fp_offset |
| - - cfun->machine->frame.locals_offset; |
| + return frame.hard_fp_offset - frame.locals_offset; |
| } |
| |
| if (to == STACK_POINTER_REGNUM) |
| { |
| if (from == FRAME_POINTER_REGNUM) |
| - return cfun->machine->frame.frame_size |
| - - cfun->machine->frame.locals_offset; |
| + return frame.frame_size - frame.locals_offset; |
| } |
| |
| - return cfun->machine->frame.frame_size; |
| + return frame.frame_size; |
| } |
| |
| |
| -- |
| 2.34.1 |
| |
| |
| From 89a9fa287706c5011f61926eaf65e7b996b963a3 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:12 +0100 |
| Subject: [PATCH 02/19] aarch64: Avoid a use of callee_offset |
| |
| When we emit the frame chain, i.e. when we reach the point marked "Here" |
| in this statement of aarch64_expand_prologue: |
| |
| if (emit_frame_chain) |
| { |
| // Here |
| ... |
| } |
| |
| the stack is in one of two states: |
| |
| - We've allocated up to the frame chain, but no more. |
| |
| - We've allocated the whole frame, and the frame chain is within easy |
| reach of the new SP. |
| |
| The offset of the frame chain from the current SP is available |
| in aarch64_frame as callee_offset. It is also available as the |
| chain_offset local variable, where the latter is calculated from other |
| data. (However, chain_offset is not always equal to callee_offset when |
| !emit_frame_chain, so chain_offset isn't redundant.) |
| |
| In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using |
| chain_offset for the initialisation of the hard frame pointer: |
| |
| aarch64_add_offset (Pmode, hard_frame_pointer_rtx, |
| - stack_pointer_rtx, callee_offset, |
| + stack_pointer_rtx, chain_offset, |
| tmp1_rtx, tmp0_rtx, frame_pointer_needed); |
| |
| But the later REG_CFA_ADJUST_CFA handling still used callee_offset. |
| |
| I think the difference is harmless, but it's more logical for the |
| CFA note to be in sync, and it's more convenient for later patches |
| if it uses chain_offset. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_expand_prologue): Use |
| chain_offset rather than callee_offset. |
| --- |
| gcc/config/aarch64/aarch64.cc | 4 +--- |
| 1 file changed, 1 insertion(+), 3 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 5d473d161d9..4f233c95140 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -9985,7 +9985,6 @@ aarch64_expand_prologue (void) |
| poly_int64 initial_adjust = frame.initial_adjust; |
| HOST_WIDE_INT callee_adjust = frame.callee_adjust; |
| poly_int64 final_adjust = frame.final_adjust; |
| - poly_int64 callee_offset = frame.callee_offset; |
| poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| poly_int64 below_hard_fp_saved_regs_size |
| = frame.below_hard_fp_saved_regs_size; |
| @@ -10098,8 +10097,7 @@ aarch64_expand_prologue (void) |
| implicit. */ |
| if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) |
| { |
| - rtx src = plus_constant (Pmode, stack_pointer_rtx, |
| - callee_offset); |
| + rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset); |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, |
| gen_rtx_SET (hard_frame_pointer_rtx, src)); |
| } |
| -- |
| 2.34.1 |
| |
| |
| From b36a2a78040722dab6124366c5d6baf8eaf80aef Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:13 +0100 |
| Subject: [PATCH 03/19] aarch64: Explicitly handle frames with no saved |
| registers |
| |
| If a frame has no saved registers, it can be allocated in one go. |
| There is no need to treat the areas below and above the saved |
| registers as separate. |
| |
| And if we allocate the frame in one go, it should be allocated |
| as the initial_adjust rather than the final_adjust. This allows the |
| frame size to grow to guard_size - guard_used_by_caller before a stack |
| probe is needed. (A frame with no register saves is necessarily a |
| leaf frame.) |
| |
| This is a no-op as things stand, since a leaf function will have |
| no outgoing arguments, and so all the frame will be above where |
| the saved registers normally go. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly |
| allocate the frame in one go if there are no saved registers. |
| --- |
| gcc/config/aarch64/aarch64.cc | 8 +++++--- |
| 1 file changed, 5 insertions(+), 3 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 4f233c95140..37643041ffb 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8639,9 +8639,11 @@ aarch64_layout_frame (void) |
| |
| HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; |
| HOST_WIDE_INT const_saved_regs_size; |
| - if (frame.frame_size.is_constant (&const_size) |
| - && const_size < max_push_offset |
| - && known_eq (frame.hard_fp_offset, const_size)) |
| + if (known_eq (frame.saved_regs_size, 0)) |
| + frame.initial_adjust = frame.frame_size; |
| + else if (frame.frame_size.is_constant (&const_size) |
| + && const_size < max_push_offset |
| + && known_eq (frame.hard_fp_offset, const_size)) |
| { |
| /* Simple, small frame with no outgoing arguments: |
| |
| -- |
| 2.34.1 |
| |
| |
| From ada2ab0093596be707f23a3466ac82cff59fcffe Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:13 +0100 |
| Subject: [PATCH 04/19] aarch64: Add bytes_below_saved_regs to frame info |
| |
| The frame layout code currently hard-codes the assumption that |
| the number of bytes below the saved registers is equal to the |
| size of the outgoing arguments. This patch abstracts that |
| value into a new field of aarch64_frame. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New |
| field. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it, |
| and use it instead of crtl->outgoing_args_size. |
| (aarch64_get_separate_components): Use bytes_below_saved_regs instead |
| of outgoing_args_size. |
| (aarch64_process_components): Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++----------------- |
| gcc/config/aarch64/aarch64.h | 5 +++ |
| 2 files changed, 41 insertions(+), 35 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 37643041ffb..dacc2b0e4dd 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8478,6 +8478,8 @@ aarch64_layout_frame (void) |
| gcc_assert (crtl->is_leaf |
| || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); |
| |
| + frame.bytes_below_saved_regs = crtl->outgoing_args_size; |
| + |
| /* Now assign stack slots for the registers. Start with the predicate |
| registers, since predicate LDR and STR have a relatively small |
| offset range. These saves happen below the hard frame pointer. */ |
| @@ -8582,18 +8584,18 @@ aarch64_layout_frame (void) |
| |
| poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; |
| |
| - poly_int64 above_outgoing_args |
| + poly_int64 saved_regs_and_above |
| = aligned_upper_bound (varargs_and_saved_regs_size |
| + get_frame_size (), |
| STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| frame.hard_fp_offset |
| - = above_outgoing_args - frame.below_hard_fp_saved_regs_size; |
| + = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; |
| |
| /* Both these values are already aligned. */ |
| - gcc_assert (multiple_p (crtl->outgoing_args_size, |
| + gcc_assert (multiple_p (frame.bytes_below_saved_regs, |
| STACK_BOUNDARY / BITS_PER_UNIT)); |
| - frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; |
| + frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; |
| |
| frame.locals_offset = frame.saved_varargs_size; |
| |
| @@ -8637,7 +8639,7 @@ aarch64_layout_frame (void) |
| else if (frame.wb_pop_candidate1 != INVALID_REGNUM) |
| max_push_offset = 256; |
| |
| - HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; |
| + HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; |
| HOST_WIDE_INT const_saved_regs_size; |
| if (known_eq (frame.saved_regs_size, 0)) |
| frame.initial_adjust = frame.frame_size; |
| @@ -8645,31 +8647,31 @@ aarch64_layout_frame (void) |
| && const_size < max_push_offset |
| && known_eq (frame.hard_fp_offset, const_size)) |
| { |
| - /* Simple, small frame with no outgoing arguments: |
| + /* Simple, small frame with no data below the saved registers. |
| |
| stp reg1, reg2, [sp, -frame_size]! |
| stp reg3, reg4, [sp, 16] */ |
| frame.callee_adjust = const_size; |
| } |
| - else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) |
| + else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) |
| && frame.saved_regs_size.is_constant (&const_saved_regs_size) |
| - && const_outgoing_args_size + const_saved_regs_size < 512 |
| - /* We could handle this case even with outgoing args, provided |
| - that the number of args left us with valid offsets for all |
| - predicate and vector save slots. It's such a rare case that |
| - it hardly seems worth the effort though. */ |
| - && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) |
| + && const_below_saved_regs + const_saved_regs_size < 512 |
| + /* We could handle this case even with data below the saved |
| + registers, provided that that data left us with valid offsets |
| + for all predicate and vector save slots. It's such a rare |
| + case that it hardly seems worth the effort though. */ |
| + && (!saves_below_hard_fp_p || const_below_saved_regs == 0) |
| && !(cfun->calls_alloca |
| && frame.hard_fp_offset.is_constant (&const_fp_offset) |
| && const_fp_offset < max_push_offset)) |
| { |
| - /* Frame with small outgoing arguments: |
| + /* Frame with small area below the saved registers: |
| |
| sub sp, sp, frame_size |
| - stp reg1, reg2, [sp, outgoing_args_size] |
| - stp reg3, reg4, [sp, outgoing_args_size + 16] */ |
| + stp reg1, reg2, [sp, bytes_below_saved_regs] |
| + stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */ |
| frame.initial_adjust = frame.frame_size; |
| - frame.callee_offset = const_outgoing_args_size; |
| + frame.callee_offset = const_below_saved_regs; |
| } |
| else if (saves_below_hard_fp_p |
| && known_eq (frame.saved_regs_size, |
| @@ -8679,30 +8681,29 @@ aarch64_layout_frame (void) |
| |
| sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size |
| save SVE registers relative to SP |
| - sub sp, sp, outgoing_args_size */ |
| + sub sp, sp, bytes_below_saved_regs */ |
| frame.initial_adjust = (frame.hard_fp_offset |
| + frame.below_hard_fp_saved_regs_size); |
| - frame.final_adjust = crtl->outgoing_args_size; |
| + frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| else if (frame.hard_fp_offset.is_constant (&const_fp_offset) |
| && const_fp_offset < max_push_offset) |
| { |
| - /* Frame with large outgoing arguments or SVE saves, but with |
| - a small local area: |
| + /* Frame with large area below the saved registers, or with SVE saves, |
| + but with a small area above: |
| |
| stp reg1, reg2, [sp, -hard_fp_offset]! |
| stp reg3, reg4, [sp, 16] |
| [sub sp, sp, below_hard_fp_saved_regs_size] |
| [save SVE registers relative to SP] |
| - sub sp, sp, outgoing_args_size */ |
| + sub sp, sp, bytes_below_saved_regs */ |
| frame.callee_adjust = const_fp_offset; |
| frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| - frame.final_adjust = crtl->outgoing_args_size; |
| + frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| else |
| { |
| - /* Frame with large local area and outgoing arguments or SVE saves, |
| - using frame pointer: |
| + /* General case: |
| |
| sub sp, sp, hard_fp_offset |
| stp x29, x30, [sp, 0] |
| @@ -8710,10 +8711,10 @@ aarch64_layout_frame (void) |
| stp reg3, reg4, [sp, 16] |
| [sub sp, sp, below_hard_fp_saved_regs_size] |
| [save SVE registers relative to SP] |
| - sub sp, sp, outgoing_args_size */ |
| + sub sp, sp, bytes_below_saved_regs */ |
| frame.initial_adjust = frame.hard_fp_offset; |
| frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| - frame.final_adjust = crtl->outgoing_args_size; |
| + frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| |
| /* Make sure the individual adjustments add up to the full frame size. */ |
| @@ -9358,7 +9359,7 @@ aarch64_get_separate_components (void) |
| if (frame_pointer_needed) |
| offset -= frame.below_hard_fp_saved_regs_size; |
| else |
| - offset += crtl->outgoing_args_size; |
| + offset += frame.bytes_below_saved_regs; |
| |
| /* Check that we can access the stack slot of the register with one |
| direct load with no adjustments needed. */ |
| @@ -9507,7 +9508,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| if (frame_pointer_needed) |
| offset -= frame.below_hard_fp_saved_regs_size; |
| else |
| - offset += crtl->outgoing_args_size; |
| + offset += frame.bytes_below_saved_regs; |
| |
| rtx addr = plus_constant (Pmode, ptr_reg, offset); |
| rtx mem = gen_frame_mem (mode, addr); |
| @@ -9561,7 +9562,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| if (frame_pointer_needed) |
| offset2 -= frame.below_hard_fp_saved_regs_size; |
| else |
| - offset2 += crtl->outgoing_args_size; |
| + offset2 += frame.bytes_below_saved_regs; |
| rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); |
| rtx mem2 = gen_frame_mem (mode, addr2); |
| rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) |
| @@ -9635,10 +9636,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) |
| registers. If POLY_SIZE is not large enough to require a probe this function |
| will only adjust the stack. When allocating the stack space |
| FRAME_RELATED_P is then used to indicate if the allocation is frame related. |
| - FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing |
| - arguments. If we are then we ensure that any allocation larger than the ABI |
| - defined buffer needs a probe so that the invariant of having a 1KB buffer is |
| - maintained. |
| + FINAL_ADJUSTMENT_P indicates whether we are allocating the area below |
| + the saved registers. If we are then we ensure that any allocation |
| + larger than the ABI defined buffer needs a probe so that the |
| + invariant of having a 1KB buffer is maintained. |
| |
| We emit barriers after each stack adjustment to prevent optimizations from |
| breaking the invariant that we never drop the stack more than a page. This |
| @@ -9847,7 +9848,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to |
| be probed. This maintains the requirement that each page is probed at |
| least once. For initial probing we probe only if the allocation is |
| - more than GUARD_SIZE - buffer, and for the outgoing arguments we probe |
| + more than GUARD_SIZE - buffer, and below the saved registers we probe |
| if the amount is larger than buffer. GUARD_SIZE - buffer + buffer == |
| GUARD_SIZE. This works that for any allocation that is large enough to |
| trigger a probe here, we'll have at least one, and if they're not large |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 73b09e20508..0b6faa3ddf1 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -777,6 +777,11 @@ struct GTY (()) aarch64_frame |
| /* The size of the callee-save registers with a slot in REG_OFFSET. */ |
| poly_int64 saved_regs_size; |
| |
| + /* The number of bytes between the bottom of the static frame (the bottom |
| + of the outgoing arguments) and the bottom of the register save area. |
| + This value is always a multiple of STACK_BOUNDARY. */ |
| + poly_int64 bytes_below_saved_regs; |
| + |
| /* The size of the callee-save registers with a slot in REG_OFFSET that |
| are saved below the hard frame pointer. */ |
| poly_int64 below_hard_fp_saved_regs_size; |
| -- |
| 2.34.1 |
| |
| |
| From 82f6b3e1b596ef0f4e3ac3bb9c6e88fb4458f402 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:14 +0100 |
| Subject: [PATCH 05/19] aarch64: Add bytes_below_hard_fp to frame info |
| |
| Following on from the previous bytes_below_saved_regs patch, this one |
| records the number of bytes that are below the hard frame pointer. |
| This eventually replaces below_hard_fp_saved_regs_size. |
| |
| If a frame pointer is not needed, the epilogue adds final_adjust |
| to the stack pointer before restoring registers: |
| |
| aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); |
| |
| Therefore, if the epilogue needs to restore the stack pointer from |
| the hard frame pointer, the directly corresponding offset is: |
| |
| -bytes_below_hard_fp + final_adjust |
| |
| i.e. go from the hard frame pointer to the bottom of the frame, |
| then add the same amount as if we were using the stack pointer |
| from the outset. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New |
| field. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it. |
| (aarch64_expand_epilogue): Use it instead of |
| below_hard_fp_saved_regs_size. |
| --- |
| gcc/config/aarch64/aarch64.cc | 6 +++--- |
| gcc/config/aarch64/aarch64.h | 5 +++++ |
| 2 files changed, 8 insertions(+), 3 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index dacc2b0e4dd..a3f7aabcc59 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8530,6 +8530,7 @@ aarch64_layout_frame (void) |
| of the callee save area. */ |
| bool saves_below_hard_fp_p = maybe_ne (offset, 0); |
| frame.below_hard_fp_saved_regs_size = offset; |
| + frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; |
| if (frame.emit_frame_chain) |
| { |
| /* FP and LR are placed in the linkage record. */ |
| @@ -10171,8 +10172,7 @@ aarch64_expand_epilogue (bool for_sibcall) |
| poly_int64 final_adjust = frame.final_adjust; |
| poly_int64 callee_offset = frame.callee_offset; |
| poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| - poly_int64 below_hard_fp_saved_regs_size |
| - = frame.below_hard_fp_saved_regs_size; |
| + poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; |
| unsigned reg1 = frame.wb_pop_candidate1; |
| unsigned reg2 = frame.wb_pop_candidate2; |
| unsigned int last_gpr = (frame.is_scs_enabled |
| @@ -10230,7 +10230,7 @@ aarch64_expand_epilogue (bool for_sibcall) |
| is restored on the instruction doing the writeback. */ |
| aarch64_add_offset (Pmode, stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| - -callee_offset - below_hard_fp_saved_regs_size, |
| + -bytes_below_hard_fp + final_adjust, |
| tmp1_rtx, tmp0_rtx, callee_adjust == 0); |
| else |
| /* The case where we need to re-use the register here is very rare, so |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 0b6faa3ddf1..4263d29d29d 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -786,6 +786,11 @@ struct GTY (()) aarch64_frame |
| are saved below the hard frame pointer. */ |
| poly_int64 below_hard_fp_saved_regs_size; |
| |
| + /* The number of bytes between the bottom of the static frame (the bottom |
| + of the outgoing arguments) and the hard frame pointer. This value is |
| + always a multiple of STACK_BOUNDARY. */ |
| + poly_int64 bytes_below_hard_fp; |
| + |
| /* Offset from the base of the frame (incomming SP) to the |
| top of the locals area. This value is always a multiple of |
| STACK_BOUNDARY. */ |
| -- |
| 2.34.1 |
| |
| |
| From 86fa43e9fe4a8bf954f2919f07cbe3646d1d1df3 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:14 +0100 |
| Subject: [PATCH 06/19] aarch64: Tweak aarch64_save/restore_callee_saves |
| |
| aarch64_save_callee_saves and aarch64_restore_callee_saves took |
| a parameter called start_offset that gives the offset of the |
| bottom of the saved register area from the current stack pointer. |
| However, it's more convenient for later patches if we use the |
| bottom of the entire frame as the reference point, rather than |
| the bottom of the saved registers. |
| |
| Doing that removes the need for the callee_offset field. |
| Other than that, this is not a win on its own. It only really |
| makes sense in combination with the follow-on patches. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Remove |
| callee_offset handling. |
| (aarch64_save_callee_saves): Replace the start_offset parameter |
| with a bytes_below_sp parameter. |
| (aarch64_restore_callee_saves): Likewise. |
| (aarch64_expand_prologue): Update accordingly. |
| (aarch64_expand_epilogue): Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------ |
| gcc/config/aarch64/aarch64.h | 4 --- |
| 2 files changed, 28 insertions(+), 32 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index a3f7aabcc59..46ae5cf7673 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8604,7 +8604,6 @@ aarch64_layout_frame (void) |
| frame.final_adjust = 0; |
| frame.callee_adjust = 0; |
| frame.sve_callee_adjust = 0; |
| - frame.callee_offset = 0; |
| |
| frame.wb_pop_candidate1 = frame.wb_push_candidate1; |
| frame.wb_pop_candidate2 = frame.wb_push_candidate2; |
| @@ -8672,7 +8671,6 @@ aarch64_layout_frame (void) |
| stp reg1, reg2, [sp, bytes_below_saved_regs] |
| stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */ |
| frame.initial_adjust = frame.frame_size; |
| - frame.callee_offset = const_below_saved_regs; |
| } |
| else if (saves_below_hard_fp_p |
| && known_eq (frame.saved_regs_size, |
| @@ -9073,12 +9071,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, |
| } |
| |
| /* Emit code to save the callee-saved registers from register number START |
| - to LIMIT to the stack at the location starting at offset START_OFFSET, |
| - skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P |
| - is true if the hard frame pointer has been set up. */ |
| + to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP |
| + bytes above the bottom of the static frame. Skip any write-back |
| + candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard |
| + frame pointer has been set up. */ |
| |
| static void |
| -aarch64_save_callee_saves (poly_int64 start_offset, |
| +aarch64_save_callee_saves (poly_int64 bytes_below_sp, |
| unsigned start, unsigned limit, bool skip_wb, |
| bool hard_fp_valid_p) |
| { |
| @@ -9106,7 +9105,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = start_offset + frame.reg_offset[regno]; |
| + offset = (frame.reg_offset[regno] |
| + + frame.bytes_below_saved_regs |
| + - bytes_below_sp); |
| rtx base_rtx = stack_pointer_rtx; |
| poly_int64 sp_offset = offset; |
| |
| @@ -9117,9 +9118,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| else if (GP_REGNUM_P (regno) |
| && (!offset.is_constant (&const_offset) || const_offset >= 512)) |
| { |
| - gcc_assert (known_eq (start_offset, 0)); |
| - poly_int64 fp_offset |
| - = frame.below_hard_fp_saved_regs_size; |
| + poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; |
| if (hard_fp_valid_p) |
| base_rtx = hard_frame_pointer_rtx; |
| else |
| @@ -9183,12 +9182,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, |
| } |
| |
| /* Emit code to restore the callee registers from register number START |
| - up to and including LIMIT. Restore from the stack offset START_OFFSET, |
| - skipping any write-back candidates if SKIP_WB is true. Write the |
| - appropriate REG_CFA_RESTORE notes into CFI_OPS. */ |
| + up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP |
| + bytes above the bottom of the static frame. Skip any write-back |
| + candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE |
| + notes into CFI_OPS. */ |
| |
| static void |
| -aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, |
| +aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, |
| unsigned limit, bool skip_wb, rtx *cfi_ops) |
| { |
| aarch64_frame &frame = cfun->machine->frame; |
| @@ -9214,7 +9214,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = start_offset + frame.reg_offset[regno]; |
| + offset = (frame.reg_offset[regno] |
| + + frame.bytes_below_saved_regs |
| + - bytes_below_sp); |
| rtx base_rtx = stack_pointer_rtx; |
| if (mode == VNx2DImode && BYTES_BIG_ENDIAN) |
| aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, |
| @@ -9990,8 +9992,6 @@ aarch64_expand_prologue (void) |
| HOST_WIDE_INT callee_adjust = frame.callee_adjust; |
| poly_int64 final_adjust = frame.final_adjust; |
| poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| - poly_int64 below_hard_fp_saved_regs_size |
| - = frame.below_hard_fp_saved_regs_size; |
| unsigned reg1 = frame.wb_push_candidate1; |
| unsigned reg2 = frame.wb_push_candidate2; |
| bool emit_frame_chain = frame.emit_frame_chain; |
| @@ -10067,8 +10067,8 @@ aarch64_expand_prologue (void) |
| - frame.hard_fp_offset); |
| gcc_assert (known_ge (chain_offset, 0)); |
| |
| - /* The offset of the bottom of the save area from the current SP. */ |
| - poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; |
| + /* The offset of the current SP from the bottom of the static frame. */ |
| + poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; |
| |
| if (emit_frame_chain) |
| { |
| @@ -10076,7 +10076,7 @@ aarch64_expand_prologue (void) |
| { |
| reg1 = R29_REGNUM; |
| reg2 = R30_REGNUM; |
| - aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, |
| + aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, |
| false, false); |
| } |
| else |
| @@ -10116,7 +10116,7 @@ aarch64_expand_prologue (void) |
| emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| |
| - aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, |
| + aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, |
| callee_adjust != 0 || emit_frame_chain, |
| emit_frame_chain); |
| if (maybe_ne (sve_callee_adjust, 0)) |
| @@ -10126,16 +10126,17 @@ aarch64_expand_prologue (void) |
| aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, |
| sve_callee_adjust, |
| !frame_pointer_needed, false); |
| - saved_regs_offset += sve_callee_adjust; |
| + bytes_below_sp -= sve_callee_adjust; |
| } |
| - aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, |
| + aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, |
| false, emit_frame_chain); |
| - aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, |
| + aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, |
| callee_adjust != 0 || emit_frame_chain, |
| emit_frame_chain); |
| |
| /* We may need to probe the final adjustment if it is larger than the guard |
| that is assumed by the called. */ |
| + gcc_assert (known_eq (bytes_below_sp, final_adjust)); |
| aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, |
| !frame_pointer_needed, true); |
| } |
| @@ -10170,7 +10171,6 @@ aarch64_expand_epilogue (bool for_sibcall) |
| poly_int64 initial_adjust = frame.initial_adjust; |
| HOST_WIDE_INT callee_adjust = frame.callee_adjust; |
| poly_int64 final_adjust = frame.final_adjust; |
| - poly_int64 callee_offset = frame.callee_offset; |
| poly_int64 sve_callee_adjust = frame.sve_callee_adjust; |
| poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; |
| unsigned reg1 = frame.wb_pop_candidate1; |
| @@ -10240,9 +10240,9 @@ aarch64_expand_epilogue (bool for_sibcall) |
| |
| /* Restore the vector registers before the predicate registers, |
| so that we can use P4 as a temporary for big-endian SVE frames. */ |
| - aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, |
| + aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, |
| callee_adjust != 0, &cfi_ops); |
| - aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, |
| + aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, |
| false, &cfi_ops); |
| if (maybe_ne (sve_callee_adjust, 0)) |
| aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); |
| @@ -10250,7 +10250,7 @@ aarch64_expand_epilogue (bool for_sibcall) |
| /* When shadow call stack is enabled, the scs_pop in the epilogue will |
| restore x30, we don't need to restore x30 again in the traditional |
| way. */ |
| - aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, |
| + aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, |
| R0_REGNUM, last_gpr, |
| callee_adjust != 0, &cfi_ops); |
| |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 4263d29d29d..fd820b1be4e 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -813,10 +813,6 @@ struct GTY (()) aarch64_frame |
| It is zero when no push is used. */ |
| HOST_WIDE_INT callee_adjust; |
| |
| - /* The offset from SP to the callee-save registers after initial_adjust. |
| - It may be non-zero if no push is used (ie. callee_adjust == 0). */ |
| - poly_int64 callee_offset; |
| - |
| /* The size of the stack adjustment before saving or after restoring |
| SVE registers. */ |
| poly_int64 sve_callee_adjust; |
| -- |
| 2.34.1 |
| |
| |
| From 8ae9181426f2700c2e5a2909487fa630e6fa406b Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:15 +0100 |
| Subject: [PATCH 07/19] aarch64: Only calculate chain_offset if there is a |
| chain |
| |
| After previous patches, it is no longer necessary to calculate |
| a chain_offset in cases where there is no chain record. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the |
| calculation of chain_offset into the emit_frame_chain block. |
| --- |
| gcc/config/aarch64/aarch64.cc | 10 +++++----- |
| 1 file changed, 5 insertions(+), 5 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 46ae5cf7673..0e9b9717c08 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -10062,16 +10062,16 @@ aarch64_expand_prologue (void) |
| if (callee_adjust != 0) |
| aarch64_push_regs (reg1, reg2, callee_adjust); |
| |
| - /* The offset of the frame chain record (if any) from the current SP. */ |
| - poly_int64 chain_offset = (initial_adjust + callee_adjust |
| - - frame.hard_fp_offset); |
| - gcc_assert (known_ge (chain_offset, 0)); |
| - |
| /* The offset of the current SP from the bottom of the static frame. */ |
| poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust; |
| |
| if (emit_frame_chain) |
| { |
| + /* The offset of the frame chain record (if any) from the current SP. */ |
| + poly_int64 chain_offset = (initial_adjust + callee_adjust |
| + - frame.hard_fp_offset); |
| + gcc_assert (known_ge (chain_offset, 0)); |
| + |
| if (callee_adjust == 0) |
| { |
| reg1 = R29_REGNUM; |
| -- |
| 2.34.1 |
| |
| |
| From 375794feb614cee1f41b710b9cc1b6f25da6c1cb Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:15 +0100 |
| Subject: [PATCH 08/19] aarch64: Rename locals_offset to bytes_above_locals |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| locals_offset was described as: |
| |
| /* Offset from the base of the frame (incomming SP) to the |
| top of the locals area. This value is always a multiple of |
| STACK_BOUNDARY. */ |
| |
| This is implicitly an “upside-down” view of the frame: the incoming |
| SP is at offset 0, and anything N bytes below the incoming SP is at |
| offset N (rather than -N). |
| |
| However, reg_offset instead uses a “right way up” view; that is, |
| it views offsets in address terms. Something above X is at a |
| positive offset from X and something below X is at a negative |
| offset from X. |
| |
| Also, even on FRAME_GROWS_DOWNWARD targets like AArch64, |
| target-independent code views offsets in address terms too: |
| locals are allocated at negative offsets to virtual_stack_vars. |
| |
| It seems confusing to have *_offset fields of the same structure |
| using different polarities like this. This patch tries to avoid |
| that by renaming locals_offset to bytes_above_locals. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to... |
| (aarch64_frame::bytes_above_locals): ...this. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame) |
| (aarch64_initial_elimination_offset): Update accordingly. |
| --- |
| gcc/config/aarch64/aarch64.cc | 6 +++--- |
| gcc/config/aarch64/aarch64.h | 6 +++--- |
| 2 files changed, 6 insertions(+), 6 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 0e9b9717c08..0a22f91520e 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8598,7 +8598,7 @@ aarch64_layout_frame (void) |
| STACK_BOUNDARY / BITS_PER_UNIT)); |
| frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; |
| |
| - frame.locals_offset = frame.saved_varargs_size; |
| + frame.bytes_above_locals = frame.saved_varargs_size; |
| |
| frame.initial_adjust = 0; |
| frame.final_adjust = 0; |
| @@ -12754,13 +12754,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) |
| return frame.hard_fp_offset; |
| |
| if (from == FRAME_POINTER_REGNUM) |
| - return frame.hard_fp_offset - frame.locals_offset; |
| + return frame.hard_fp_offset - frame.bytes_above_locals; |
| } |
| |
| if (to == STACK_POINTER_REGNUM) |
| { |
| if (from == FRAME_POINTER_REGNUM) |
| - return frame.frame_size - frame.locals_offset; |
| + return frame.frame_size - frame.bytes_above_locals; |
| } |
| |
| return frame.frame_size; |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index fd820b1be4e..7ae12d13e2b 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -791,10 +791,10 @@ struct GTY (()) aarch64_frame |
| always a multiple of STACK_BOUNDARY. */ |
| poly_int64 bytes_below_hard_fp; |
| |
| - /* Offset from the base of the frame (incomming SP) to the |
| - top of the locals area. This value is always a multiple of |
| + /* The number of bytes between the top of the locals area and the top |
| + of the frame (the incomming SP). This value is always a multiple of |
| STACK_BOUNDARY. */ |
| - poly_int64 locals_offset; |
| + poly_int64 bytes_above_locals; |
| |
| /* Offset from the base of the frame (incomming SP) to the |
| hard_frame_pointer. This value is always a multiple of |
| -- |
| 2.34.1 |
| |
| |
| From 1a9ea1c45c75615ffbfabe652b3598a1d7be2168 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:16 +0100 |
| Subject: [PATCH 09/19] aarch64: Rename hard_fp_offset to bytes_above_hard_fp |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| Similarly to the previous locals_offset patch, hard_fp_offset |
| was described as: |
| |
| /* Offset from the base of the frame (incomming SP) to the |
| hard_frame_pointer. This value is always a multiple of |
| STACK_BOUNDARY. */ |
| poly_int64 hard_fp_offset; |
| |
| which again took an “upside-down” view: higher offsets meant lower |
| addresses. This patch renames the field to bytes_above_hard_fp instead. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename |
| to... |
| (aarch64_frame::bytes_above_hard_fp): ...this. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame) |
| (aarch64_expand_prologue): Update accordingly. |
| (aarch64_initial_elimination_offset): Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 26 +++++++++++++------------- |
| gcc/config/aarch64/aarch64.h | 6 +++--- |
| 2 files changed, 16 insertions(+), 16 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 0a22f91520e..95499ae49ba 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8590,7 +8590,7 @@ aarch64_layout_frame (void) |
| + get_frame_size (), |
| STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| - frame.hard_fp_offset |
| + frame.bytes_above_hard_fp |
| = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; |
| |
| /* Both these values are already aligned. */ |
| @@ -8639,13 +8639,13 @@ aarch64_layout_frame (void) |
| else if (frame.wb_pop_candidate1 != INVALID_REGNUM) |
| max_push_offset = 256; |
| |
| - HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset; |
| + HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; |
| HOST_WIDE_INT const_saved_regs_size; |
| if (known_eq (frame.saved_regs_size, 0)) |
| frame.initial_adjust = frame.frame_size; |
| else if (frame.frame_size.is_constant (&const_size) |
| && const_size < max_push_offset |
| - && known_eq (frame.hard_fp_offset, const_size)) |
| + && known_eq (frame.bytes_above_hard_fp, const_size)) |
| { |
| /* Simple, small frame with no data below the saved registers. |
| |
| @@ -8662,8 +8662,8 @@ aarch64_layout_frame (void) |
| case that it hardly seems worth the effort though. */ |
| && (!saves_below_hard_fp_p || const_below_saved_regs == 0) |
| && !(cfun->calls_alloca |
| - && frame.hard_fp_offset.is_constant (&const_fp_offset) |
| - && const_fp_offset < max_push_offset)) |
| + && frame.bytes_above_hard_fp.is_constant (&const_above_fp) |
| + && const_above_fp < max_push_offset)) |
| { |
| /* Frame with small area below the saved registers: |
| |
| @@ -8681,12 +8681,12 @@ aarch64_layout_frame (void) |
| sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size |
| save SVE registers relative to SP |
| sub sp, sp, bytes_below_saved_regs */ |
| - frame.initial_adjust = (frame.hard_fp_offset |
| + frame.initial_adjust = (frame.bytes_above_hard_fp |
| + frame.below_hard_fp_saved_regs_size); |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| - else if (frame.hard_fp_offset.is_constant (&const_fp_offset) |
| - && const_fp_offset < max_push_offset) |
| + else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) |
| + && const_above_fp < max_push_offset) |
| { |
| /* Frame with large area below the saved registers, or with SVE saves, |
| but with a small area above: |
| @@ -8696,7 +8696,7 @@ aarch64_layout_frame (void) |
| [sub sp, sp, below_hard_fp_saved_regs_size] |
| [save SVE registers relative to SP] |
| sub sp, sp, bytes_below_saved_regs */ |
| - frame.callee_adjust = const_fp_offset; |
| + frame.callee_adjust = const_above_fp; |
| frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| @@ -8711,7 +8711,7 @@ aarch64_layout_frame (void) |
| [sub sp, sp, below_hard_fp_saved_regs_size] |
| [save SVE registers relative to SP] |
| sub sp, sp, bytes_below_saved_regs */ |
| - frame.initial_adjust = frame.hard_fp_offset; |
| + frame.initial_adjust = frame.bytes_above_hard_fp; |
| frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| @@ -10069,7 +10069,7 @@ aarch64_expand_prologue (void) |
| { |
| /* The offset of the frame chain record (if any) from the current SP. */ |
| poly_int64 chain_offset = (initial_adjust + callee_adjust |
| - - frame.hard_fp_offset); |
| + - frame.bytes_above_hard_fp); |
| gcc_assert (known_ge (chain_offset, 0)); |
| |
| if (callee_adjust == 0) |
| @@ -12751,10 +12751,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) |
| if (to == HARD_FRAME_POINTER_REGNUM) |
| { |
| if (from == ARG_POINTER_REGNUM) |
| - return frame.hard_fp_offset; |
| + return frame.bytes_above_hard_fp; |
| |
| if (from == FRAME_POINTER_REGNUM) |
| - return frame.hard_fp_offset - frame.bytes_above_locals; |
| + return frame.bytes_above_hard_fp - frame.bytes_above_locals; |
| } |
| |
| if (to == STACK_POINTER_REGNUM) |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 7ae12d13e2b..3808f49e9ca 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -796,10 +796,10 @@ struct GTY (()) aarch64_frame |
| STACK_BOUNDARY. */ |
| poly_int64 bytes_above_locals; |
| |
| - /* Offset from the base of the frame (incomming SP) to the |
| - hard_frame_pointer. This value is always a multiple of |
| + /* The number of bytes between the hard_frame_pointer and the top of |
| + the frame (the incomming SP). This value is always a multiple of |
| STACK_BOUNDARY. */ |
| - poly_int64 hard_fp_offset; |
| + poly_int64 bytes_above_hard_fp; |
| |
| /* The size of the frame. This value is the offset from base of the |
| frame (incomming SP) to the stack_pointer. This value is always |
| -- |
| 2.34.1 |
| |
| |
| From d202ce1ecf60a36a3e1009917dd76109248ce9be Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:16 +0100 |
| Subject: [PATCH 10/19] aarch64: Tweak frame_size comment |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| This patch fixes another case in which a value was described with |
| an “upside-down” view. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. |
| --- |
| gcc/config/aarch64/aarch64.h | 4 ++-- |
| 1 file changed, 2 insertions(+), 2 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 3808f49e9ca..108a5731b0d 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -801,8 +801,8 @@ struct GTY (()) aarch64_frame |
| STACK_BOUNDARY. */ |
| poly_int64 bytes_above_hard_fp; |
| |
| - /* The size of the frame. This value is the offset from base of the |
| - frame (incomming SP) to the stack_pointer. This value is always |
| + /* The size of the frame, i.e. the number of bytes between the bottom |
| + of the outgoing arguments and the incoming SP. This value is always |
| a multiple of STACK_BOUNDARY. */ |
| poly_int64 frame_size; |
| |
| -- |
| 2.34.1 |
| |
| |
| From f2b585375205b0a1802d79c682ba33766ecd1f0f Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:17 +0100 |
| Subject: [PATCH 11/19] aarch64: Measure reg_offset from the bottom of the |
| frame |
| |
| reg_offset was measured from the bottom of the saved register area. |
| This made perfect sense with the original layout, since the bottom |
| of the saved register area was also the hard frame pointer address. |
| It became slightly less obvious with SVE, since we save SVE |
| registers below the hard frame pointer, but it still made sense. |
| |
| However, if we want to allow different frame layouts, it's more |
| convenient and obvious to measure reg_offset from the bottom of |
| the frame. After previous patches, it's also a slight simplification |
| in its own right. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame): Add comment above |
| reg_offset. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets |
| from the bottom of the frame, rather than the bottom of the saved |
| register area. Measure reg_offset from the bottom of the frame |
| rather than the bottom of the saved register area. |
| (aarch64_save_callee_saves): Update accordingly. |
| (aarch64_restore_callee_saves): Likewise. |
| (aarch64_get_separate_components): Likewise. |
| (aarch64_process_components): Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++------------------- |
| gcc/config/aarch64/aarch64.h | 3 ++ |
| 2 files changed, 27 insertions(+), 29 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 95499ae49ba..af99807ef8a 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8400,7 +8400,6 @@ aarch64_needs_frame_chain (void) |
| static void |
| aarch64_layout_frame (void) |
| { |
| - poly_int64 offset = 0; |
| int regno, last_fp_reg = INVALID_REGNUM; |
| machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); |
| poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); |
| @@ -8478,7 +8477,9 @@ aarch64_layout_frame (void) |
| gcc_assert (crtl->is_leaf |
| || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); |
| |
| - frame.bytes_below_saved_regs = crtl->outgoing_args_size; |
| + poly_int64 offset = crtl->outgoing_args_size; |
| + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); |
| + frame.bytes_below_saved_regs = offset; |
| |
| /* Now assign stack slots for the registers. Start with the predicate |
| registers, since predicate LDR and STR have a relatively small |
| @@ -8490,7 +8491,8 @@ aarch64_layout_frame (void) |
| offset += BYTES_PER_SVE_PRED; |
| } |
| |
| - if (maybe_ne (offset, 0)) |
| + poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs; |
| + if (maybe_ne (saved_prs_size, 0)) |
| { |
| /* If we have any vector registers to save above the predicate registers, |
| the offset of the vector register save slots need to be a multiple |
| @@ -8508,10 +8510,10 @@ aarch64_layout_frame (void) |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| else |
| { |
| - if (known_le (offset, vector_save_size)) |
| - offset = vector_save_size; |
| - else if (known_le (offset, vector_save_size * 2)) |
| - offset = vector_save_size * 2; |
| + if (known_le (saved_prs_size, vector_save_size)) |
| + offset = frame.bytes_below_saved_regs + vector_save_size; |
| + else if (known_le (saved_prs_size, vector_save_size * 2)) |
| + offset = frame.bytes_below_saved_regs + vector_save_size * 2; |
| else |
| gcc_unreachable (); |
| } |
| @@ -8528,9 +8530,10 @@ aarch64_layout_frame (void) |
| |
| /* OFFSET is now the offset of the hard frame pointer from the bottom |
| of the callee save area. */ |
| - bool saves_below_hard_fp_p = maybe_ne (offset, 0); |
| - frame.below_hard_fp_saved_regs_size = offset; |
| - frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs; |
| + frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; |
| + bool saves_below_hard_fp_p |
| + = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); |
| + frame.bytes_below_hard_fp = offset; |
| if (frame.emit_frame_chain) |
| { |
| /* FP and LR are placed in the linkage record. */ |
| @@ -8581,9 +8584,10 @@ aarch64_layout_frame (void) |
| |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| - frame.saved_regs_size = offset; |
| + frame.saved_regs_size = offset - frame.bytes_below_saved_regs; |
| |
| - poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; |
| + poly_int64 varargs_and_saved_regs_size |
| + = frame.saved_regs_size + frame.saved_varargs_size; |
| |
| poly_int64 saved_regs_and_above |
| = aligned_upper_bound (varargs_and_saved_regs_size |
| @@ -9105,9 +9109,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = (frame.reg_offset[regno] |
| - + frame.bytes_below_saved_regs |
| - - bytes_below_sp); |
| + offset = frame.reg_offset[regno] - bytes_below_sp; |
| rtx base_rtx = stack_pointer_rtx; |
| poly_int64 sp_offset = offset; |
| |
| @@ -9214,9 +9216,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, |
| |
| machine_mode mode = aarch64_reg_save_mode (regno); |
| reg = gen_rtx_REG (mode, regno); |
| - offset = (frame.reg_offset[regno] |
| - + frame.bytes_below_saved_regs |
| - - bytes_below_sp); |
| + offset = frame.reg_offset[regno] - bytes_below_sp; |
| rtx base_rtx = stack_pointer_rtx; |
| if (mode == VNx2DImode && BYTES_BIG_ENDIAN) |
| aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, |
| @@ -9355,14 +9355,12 @@ aarch64_get_separate_components (void) |
| it as a stack probe for -fstack-clash-protection. */ |
| if (flag_stack_clash_protection |
| && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) |
| - && known_eq (offset, 0)) |
| + && known_eq (offset, frame.bytes_below_saved_regs)) |
| continue; |
| |
| /* Get the offset relative to the register we'll use. */ |
| if (frame_pointer_needed) |
| - offset -= frame.below_hard_fp_saved_regs_size; |
| - else |
| - offset += frame.bytes_below_saved_regs; |
| + offset -= frame.bytes_below_hard_fp; |
| |
| /* Check that we can access the stack slot of the register with one |
| direct load with no adjustments needed. */ |
| @@ -9509,9 +9507,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| rtx reg = gen_rtx_REG (mode, regno); |
| poly_int64 offset = frame.reg_offset[regno]; |
| if (frame_pointer_needed) |
| - offset -= frame.below_hard_fp_saved_regs_size; |
| - else |
| - offset += frame.bytes_below_saved_regs; |
| + offset -= frame.bytes_below_hard_fp; |
| |
| rtx addr = plus_constant (Pmode, ptr_reg, offset); |
| rtx mem = gen_frame_mem (mode, addr); |
| @@ -9563,9 +9559,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) |
| /* REGNO2 can be saved/restored in a pair with REGNO. */ |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| if (frame_pointer_needed) |
| - offset2 -= frame.below_hard_fp_saved_regs_size; |
| - else |
| - offset2 += frame.bytes_below_saved_regs; |
| + offset2 -= frame.bytes_below_hard_fp; |
| rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); |
| rtx mem2 = gen_frame_mem (mode, addr2); |
| rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) |
| @@ -9681,7 +9675,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| if (final_adjustment_p |
| && known_eq (frame.below_hard_fp_saved_regs_size, 0)) |
| { |
| - poly_int64 lr_offset = frame.reg_offset[LR_REGNUM]; |
| + poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM] |
| + - frame.bytes_below_saved_regs); |
| if (known_ge (lr_offset, 0)) |
| min_probe_threshold -= lr_offset.to_constant (); |
| else |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index 108a5731b0d..c8becb098c8 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -766,6 +766,9 @@ extern enum aarch64_processor aarch64_tune; |
| #ifdef HAVE_POLY_INT_H |
| struct GTY (()) aarch64_frame |
| { |
| + /* The offset from the bottom of the static frame (the bottom of the |
| + outgoing arguments) of each register save slot, or -2 if no save is |
| + needed. */ |
| poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; |
| |
| /* The number of extra stack bytes taken up by register varargs. |
| -- |
| 2.34.1 |
| |
| |
| From 79faabda181d0d9fd29a3cf5726ba65bdee945b5 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:17 +0100 |
| Subject: [PATCH 12/19] aarch64: Simplify top of frame allocation |
| |
| After previous patches, it no longer really makes sense to allocate |
| the top of the frame in terms of varargs_and_saved_regs_size and |
| saved_regs_and_above. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify |
| the allocation of the top of the frame. |
| --- |
| gcc/config/aarch64/aarch64.cc | 23 ++++++++--------------- |
| 1 file changed, 8 insertions(+), 15 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index af99807ef8a..31b00094c2a 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8586,23 +8586,16 @@ aarch64_layout_frame (void) |
| |
| frame.saved_regs_size = offset - frame.bytes_below_saved_regs; |
| |
| - poly_int64 varargs_and_saved_regs_size |
| - = frame.saved_regs_size + frame.saved_varargs_size; |
| - |
| - poly_int64 saved_regs_and_above |
| - = aligned_upper_bound (varargs_and_saved_regs_size |
| - + get_frame_size (), |
| - STACK_BOUNDARY / BITS_PER_UNIT); |
| - |
| - frame.bytes_above_hard_fp |
| - = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; |
| + offset += get_frame_size (); |
| + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| + auto top_of_locals = offset; |
| |
| - /* Both these values are already aligned. */ |
| - gcc_assert (multiple_p (frame.bytes_below_saved_regs, |
| - STACK_BOUNDARY / BITS_PER_UNIT)); |
| - frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; |
| + offset += frame.saved_varargs_size; |
| + gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); |
| + frame.frame_size = offset; |
| |
| - frame.bytes_above_locals = frame.saved_varargs_size; |
| + frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; |
| + frame.bytes_above_locals = frame.frame_size - top_of_locals; |
| |
| frame.initial_adjust = 0; |
| frame.final_adjust = 0; |
| -- |
| 2.34.1 |
| |
| |
| From 4e62049e403b141e6f916176160dac8cbd65fe47 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:18 +0100 |
| Subject: [PATCH 13/19] aarch64: Minor initial adjustment tweak |
| |
| This patch just changes a calculation of initial_adjust |
| to one that makes it slightly more obvious that the total |
| adjustment is frame.frame_size. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak |
| calculation of initial_adjust for frames in which all saves |
| are SVE saves. |
| --- |
| gcc/config/aarch64/aarch64.cc | 5 ++--- |
| 1 file changed, 2 insertions(+), 3 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 31b00094c2a..1aa79da0673 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8675,11 +8675,10 @@ aarch64_layout_frame (void) |
| { |
| /* Frame in which all saves are SVE saves: |
| |
| - sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size |
| + sub sp, sp, frame_size - bytes_below_saved_regs |
| save SVE registers relative to SP |
| sub sp, sp, bytes_below_saved_regs */ |
| - frame.initial_adjust = (frame.bytes_above_hard_fp |
| - + frame.below_hard_fp_saved_regs_size); |
| + frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) |
| -- |
| 2.34.1 |
| |
| |
| From aaa1a0a5912d9e5d571e5f1c6f09ceac99544ab5 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:18 +0100 |
| Subject: [PATCH 14/19] aarch64: Tweak stack clash boundary condition |
| |
| The AArch64 ABI says that, when stack clash protection is used, |
| there can be a maximum of 1KiB of unprobed space at sp on entry |
| to a function. Therefore, we need to probe when allocating |
| >= guard_size - 1KiB of data (>= rather than >). This is what |
| GCC does. |
| |
| If an allocation is exactly guard_size bytes, it is enough to allocate |
| those bytes and probe once at offset 1024. It isn't possible to use a |
| single probe at any other offset: higher would complicate later code, |
| by leaving more unprobed space than usual, while lower would risk |
| leaving an entire page unprobed. For simplicity, the code probes all |
| allocations at offset 1024. |
| |
| Some register saves also act as probes. If we need to allocate |
| more space below the last such register save probe, we need to |
| probe the allocation if it is > 1KiB. Again, this allocation is |
| then sometimes (but not always) probed at offset 1024. This sort of |
| allocation is currently only used for outgoing arguments, which are |
| rarely this big. |
| |
| However, the code also probed if this final outgoing-arguments |
| allocation was == 1KiB, rather than just > 1KiB. This isn't |
| necessary, since the register save then probes at offset 1024 |
| as required. Continuing to probe allocations of exactly 1KiB |
| would complicate later patches. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): |
| Don't probe final allocations that are exactly 1KiB in size (after |
| unprobed space above the final allocation has been deducted). |
| |
| gcc/testsuite/ |
| * gcc.target/aarch64/stack-check-prologue-17.c: New test. |
| --- |
| gcc/config/aarch64/aarch64.cc | 4 +- |
| .../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++ |
| 2 files changed, 58 insertions(+), 1 deletion(-) |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 1aa79da0673..5cad847977a 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -9648,9 +9648,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| HOST_WIDE_INT guard_size |
| = 1 << param_stack_clash_protection_guard_size; |
| HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; |
| + HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT; |
| + gcc_assert (multiple_p (poly_size, byte_sp_alignment)); |
| HOST_WIDE_INT min_probe_threshold |
| = (final_adjustment_p |
| - ? guard_used_by_caller |
| + ? guard_used_by_caller + byte_sp_alignment |
| : guard_size - guard_used_by_caller); |
| /* When doing the final adjustment for the outgoing arguments, take into |
| account any unprobed space there is above the current SP. There are |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| new file mode 100644 |
| index 00000000000..0d8a25d73a2 |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| @@ -0,0 +1,55 @@ |
| +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ |
| +/* { dg-final { check-function-bodies "**" "" } } */ |
| + |
| +void f(int, ...); |
| +void g(); |
| + |
| +/* |
| +** test1: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1024 |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test1(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test2: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1040 |
| +** str xzr, \[sp\] |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test2(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| -- |
| 2.34.1 |
| |
| |
| From 8433953434a7b58c0923140d39eb3c5988c1d097 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:19 +0100 |
| Subject: [PATCH 15/19] aarch64: Put LR save probe in first 16 bytes |
| |
| -fstack-clash-protection uses the save of LR as a probe for the next |
| allocation. The next allocation could be: |
| |
| * another part of the static frame, e.g. when allocating SVE save slots |
| or outgoing arguments |
| |
| * an alloca in the same function |
| |
| * an allocation made by a callee function |
| |
| However, when -fomit-frame-pointer is used, the LR save slot is placed |
| above the other GPR save slots. It could therefore be up to 80 bytes |
| above the base of the GPR save area (which is also the hard fp address). |
| |
| aarch64_allocate_and_probe_stack_space took this into account when |
| deciding how much subsequent space could be allocated without needing |
| a probe. However, it interacted badly with: |
| |
| /* If doing a small final adjustment, we always probe at offset 0. |
| This is done to avoid issues when LR is not at position 0 or when |
| the final adjustment is smaller than the probing offset. */ |
| else if (final_adjustment_p && rounded_size == 0) |
| residual_probe_offset = 0; |
| |
| which forces any allocation that is smaller than the guard page size |
| to be probed at offset 0 rather than the usual offset 1024. It was |
| therefore possible to construct cases in which we had: |
| |
| * a probe using LR at SP + 80 bytes (or some other value >= 16) |
| * an allocation of the guard page size - 16 bytes |
| * a probe at SP + 0 |
| |
| which allocates guard page size + 64 consecutive unprobed bytes. |
| |
| This patch requires the LR probe to be in the first 16 bytes of the |
| save area when stack clash protection is active. Doing it |
| unconditionally would cause code-quality regressions. |
| |
| Putting LR before other registers prevents push/pop allocation |
| when shadow call stacks are enabled, since LR is restored |
| separately from the other callee-saved registers. |
| |
| The new comment doesn't say that the probe register is required |
| to be LR, since a later patch removes that restriction. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that |
| the LR save slot is in the first 16 bytes of the register save area. |
| Only form STP/LDP push/pop candidates if both registers are valid. |
| (aarch64_allocate_and_probe_stack_space): Remove workaround for |
| when LR was not in the first 16 bytes. |
| |
| gcc/testsuite/ |
| * gcc.target/aarch64/stack-check-prologue-18.c: New test. |
| * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. |
| * gcc.target/aarch64/stack-check-prologue-20.c: Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 72 ++++++------- |
| .../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++ |
| .../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++ |
| .../aarch64/stack-check-prologue-20.c | 3 + |
| 4 files changed, 233 insertions(+), 42 deletions(-) |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 5cad847977a..a765f92329d 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8534,26 +8534,34 @@ aarch64_layout_frame (void) |
| bool saves_below_hard_fp_p |
| = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); |
| frame.bytes_below_hard_fp = offset; |
| + |
| + auto allocate_gpr_slot = [&](unsigned int regno) |
| + { |
| + frame.reg_offset[regno] = offset; |
| + if (frame.wb_push_candidate1 == INVALID_REGNUM) |
| + frame.wb_push_candidate1 = regno; |
| + else if (frame.wb_push_candidate2 == INVALID_REGNUM) |
| + frame.wb_push_candidate2 = regno; |
| + offset += UNITS_PER_WORD; |
| + }; |
| + |
| if (frame.emit_frame_chain) |
| { |
| /* FP and LR are placed in the linkage record. */ |
| - frame.reg_offset[R29_REGNUM] = offset; |
| - frame.wb_push_candidate1 = R29_REGNUM; |
| - frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD; |
| - frame.wb_push_candidate2 = R30_REGNUM; |
| - offset += 2 * UNITS_PER_WORD; |
| + allocate_gpr_slot (R29_REGNUM); |
| + allocate_gpr_slot (R30_REGNUM); |
| } |
| + else if (flag_stack_clash_protection |
| + && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED)) |
| + /* Put the LR save slot first, since it makes a good choice of probe |
| + for stack clash purposes. The idea is that the link register usually |
| + has to be saved before a call anyway, and so we lose little by |
| + stopping it from being individually shrink-wrapped. */ |
| + allocate_gpr_slot (R30_REGNUM); |
| |
| for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) |
| if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) |
| - { |
| - frame.reg_offset[regno] = offset; |
| - if (frame.wb_push_candidate1 == INVALID_REGNUM) |
| - frame.wb_push_candidate1 = regno; |
| - else if (frame.wb_push_candidate2 == INVALID_REGNUM) |
| - frame.wb_push_candidate2 = regno; |
| - offset += UNITS_PER_WORD; |
| - } |
| + allocate_gpr_slot (regno); |
| |
| poly_int64 max_int_offset = offset; |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| @@ -8631,10 +8639,13 @@ aarch64_layout_frame (void) |
| max_push_offset to 0, because no registers are popped at this time, |
| so callee_adjust cannot be adjusted. */ |
| HOST_WIDE_INT max_push_offset = 0; |
| - if (frame.wb_pop_candidate2 != INVALID_REGNUM) |
| - max_push_offset = 512; |
| - else if (frame.wb_pop_candidate1 != INVALID_REGNUM) |
| - max_push_offset = 256; |
| + if (frame.wb_pop_candidate1 != INVALID_REGNUM) |
| + { |
| + if (frame.wb_pop_candidate2 != INVALID_REGNUM) |
| + max_push_offset = 512; |
| + else |
| + max_push_offset = 256; |
| + } |
| |
| HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; |
| HOST_WIDE_INT const_saved_regs_size; |
| @@ -9654,29 +9665,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| = (final_adjustment_p |
| ? guard_used_by_caller + byte_sp_alignment |
| : guard_size - guard_used_by_caller); |
| - /* When doing the final adjustment for the outgoing arguments, take into |
| - account any unprobed space there is above the current SP. There are |
| - two cases: |
| - |
| - - When saving SVE registers below the hard frame pointer, we force |
| - the lowest save to take place in the prologue before doing the final |
| - adjustment (i.e. we don't allow the save to be shrink-wrapped). |
| - This acts as a probe at SP, so there is no unprobed space. |
| - |
| - - When there are no SVE register saves, we use the store of the link |
| - register as a probe. We can't assume that LR was saved at position 0 |
| - though, so treat any space below it as unprobed. */ |
| - if (final_adjustment_p |
| - && known_eq (frame.below_hard_fp_saved_regs_size, 0)) |
| - { |
| - poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM] |
| - - frame.bytes_below_saved_regs); |
| - if (known_ge (lr_offset, 0)) |
| - min_probe_threshold -= lr_offset.to_constant (); |
| - else |
| - gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); |
| - } |
| - |
| poly_int64 frame_size = frame.frame_size; |
| |
| /* We should always have a positive probe threshold. */ |
| @@ -9856,8 +9844,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| if (final_adjustment_p && rounded_size != 0) |
| min_probe_threshold = 0; |
| /* If doing a small final adjustment, we always probe at offset 0. |
| - This is done to avoid issues when LR is not at position 0 or when |
| - the final adjustment is smaller than the probing offset. */ |
| + This is done to avoid issues when the final adjustment is smaller |
| + than the probing offset. */ |
| else if (final_adjustment_p && rounded_size == 0) |
| residual_probe_offset = 0; |
| |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| new file mode 100644 |
| index 00000000000..82447d20fff |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| @@ -0,0 +1,100 @@ |
| +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ |
| +/* { dg-final { check-function-bodies "**" "" } } */ |
| + |
| +void f(int, ...); |
| +void g(); |
| + |
| +/* |
| +** test1: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #4064 |
| +** str xzr, \[sp\] |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +** str x26, \[sp, #?4128\] |
| +** ... |
| +*/ |
| +int test1(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test2: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1040 |
| +** str xzr, \[sp\] |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test2(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test3: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1024 |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test3(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| new file mode 100644 |
| index 00000000000..73ac3e4e4eb |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| @@ -0,0 +1,100 @@ |
| +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ |
| +/* { dg-final { check-function-bodies "**" "" } } */ |
| + |
| +void f(int, ...); |
| +void g(); |
| + |
| +/* |
| +** test1: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #4064 |
| +** str xzr, \[sp\] |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +** str x26, \[sp, #?4128\] |
| +** ... |
| +*/ |
| +int test1(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test2: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1040 |
| +** str xzr, \[sp\] |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test2(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test3: |
| +** ... |
| +** str x30, \[sp\] |
| +** sub sp, sp, #1024 |
| +** cbnz w0, .* |
| +** bl g |
| +** ... |
| +*/ |
| +int test3(int z) { |
| + __uint128_t x = 0; |
| + int y[0x400]; |
| + if (z) |
| + { |
| + asm volatile ("" ::: |
| + "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26"); |
| + f(0, 0, 0, 0, 0, 0, 0, &y, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, |
| + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); |
| + } |
| + g(); |
| + return 1; |
| +} |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c |
| new file mode 100644 |
| index 00000000000..690aae8dfd5 |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c |
| @@ -0,0 +1,3 @@ |
| +/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ |
| + |
| +#include "stack-check-prologue-19.c" |
| -- |
| 2.34.1 |
| |
| |
| From eea1759073e09dd1aefbc9a881601ab1eebfdd18 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:19 +0100 |
| Subject: [PATCH 16/19] aarch64: Simplify probe of final frame allocation |
| |
| Previous patches ensured that the final frame allocation only needs |
| a probe when the size is strictly greater than 1KiB. It's therefore |
| safe to use the normal 1024 probe offset in all cases. |
| |
| The main motivation for doing this is to simplify the code and |
| reduce the number of special cases. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space): |
| Always probe the residual allocation at offset 1024, asserting |
| that that is in range. |
| |
| gcc/testsuite/ |
| * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe |
| to be at offset 1024 rather than offset 0. |
| * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. |
| * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 12 ++++-------- |
| .../gcc.target/aarch64/stack-check-prologue-17.c | 2 +- |
| .../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++-- |
| .../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++-- |
| 4 files changed, 9 insertions(+), 13 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index a765f92329d..37809a306f7 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -9838,16 +9838,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| are still safe. */ |
| if (residual) |
| { |
| - HOST_WIDE_INT residual_probe_offset = guard_used_by_caller; |
| + gcc_assert (guard_used_by_caller + byte_sp_alignment <= size); |
| + |
| /* If we're doing final adjustments, and we've done any full page |
| allocations then any residual needs to be probed. */ |
| if (final_adjustment_p && rounded_size != 0) |
| min_probe_threshold = 0; |
| - /* If doing a small final adjustment, we always probe at offset 0. |
| - This is done to avoid issues when the final adjustment is smaller |
| - than the probing offset. */ |
| - else if (final_adjustment_p && rounded_size == 0) |
| - residual_probe_offset = 0; |
| |
| aarch64_sub_sp (temp1, temp2, residual, frame_related_p); |
| if (residual >= min_probe_threshold) |
| @@ -9858,8 +9854,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, |
| HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." |
| "\n", residual); |
| |
| - emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| - residual_probe_offset)); |
| + emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| + guard_used_by_caller)); |
| emit_insn (gen_blockage ()); |
| } |
| } |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| index 0d8a25d73a2..f0ec1389771 100644 |
| --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c |
| @@ -33,7 +33,7 @@ int test1(int z) { |
| ** ... |
| ** str x30, \[sp\] |
| ** sub sp, sp, #1040 |
| -** str xzr, \[sp\] |
| +** str xzr, \[sp, #?1024\] |
| ** cbnz w0, .* |
| ** bl g |
| ** ... |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| index 82447d20fff..6383bec5ebc 100644 |
| --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c |
| @@ -9,7 +9,7 @@ void g(); |
| ** ... |
| ** str x30, \[sp\] |
| ** sub sp, sp, #4064 |
| -** str xzr, \[sp\] |
| +** str xzr, \[sp, #?1024\] |
| ** cbnz w0, .* |
| ** bl g |
| ** ... |
| @@ -50,7 +50,7 @@ int test1(int z) { |
| ** ... |
| ** str x30, \[sp\] |
| ** sub sp, sp, #1040 |
| -** str xzr, \[sp\] |
| +** str xzr, \[sp, #?1024\] |
| ** cbnz w0, .* |
| ** bl g |
| ** ... |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| index 73ac3e4e4eb..562039b5e9b 100644 |
| --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c |
| @@ -9,7 +9,7 @@ void g(); |
| ** ... |
| ** str x30, \[sp\] |
| ** sub sp, sp, #4064 |
| -** str xzr, \[sp\] |
| +** str xzr, \[sp, #?1024\] |
| ** cbnz w0, .* |
| ** bl g |
| ** ... |
| @@ -50,7 +50,7 @@ int test1(int z) { |
| ** ... |
| ** str x30, \[sp\] |
| ** sub sp, sp, #1040 |
| -** str xzr, \[sp\] |
| +** str xzr, \[sp, #?1024\] |
| ** cbnz w0, .* |
| ** bl g |
| ** ... |
| -- |
| 2.34.1 |
| |
| |
| From 96d85187c3b9c9a7efc2fd698c3d452e80d8aa47 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:20 +0100 |
| Subject: [PATCH 17/19] aarch64: Explicitly record probe registers in frame |
| info |
| |
| The stack frame is currently divided into three areas: |
| |
| A: the area above the hard frame pointer |
| B: the SVE saves below the hard frame pointer |
| C: the outgoing arguments |
| |
| If the stack frame is allocated in one chunk, the allocation needs a |
| probe if the frame size is >= guard_size - 1KiB. In addition, if the |
| function is not a leaf function, it must probe an address no more than |
| 1KiB above the outgoing SP.  We ensured the second condition by: |
| |
| (1) using single-chunk allocations for non-leaf functions only if |
| the link register save slot is within 512 bytes of the bottom |
| of the frame; and |
| |
| (2) using the link register save as a probe (meaning, for instance, |
| that it can't be individually shrink wrapped) |
| |
| If instead the stack is allocated in multiple chunks, then: |
| |
| * an allocation involving only the outgoing arguments (C above) requires |
| a probe if the allocation size is > 1KiB |
| |
| * any other allocation requires a probe if the allocation size |
| is >= guard_size - 1KiB |
| |
| * second and subsequent allocations require the previous allocation |
| to probe at the bottom of the allocated area, regardless of the size |
| of that previous allocation |
| |
| The final point means that, unlike for single allocations, |
| it can be necessary to have both a non-SVE register probe and |
| an SVE register probe. For example: |
| |
| * allocate A, probe using a non-SVE register save |
| * allocate B, probe using an SVE register save |
| * allocate C |
| |
| The non-SVE register used in this case was again the link register. |
| It was previously used even if the link register save slot was some |
| bytes above the bottom of the non-SVE register saves, but an earlier |
| patch avoided that by putting the link register save slot first. |
| |
| As a belt-and-braces fix, this patch explicitly records which |
| probe registers we're using and allows the non-SVE probe to be |
| whichever register comes first (as for SVE). |
| |
| The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe) |
| (aarch64_frame::hard_fp_save_and_probe): New fields. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them. |
| Rather than asserting that a leaf function saves LR, instead assert |
| that a leaf function saves something. |
| (aarch64_get_separate_components): Prevent the chosen probe |
| registers from being individually shrink-wrapped. |
| (aarch64_allocate_and_probe_stack_space): Remove workaround for |
| probe registers that aren't at the bottom of the previous allocation. |
| |
| gcc/testsuite/ |
| * gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes. |
| --- |
| gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++---- |
| gcc/config/aarch64/aarch64.h | 8 +++ |
| .../aarch64/sve/pcs/stack_clash_3.c | 6 +- |
| 3 files changed, 64 insertions(+), 18 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 37809a306f7..6c59c39a639 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8471,15 +8471,11 @@ aarch64_layout_frame (void) |
| && !crtl->abi->clobbers_full_reg_p (regno)) |
| frame.reg_offset[regno] = SLOT_REQUIRED; |
| |
| - /* With stack-clash, LR must be saved in non-leaf functions. The saving of |
| - LR counts as an implicit probe which allows us to maintain the invariant |
| - described in the comment at expand_prologue. */ |
| - gcc_assert (crtl->is_leaf |
| - || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); |
| |
| poly_int64 offset = crtl->outgoing_args_size; |
| gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); |
| frame.bytes_below_saved_regs = offset; |
| + frame.sve_save_and_probe = INVALID_REGNUM; |
| |
| /* Now assign stack slots for the registers. Start with the predicate |
| registers, since predicate LDR and STR have a relatively small |
| @@ -8487,6 +8483,8 @@ aarch64_layout_frame (void) |
| for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) |
| if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) |
| { |
| + if (frame.sve_save_and_probe == INVALID_REGNUM) |
| + frame.sve_save_and_probe = regno; |
| frame.reg_offset[regno] = offset; |
| offset += BYTES_PER_SVE_PRED; |
| } |
| @@ -8524,6 +8522,8 @@ aarch64_layout_frame (void) |
| for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) |
| if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) |
| { |
| + if (frame.sve_save_and_probe == INVALID_REGNUM) |
| + frame.sve_save_and_probe = regno; |
| frame.reg_offset[regno] = offset; |
| offset += vector_save_size; |
| } |
| @@ -8533,10 +8533,18 @@ aarch64_layout_frame (void) |
| frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; |
| bool saves_below_hard_fp_p |
| = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); |
| + gcc_assert (!saves_below_hard_fp_p |
| + || (frame.sve_save_and_probe != INVALID_REGNUM |
| + && known_eq (frame.reg_offset[frame.sve_save_and_probe], |
| + frame.bytes_below_saved_regs))); |
| + |
| frame.bytes_below_hard_fp = offset; |
| + frame.hard_fp_save_and_probe = INVALID_REGNUM; |
| |
| auto allocate_gpr_slot = [&](unsigned int regno) |
| { |
| + if (frame.hard_fp_save_and_probe == INVALID_REGNUM) |
| + frame.hard_fp_save_and_probe = regno; |
| frame.reg_offset[regno] = offset; |
| if (frame.wb_push_candidate1 == INVALID_REGNUM) |
| frame.wb_push_candidate1 = regno; |
| @@ -8570,6 +8578,8 @@ aarch64_layout_frame (void) |
| for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) |
| if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) |
| { |
| + if (frame.hard_fp_save_and_probe == INVALID_REGNUM) |
| + frame.hard_fp_save_and_probe = regno; |
| /* If there is an alignment gap between integer and fp callee-saves, |
| allocate the last fp register to it if possible. */ |
| if (regno == last_fp_reg |
| @@ -8593,6 +8603,17 @@ aarch64_layout_frame (void) |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| frame.saved_regs_size = offset - frame.bytes_below_saved_regs; |
| + gcc_assert (known_eq (frame.saved_regs_size, |
| + frame.below_hard_fp_saved_regs_size) |
| + || (frame.hard_fp_save_and_probe != INVALID_REGNUM |
| + && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe], |
| + frame.bytes_below_hard_fp))); |
| + |
| + /* With stack-clash, a register must be saved in non-leaf functions. |
| + The saving of the bottommost register counts as an implicit probe, |
| + which allows us to maintain the invariant described in the comment |
| + at expand_prologue. */ |
| + gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); |
| |
| offset += get_frame_size (); |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| @@ -8723,6 +8744,25 @@ aarch64_layout_frame (void) |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| |
| + /* The frame is allocated in pieces, with each non-final piece |
| + including a register save at offset 0 that acts as a probe for |
| + the following piece. In addition, the save of the bottommost register |
| + acts as a probe for callees and allocas. Roll back any probes that |
| + aren't needed. |
| + |
| + A probe isn't needed if it is associated with the final allocation |
| + (including callees and allocas) that happens before the epilogue is |
| + executed. */ |
| + if (crtl->is_leaf |
| + && !cfun->calls_alloca |
| + && known_eq (frame.final_adjust, 0)) |
| + { |
| + if (maybe_ne (frame.sve_callee_adjust, 0)) |
| + frame.sve_save_and_probe = INVALID_REGNUM; |
| + else |
| + frame.hard_fp_save_and_probe = INVALID_REGNUM; |
| + } |
| + |
| /* Make sure the individual adjustments add up to the full frame size. */ |
| gcc_assert (known_eq (frame.initial_adjust |
| + frame.callee_adjust |
| @@ -9354,13 +9394,6 @@ aarch64_get_separate_components (void) |
| |
| poly_int64 offset = frame.reg_offset[regno]; |
| |
| - /* If the register is saved in the first SVE save slot, we use |
| - it as a stack probe for -fstack-clash-protection. */ |
| - if (flag_stack_clash_protection |
| - && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) |
| - && known_eq (offset, frame.bytes_below_saved_regs)) |
| - continue; |
| - |
| /* Get the offset relative to the register we'll use. */ |
| if (frame_pointer_needed) |
| offset -= frame.bytes_below_hard_fp; |
| @@ -9395,6 +9428,13 @@ aarch64_get_separate_components (void) |
| |
| bitmap_clear_bit (components, LR_REGNUM); |
| bitmap_clear_bit (components, SP_REGNUM); |
| + if (flag_stack_clash_protection) |
| + { |
| + if (frame.sve_save_and_probe != INVALID_REGNUM) |
| + bitmap_clear_bit (components, frame.sve_save_and_probe); |
| + if (frame.hard_fp_save_and_probe != INVALID_REGNUM) |
| + bitmap_clear_bit (components, frame.hard_fp_save_and_probe); |
| + } |
| |
| return components; |
| } |
| @@ -9931,8 +9971,8 @@ aarch64_epilogue_uses (int regno) |
| When probing is needed, we emit a probe at the start of the prologue |
| and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter. |
| |
| - We have to track how much space has been allocated and the only stores |
| - to the stack we track as implicit probes are the FP/LR stores. |
| + We can also use register saves as probes. These are stored in |
| + sve_save_and_probe and hard_fp_save_and_probe. |
| |
| For outgoing arguments we probe if the size is larger than 1KB, such that |
| the ABI specified buffer is maintained for the next callee. |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index c8becb098c8..fbfb73545ba 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -863,6 +863,14 @@ struct GTY (()) aarch64_frame |
| This is the register they should use. */ |
| unsigned spare_pred_reg; |
| |
| + /* An SVE register that is saved below the hard frame pointer and that acts |
| + as a probe for later allocations, or INVALID_REGNUM if none. */ |
| + unsigned sve_save_and_probe; |
| + |
| + /* A register that is saved at the hard frame pointer and that acts |
| + as a probe for later allocations, or INVALID_REGNUM if none. */ |
| + unsigned hard_fp_save_and_probe; |
| + |
| bool laid_out; |
| |
| /* True if shadow call stack should be enabled for the current function. */ |
| diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c |
| index 3e01ec36c3a..3530a0d504b 100644 |
| --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c |
| +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c |
| @@ -11,11 +11,10 @@ |
| ** mov x11, sp |
| ** ... |
| ** sub sp, sp, x13 |
| -** str p4, \[sp\] |
| ** cbz w0, [^\n]* |
| +** str p4, \[sp\] |
| ** ... |
| ** ptrue p0\.b, all |
| -** ldr p4, \[sp\] |
| ** addvl sp, sp, #1 |
| ** ldr x24, \[sp\], 32 |
| ** ret |
| @@ -39,13 +38,12 @@ test_1 (int n) |
| ** mov x11, sp |
| ** ... |
| ** sub sp, sp, x13 |
| -** str p4, \[sp\] |
| ** cbz w0, [^\n]* |
| +** str p4, \[sp\] |
| ** str p5, \[sp, #1, mul vl\] |
| ** str p6, \[sp, #2, mul vl\] |
| ** ... |
| ** ptrue p0\.b, all |
| -** ldr p4, \[sp\] |
| ** addvl sp, sp, #1 |
| ** ldr x24, \[sp\], 32 |
| ** ret |
| -- |
| 2.34.1 |
| |
| |
| From 56df065080950bb30dda9c260f71be54269bdda5 Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:20 +0100 |
| Subject: [PATCH 18/19] aarch64: Remove below_hard_fp_saved_regs_size |
| |
| After previous patches, it's no longer necessary to store |
| saved_regs_size and below_hard_fp_saved_regs_size in the frame info. |
| All measurements instead use the top or bottom of the frame as |
| reference points. |
| |
| gcc/ |
| * config/aarch64/aarch64.h (aarch64_frame::saved_regs_size) |
| (aarch64_frame::below_hard_fp_saved_regs_size): Delete. |
| * config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly. |
| --- |
| gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++------------------- |
| gcc/config/aarch64/aarch64.h | 7 ------ |
| 2 files changed, 21 insertions(+), 31 deletions(-) |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index 6c59c39a639..b95e805a8cc 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8530,9 +8530,8 @@ aarch64_layout_frame (void) |
| |
| /* OFFSET is now the offset of the hard frame pointer from the bottom |
| of the callee save area. */ |
| - frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; |
| - bool saves_below_hard_fp_p |
| - = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); |
| + auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; |
| + bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0); |
| gcc_assert (!saves_below_hard_fp_p |
| || (frame.sve_save_and_probe != INVALID_REGNUM |
| && known_eq (frame.reg_offset[frame.sve_save_and_probe], |
| @@ -8602,9 +8601,8 @@ aarch64_layout_frame (void) |
| |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| - frame.saved_regs_size = offset - frame.bytes_below_saved_regs; |
| - gcc_assert (known_eq (frame.saved_regs_size, |
| - frame.below_hard_fp_saved_regs_size) |
| + auto saved_regs_size = offset - frame.bytes_below_saved_regs; |
| + gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) |
| || (frame.hard_fp_save_and_probe != INVALID_REGNUM |
| && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe], |
| frame.bytes_below_hard_fp))); |
| @@ -8613,7 +8611,7 @@ aarch64_layout_frame (void) |
| The saving of the bottommost register counts as an implicit probe, |
| which allows us to maintain the invariant described in the comment |
| at expand_prologue. */ |
| - gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0)); |
| + gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); |
| |
| offset += get_frame_size (); |
| offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| @@ -8670,7 +8668,7 @@ aarch64_layout_frame (void) |
| |
| HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; |
| HOST_WIDE_INT const_saved_regs_size; |
| - if (known_eq (frame.saved_regs_size, 0)) |
| + if (known_eq (saved_regs_size, 0)) |
| frame.initial_adjust = frame.frame_size; |
| else if (frame.frame_size.is_constant (&const_size) |
| && const_size < max_push_offset |
| @@ -8683,7 +8681,7 @@ aarch64_layout_frame (void) |
| frame.callee_adjust = const_size; |
| } |
| else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) |
| - && frame.saved_regs_size.is_constant (&const_saved_regs_size) |
| + && saved_regs_size.is_constant (&const_saved_regs_size) |
| && const_below_saved_regs + const_saved_regs_size < 512 |
| /* We could handle this case even with data below the saved |
| registers, provided that that data left us with valid offsets |
| @@ -8702,8 +8700,7 @@ aarch64_layout_frame (void) |
| frame.initial_adjust = frame.frame_size; |
| } |
| else if (saves_below_hard_fp_p |
| - && known_eq (frame.saved_regs_size, |
| - frame.below_hard_fp_saved_regs_size)) |
| + && known_eq (saved_regs_size, below_hard_fp_saved_regs_size)) |
| { |
| /* Frame in which all saves are SVE saves: |
| |
| @@ -8725,7 +8722,7 @@ aarch64_layout_frame (void) |
| [save SVE registers relative to SP] |
| sub sp, sp, bytes_below_saved_regs */ |
| frame.callee_adjust = const_above_fp; |
| - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| + frame.sve_callee_adjust = below_hard_fp_saved_regs_size; |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| else |
| @@ -8740,7 +8737,7 @@ aarch64_layout_frame (void) |
| [save SVE registers relative to SP] |
| sub sp, sp, bytes_below_saved_regs */ |
| frame.initial_adjust = frame.bytes_above_hard_fp; |
| - frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; |
| + frame.sve_callee_adjust = below_hard_fp_saved_regs_size; |
| frame.final_adjust = frame.bytes_below_saved_regs; |
| } |
| |
| @@ -9936,17 +9933,17 @@ aarch64_epilogue_uses (int regno) |
| | local variables | <-- frame_pointer_rtx |
| | | |
| +-------------------------------+ |
| - | padding | \ |
| - +-------------------------------+ | |
| - | callee-saved registers | | frame.saved_regs_size |
| - +-------------------------------+ | |
| - | LR' | | |
| - +-------------------------------+ | |
| - | FP' | | |
| - +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) |
| - | SVE vector registers | | \ |
| - +-------------------------------+ | | below_hard_fp_saved_regs_size |
| - | SVE predicate registers | / / |
| + | padding | |
| + +-------------------------------+ |
| + | callee-saved registers | |
| + +-------------------------------+ |
| + | LR' | |
| + +-------------------------------+ |
| + | FP' | |
| + +-------------------------------+ <-- hard_frame_pointer_rtx (aligned) |
| + | SVE vector registers | |
| + +-------------------------------+ |
| + | SVE predicate registers | |
| +-------------------------------+ |
| | dynamic allocation | |
| +-------------------------------+ |
| diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h |
| index fbfb73545ba..cfeaf4657ab 100644 |
| --- a/gcc/config/aarch64/aarch64.h |
| +++ b/gcc/config/aarch64/aarch64.h |
| @@ -777,18 +777,11 @@ struct GTY (()) aarch64_frame |
| STACK_BOUNDARY. */ |
| HOST_WIDE_INT saved_varargs_size; |
| |
| - /* The size of the callee-save registers with a slot in REG_OFFSET. */ |
| - poly_int64 saved_regs_size; |
| - |
| /* The number of bytes between the bottom of the static frame (the bottom |
| of the outgoing arguments) and the bottom of the register save area. |
| This value is always a multiple of STACK_BOUNDARY. */ |
| poly_int64 bytes_below_saved_regs; |
| |
| - /* The size of the callee-save registers with a slot in REG_OFFSET that |
| - are saved below the hard frame pointer. */ |
| - poly_int64 below_hard_fp_saved_regs_size; |
| - |
| /* The number of bytes between the bottom of the static frame (the bottom |
| of the outgoing arguments) and the hard frame pointer. This value is |
| always a multiple of STACK_BOUNDARY. */ |
| -- |
| 2.34.1 |
| |
| |
| From b96e66fd4ef3e36983969fb8cdd1956f551a074b Mon Sep 17 00:00:00 2001 |
| From: Richard Sandiford <richard.sandiford@arm.com> |
| Date: Tue, 12 Sep 2023 16:07:21 +0100 |
| Subject: [PATCH 19/19] aarch64: Make stack smash canary protect saved |
| registers |
| |
| AArch64 normally puts the saved registers near the bottom of the frame, |
| immediately above any dynamic allocations. But this means that a |
| stack-smash attack on those dynamic allocations could overwrite the |
| saved registers without needing to reach as far as the stack smash |
| canary. |
| |
| The same thing could also happen for variable-sized arguments that are |
| passed by value, since those are allocated before a call and popped on |
| return. |
| |
| This patch avoids that by putting the locals (and thus the canary) below |
| the saved registers when stack smash protection is active. |
| |
| The patch fixes CVE-2023-4039. |
| |
| gcc/ |
| * config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p): |
| New function. |
| (aarch64_layout_frame): Use it to decide whether locals should |
| go above or below the saved registers. |
| (aarch64_expand_prologue): Update stack layout comment. |
| Emit a stack tie after the final adjustment. |
| |
| gcc/testsuite/ |
| * gcc.target/aarch64/stack-protector-8.c: New test. |
| * gcc.target/aarch64/stack-protector-9.c: Likewise. |
| --- |
| gcc/config/aarch64/aarch64.cc | 46 +++++++-- |
| .../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++ |
| .../gcc.target/aarch64/stack-protector-9.c | 33 +++++++ |
| 3 files changed, 168 insertions(+), 6 deletions(-) |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c |
| create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c |
| |
| diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc |
| index b95e805a8cc..389c0e29353 100644 |
| --- a/gcc/config/aarch64/aarch64.cc |
| +++ b/gcc/config/aarch64/aarch64.cc |
| @@ -8394,6 +8394,20 @@ aarch64_needs_frame_chain (void) |
| return aarch64_use_frame_pointer; |
| } |
| |
| +/* Return true if the current function should save registers above |
| + the locals area, rather than below it. */ |
| + |
| +static bool |
| +aarch64_save_regs_above_locals_p () |
| +{ |
| + /* When using stack smash protection, make sure that the canary slot |
| + comes between the locals and the saved registers. Otherwise, |
| + it would be possible for a carefully sized smash attack to change |
| + the saved registers (particularly LR and FP) without reaching the |
| + canary. */ |
| + return crtl->stack_protect_guard; |
| +} |
| + |
| /* Mark the registers that need to be saved by the callee and calculate |
| the size of the callee-saved registers area and frame record (both FP |
| and LR may be omitted). */ |
| @@ -8405,6 +8419,7 @@ aarch64_layout_frame (void) |
| poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); |
| bool frame_related_fp_reg_p = false; |
| aarch64_frame &frame = cfun->machine->frame; |
| + poly_int64 top_of_locals = -1; |
| |
| frame.emit_frame_chain = aarch64_needs_frame_chain (); |
| |
| @@ -8471,9 +8486,16 @@ aarch64_layout_frame (void) |
| && !crtl->abi->clobbers_full_reg_p (regno)) |
| frame.reg_offset[regno] = SLOT_REQUIRED; |
| |
| + bool regs_at_top_p = aarch64_save_regs_above_locals_p (); |
| |
| poly_int64 offset = crtl->outgoing_args_size; |
| gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); |
| + if (regs_at_top_p) |
| + { |
| + offset += get_frame_size (); |
| + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| + top_of_locals = offset; |
| + } |
| frame.bytes_below_saved_regs = offset; |
| frame.sve_save_and_probe = INVALID_REGNUM; |
| |
| @@ -8613,15 +8635,18 @@ aarch64_layout_frame (void) |
| at expand_prologue. */ |
| gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); |
| |
| - offset += get_frame_size (); |
| - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| - auto top_of_locals = offset; |
| - |
| + if (!regs_at_top_p) |
| + { |
| + offset += get_frame_size (); |
| + offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| + top_of_locals = offset; |
| + } |
| offset += frame.saved_varargs_size; |
| gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT)); |
| frame.frame_size = offset; |
| |
| frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp; |
| + gcc_assert (known_ge (top_of_locals, 0)); |
| frame.bytes_above_locals = frame.frame_size - top_of_locals; |
| |
| frame.initial_adjust = 0; |
| @@ -9930,10 +9955,10 @@ aarch64_epilogue_uses (int regno) |
| | for register varargs | |
| | | |
| +-------------------------------+ |
| - | local variables | <-- frame_pointer_rtx |
| + | local variables (1) | <-- frame_pointer_rtx |
| | | |
| +-------------------------------+ |
| - | padding | |
| + | padding (1) | |
| +-------------------------------+ |
| | callee-saved registers | |
| +-------------------------------+ |
| @@ -9945,6 +9970,10 @@ aarch64_epilogue_uses (int regno) |
| +-------------------------------+ |
| | SVE predicate registers | |
| +-------------------------------+ |
| + | local variables (2) | |
| + +-------------------------------+ |
| + | padding (2) | |
| + +-------------------------------+ |
| | dynamic allocation | |
| +-------------------------------+ |
| | padding | |
| @@ -9954,6 +9983,9 @@ aarch64_epilogue_uses (int regno) |
| +-------------------------------+ |
| | | <-- stack_pointer_rtx (aligned) |
| |
| + The regions marked (1) and (2) are mutually exclusive. (2) is used |
| + when aarch64_save_regs_above_locals_p is true. |
| + |
| Dynamic stack allocations via alloca() decrease stack_pointer_rtx |
| but leave frame_pointer_rtx and hard_frame_pointer_rtx |
| unchanged. |
| @@ -10149,6 +10181,8 @@ aarch64_expand_prologue (void) |
| gcc_assert (known_eq (bytes_below_sp, final_adjust)); |
| aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, |
| !frame_pointer_needed, true); |
| + if (emit_frame_chain && maybe_ne (final_adjust, 0)) |
| + emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| |
| /* Return TRUE if we can use a simple_return insn. |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c |
| new file mode 100644 |
| index 00000000000..e71d820e365 |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c |
| @@ -0,0 +1,95 @@ |
| +/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */ |
| +/* { dg-final { check-function-bodies "**" "" } } */ |
| + |
| +void g(void *); |
| +__SVBool_t *h(void *); |
| + |
| +/* |
| +** test1: |
| +** sub sp, sp, #288 |
| +** stp x29, x30, \[sp, #?272\] |
| +** add x29, sp, #?272 |
| +** mrs (x[0-9]+), tpidr2_el0 |
| +** ldr (x[0-9]+), \[\1, #?16\] |
| +** str \2, \[sp, #?264\] |
| +** mov \2, #?0 |
| +** add x0, sp, #?8 |
| +** bl g |
| +** ... |
| +** mrs .* |
| +** ... |
| +** bne .* |
| +** ... |
| +** ldp x29, x30, \[sp, #?272\] |
| +** add sp, sp, #?288 |
| +** ret |
| +** bl __stack_chk_fail |
| +*/ |
| +int test1() { |
| + int y[0x40]; |
| + g(y); |
| + return 1; |
| +} |
| + |
| +/* |
| +** test2: |
| +** stp x29, x30, \[sp, #?-16\]! |
| +** mov x29, sp |
| +** sub sp, sp, #1040 |
| +** mrs (x[0-9]+), tpidr2_el0 |
| +** ldr (x[0-9]+), \[\1, #?16\] |
| +** str \2, \[sp, #?1032\] |
| +** mov \2, #?0 |
| +** add x0, sp, #?8 |
| +** bl g |
| +** ... |
| +** mrs .* |
| +** ... |
| +** bne .* |
| +** ... |
| +** add sp, sp, #?1040 |
| +** ldp x29, x30, \[sp\], #?16 |
| +** ret |
| +** bl __stack_chk_fail |
| +*/ |
| +int test2() { |
| + int y[0x100]; |
| + g(y); |
| + return 1; |
| +} |
| + |
| +#pragma GCC target "+sve" |
| + |
| +/* |
| +** test3: |
| +** stp x29, x30, \[sp, #?-16\]! |
| +** mov x29, sp |
| +** addvl sp, sp, #-18 |
| +** ... |
| +** str p4, \[sp\] |
| +** ... |
| +** sub sp, sp, #272 |
| +** mrs (x[0-9]+), tpidr2_el0 |
| +** ldr (x[0-9]+), \[\1, #?16\] |
| +** str \2, \[sp, #?264\] |
| +** mov \2, #?0 |
| +** add x0, sp, #?8 |
| +** bl h |
| +** ... |
| +** mrs .* |
| +** ... |
| +** bne .* |
| +** ... |
| +** add sp, sp, #?272 |
| +** ... |
| +** ldr p4, \[sp\] |
| +** ... |
| +** addvl sp, sp, #18 |
| +** ldp x29, x30, \[sp\], #?16 |
| +** ret |
| +** bl __stack_chk_fail |
| +*/ |
| +__SVBool_t test3() { |
| + int y[0x40]; |
| + return *h(y); |
| +} |
| diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c |
| new file mode 100644 |
| index 00000000000..58f322aa480 |
| --- /dev/null |
| +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c |
| @@ -0,0 +1,33 @@ |
| +/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */ |
| +/* { dg-final { check-function-bodies "**" "" } } */ |
| + |
| +/* |
| +** main: |
| +** ... |
| +** stp x29, x30, \[sp, #?-[0-9]+\]! |
| +** ... |
| +** sub sp, sp, #[0-9]+ |
| +** ... |
| +** str x[0-9]+, \[x29, #?-8\] |
| +** ... |
| +*/ |
| +int f(const char *); |
| +void g(void *); |
| +int main(int argc, char* argv[]) |
| +{ |
| + int a; |
| + int b; |
| + char c[2+f(argv[1])]; |
| + int d[0x100]; |
| + char y; |
| + |
| + y=42; a=4; b=10; |
| + c[0] = 'h'; c[1] = '\0'; |
| + |
| + c[f(argv[2])] = '\0'; |
| + |
| + __builtin_printf("%d %d\n%s\n", a, b, c); |
| + g(d); |
| + |
| + return 0; |
| +} |
| -- |
| 2.34.1 |
| |