target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
		
							parent
							
								
									d944e04961
								
							
						
					
					
						commit
						cc7ece7216
					
				| @ -59,6 +59,8 @@ | ||||
| @rrr_q1e3       ........ ... rm:5 ...... rn:5 rd:5      &qrrr_e q=1 esz=3 | ||||
| @rrrr_q1e3      ........ ... rm:5 . ra:5 rn:5 rd:5      &qrrrr_e q=1 esz=3 | ||||
| 
 | ||||
| @qrr_e          . q:1 ...... esz:2 ...... ...... rn:5 rd:5  &qrr_e | ||||
| 
 | ||||
| @qrrr_b         . q:1 ...... ... rm:5 ...... rn:5 rd:5  &qrrr_e esz=0 | ||||
| @qrrr_h         . q:1 ...... ... rm:5 ...... rn:5 rd:5  &qrrr_e esz=1 | ||||
| @qrrr_s         . q:1 ...... ... rm:5 ...... rn:5 rd:5  &qrrr_e esz=2 | ||||
| @ -1154,3 +1156,13 @@ TRN1            0.00 1110 .. 0 ..... 0 010 10 ..... .....   @qrrr_e | ||||
| TRN2            0.00 1110 .. 0 ..... 0 110 10 ..... .....   @qrrr_e | ||||
| ZIP1            0.00 1110 .. 0 ..... 0 011 10 ..... .....   @qrrr_e | ||||
| ZIP2            0.00 1110 .. 0 ..... 0 111 10 ..... .....   @qrrr_e | ||||
| 
 | ||||
| # Advanced SIMD Across Lanes | ||||
| 
 | ||||
| ADDV            0.00 1110 .. 11000 11011 10 ..... .....     @qrr_e | ||||
| SADDLV          0.00 1110 .. 11000 00011 10 ..... .....     @qrr_e | ||||
| UADDLV          0.10 1110 .. 11000 00011 10 ..... .....     @qrr_e | ||||
| SMAXV           0.00 1110 .. 11000 01010 10 ..... .....     @qrr_e | ||||
| UMAXV           0.10 1110 .. 11000 01010 10 ..... .....     @qrr_e | ||||
| SMINV           0.00 1110 .. 11000 11010 10 ..... .....     @qrr_e | ||||
| UMINV           0.10 1110 .. 11000 11010 10 ..... .....     @qrr_e | ||||
|  | ||||
| @ -6794,6 +6794,47 @@ TRANS(FNMADD, do_fmadd, a, true, true) | ||||
| TRANS(FMSUB, do_fmadd, a, false, true) | ||||
| TRANS(FNMSUB, do_fmadd, a, true, false) | ||||
| 
 | ||||
| /*
 | ||||
|  * Advanced SIMD Across Lanes | ||||
|  */ | ||||
| /*
 | ||||
|  * Emit code for an integer reduction across the lanes of vector | ||||
|  * register a->rn, writing the result to a->rd. | ||||
|  * | ||||
|  *  s        - translation context, passed through to the helpers. | ||||
|  *  a        - decoded fields; q, esz, rn and rd are read here. | ||||
|  *  widen    - added to a->esz to select the size the final result is | ||||
|  *             extended from (true for the *ADDLV callers in this file). | ||||
|  *  src_sign - ORed into the element MemOp used for every lane read | ||||
|  *             (callers pass either 0 or MO_SIGN). | ||||
|  *  fn       - 64-bit two-operand generator that folds one lane into | ||||
|  *             the accumulator: fn(acc, acc, lane). | ||||
|  * | ||||
|  * Returns false when the q/esz combination yields fewer than four | ||||
|  * lanes, and true in every other case, including when | ||||
|  * fp_access_check() fails and no reduction code is emitted. | ||||
|  */ | ||||
| static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, | ||||
|                              MemOp src_sign, NeonGenTwo64OpFn *fn) | ||||
| { | ||||
|     TCGv_i64 tcg_res, tcg_elt; | ||||
|     MemOp src_mop = a->esz | src_sign; | ||||
|     int elements = (a->q ? 16 : 8) >> a->esz; /* vector bytes >> log2(element bytes) */ | ||||
| 
 | ||||
|     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ | ||||
|     if (elements < 4) { | ||||
|         return false; | ||||
|     } | ||||
|     if (!fp_access_check(s)) { | ||||
|         /* NOTE(review): presumably fp_access_check() has already raised the trap - confirm. */
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     tcg_res = tcg_temp_new_i64(); | ||||
|     tcg_elt = tcg_temp_new_i64(); | ||||
|     /* Seed the accumulator with lane 0, then fold in lanes 1..elements-1. */ | ||||
|     read_vec_element(s, tcg_res, a->rn, 0, src_mop); | ||||
|     for (int i = 1; i < elements; i++) { | ||||
|         read_vec_element(s, tcg_elt, a->rn, i, src_mop); | ||||
|         fn(tcg_res, tcg_res, tcg_elt); | ||||
|     } | ||||
|     /*
|      * Extend from size a->esz + widen; MO_SIGN is not set in this MemOp, | ||||
|      * so this is presumably a zero-extension - confirm against | ||||
|      * tcg_gen_ext_i64(). | ||||
|      */ | ||||
|     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); | ||||
|     write_fp_dreg(s, a->rd, tcg_res); | ||||
|     return true; | ||||
| } | ||||
| /*
 | ||||
|  * Bind each integer across-lanes instruction to do_int_reduction(). | ||||
|  * The arguments after 'a' are, in order: widen, src_sign, fn. | ||||
|  * Only SADDLV and UADDLV pass widen = true; the S* forms pass MO_SIGN | ||||
|  * and ADDV and the U* forms pass 0 for src_sign. | ||||
|  * NOTE(review): TRANS is defined outside this excerpt; presumably it | ||||
|  * emits the trans_<NAME>() callback used by the decoder - confirm. | ||||
|  */ | ||||
| TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) | ||||
| TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) | ||||
| TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) | ||||
| TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) | ||||
| TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) | ||||
| TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) | ||||
| TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) | ||||
| 
 | ||||
| /* Shift a TCGv src by TCGv shift_amount, put result in dst.
 | ||||
|  * Note that it is the caller's responsibility to ensure that the | ||||
|  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM | ||||
| @ -9092,27 +9133,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) | ||||
|     int opcode = extract32(insn, 12, 5); | ||||
|     bool is_q = extract32(insn, 30, 1); | ||||
|     bool is_u = extract32(insn, 29, 1); | ||||
|     bool is_fp = false; | ||||
|     bool is_min = false; | ||||
|     int elements; | ||||
|     int i; | ||||
|     TCGv_i64 tcg_res, tcg_elt; | ||||
| 
 | ||||
|     switch (opcode) { | ||||
|     case 0x1b: /* ADDV */ | ||||
|         if (is_u) { | ||||
|             unallocated_encoding(s); | ||||
|             return; | ||||
|         } | ||||
|         /* fall through */ | ||||
|     case 0x3: /* SADDLV, UADDLV */ | ||||
|     case 0xa: /* SMAXV, UMAXV */ | ||||
|     case 0x1a: /* SMINV, UMINV */ | ||||
|         if (size == 3 || (size == 2 && !is_q)) { | ||||
|             unallocated_encoding(s); | ||||
|             return; | ||||
|         } | ||||
|         break; | ||||
|     case 0xc: /* FMAXNMV, FMINNMV */ | ||||
|     case 0xf: /* FMAXV, FMINV */ | ||||
|         /* Bit 1 of size field encodes min vs max and the actual size
 | ||||
| @ -9121,7 +9145,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) | ||||
|          * precision. | ||||
|          */ | ||||
|         is_min = extract32(size, 1, 1); | ||||
|         is_fp = true; | ||||
|         if (!is_u && dc_isar_feature(aa64_fp16, s)) { | ||||
|             size = 1; | ||||
|         } else if (!is_u || !is_q || extract32(size, 0, 1)) { | ||||
| @ -9132,6 +9155,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) | ||||
|         } | ||||
|         break; | ||||
|     default: | ||||
|     case 0x3: /* SADDLV, UADDLV */ | ||||
|     case 0xa: /* SMAXV, UMAXV */ | ||||
|     case 0x1a: /* SMINV, UMINV */ | ||||
|     case 0x1b: /* ADDV */ | ||||
|         unallocated_encoding(s); | ||||
|         return; | ||||
|     } | ||||
| @ -9142,52 +9169,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) | ||||
| 
 | ||||
|     elements = (is_q ? 16 : 8) >> size; | ||||
| 
 | ||||
|     tcg_res = tcg_temp_new_i64(); | ||||
|     tcg_elt = tcg_temp_new_i64(); | ||||
| 
 | ||||
|     /* These instructions operate across all lanes of a vector
 | ||||
|      * to produce a single result. We can guarantee that a 64 | ||||
|      * bit intermediate is sufficient: | ||||
|      *  + for [US]ADDLV the maximum element size is 32 bits, and | ||||
|      *    the result type is 64 bits | ||||
|      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the | ||||
|      *    same as the element size, which is 32 bits at most | ||||
|      * For the integer operations we can choose to work at 64 | ||||
|      * or 32 bits and truncate at the end; for simplicity | ||||
|      * we use 64 bits always. The floating point | ||||
|      * ops do require 32 bit intermediates, though. | ||||
|      */ | ||||
|     if (!is_fp) { | ||||
|         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); | ||||
| 
 | ||||
|         for (i = 1; i < elements; i++) { | ||||
|             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); | ||||
| 
 | ||||
|             switch (opcode) { | ||||
|             case 0x03: /* SADDLV / UADDLV */ | ||||
|             case 0x1b: /* ADDV */ | ||||
|                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); | ||||
|                 break; | ||||
|             case 0x0a: /* SMAXV / UMAXV */ | ||||
|                 if (is_u) { | ||||
|                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); | ||||
|                 } else { | ||||
|                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); | ||||
|                 } | ||||
|                 break; | ||||
|             case 0x1a: /* SMINV / UMINV */ | ||||
|                 if (is_u) { | ||||
|                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); | ||||
|                 } else { | ||||
|                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); | ||||
|                 } | ||||
|                 break; | ||||
|             default: | ||||
|                 g_assert_not_reached(); | ||||
|             } | ||||
| 
 | ||||
|         } | ||||
|     } else { | ||||
|     { | ||||
|         /* Floating point vector reduction ops which work across 32
 | ||||
|          * bit (single) or 16 bit (half-precision) intermediates. | ||||
|          * Note that correct NaN propagation requires that we do these | ||||
| @ -9195,34 +9177,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) | ||||
|          */ | ||||
|         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); | ||||
|         int fpopcode = opcode | is_min << 4 | is_u << 5; | ||||
|         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, size, | ||||
|                                              0, elements, fpst); | ||||
|         tcg_gen_extu_i32_i64(tcg_res, tcg_res32); | ||||
|         TCGv_i32 tcg_res = do_reduction_op(s, fpopcode, rn, size, | ||||
|                                            0, elements, fpst); | ||||
|         write_fp_sreg(s, rd, tcg_res); | ||||
|     } | ||||
| 
 | ||||
|     /* Now truncate the result to the width required for the final output */ | ||||
|     if (opcode == 0x03) { | ||||
|         /* SADDLV, UADDLV: result is 2*esize */ | ||||
|         size++; | ||||
|     } | ||||
| 
 | ||||
|     switch (size) { | ||||
|     case 0: | ||||
|         tcg_gen_ext8u_i64(tcg_res, tcg_res); | ||||
|         break; | ||||
|     case 1: | ||||
|         tcg_gen_ext16u_i64(tcg_res, tcg_res); | ||||
|         break; | ||||
|     case 2: | ||||
|         tcg_gen_ext32u_i64(tcg_res, tcg_res); | ||||
|         break; | ||||
|     case 3: | ||||
|         break; | ||||
|     default: | ||||
|         g_assert_not_reached(); | ||||
|     } | ||||
| 
 | ||||
|     write_fp_dreg(s, rd, tcg_res); | ||||
| } | ||||
| 
 | ||||
| /* AdvSIMD modified immediate
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Richard Henderson
						Richard Henderson