target-arm: convert NEON VZIP/VUZP/VTRN helper functions to pure TCG
The neon_trn_u8, neon_trn_u16, neon_unzip_u8, neon_zip_u8 and neon_zip_u16 helpers used fixed registers to return values. This patch replaces them with TCG code, so T0/T1 are no longer used directly by the helper functions. Bugs in the gen_neon_unzip register load code were also fixed. Signed-off-by: Filip Navara <filip.navara@gmail.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
		
							parent
							
								
									a6445c52aa
								
							
						
					
					
						commit
						194576157a
					
				@ -338,12 +338,6 @@ DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
 | 
			
		||||
DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
 | 
			
		||||
DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
 | 
			
		||||
 | 
			
		||||
DEF_HELPER_0(neon_trn_u8, void)
 | 
			
		||||
DEF_HELPER_0(neon_trn_u16, void)
 | 
			
		||||
DEF_HELPER_0(neon_unzip_u8, void)
 | 
			
		||||
DEF_HELPER_0(neon_zip_u8, void)
 | 
			
		||||
DEF_HELPER_0(neon_zip_u16, void)
 | 
			
		||||
 | 
			
		||||
DEF_HELPER_2(neon_min_f32, i32, i32, i32)
 | 
			
		||||
DEF_HELPER_2(neon_max_f32, i32, i32, i32)
 | 
			
		||||
DEF_HELPER_2(neon_abd_f32, i32, i32, i32)
 | 
			
		||||
 | 
			
		||||
@ -495,61 +495,3 @@ uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
 | 
			
		||||
    }
 | 
			
		||||
    return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* These need to return a pair of values, so still use T0/T1.  */
 | 
			
		||||
/* Transpose.  Argument order is rather strange to avoid special casing
 | 
			
		||||
   the translation code.
 | 
			
		||||
   On input T0 = rm, T1 = rd.  On output T0 = rd, T1 = rm  */
 | 
			
		||||
void HELPER(neon_trn_u8)(void)
 | 
			
		||||
{
 | 
			
		||||
    uint32_t rd;
 | 
			
		||||
    uint32_t rm;
 | 
			
		||||
    rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
 | 
			
		||||
    rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
 | 
			
		||||
    T0 = rd;
 | 
			
		||||
    T1 = rm;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void HELPER(neon_trn_u16)(void)
 | 
			
		||||
{
 | 
			
		||||
    uint32_t rd;
 | 
			
		||||
    uint32_t rm;
 | 
			
		||||
    rd = (T0 << 16) | (T1 & 0xffff);
 | 
			
		||||
    rm = (T1 >> 16) | (T0 & 0xffff0000);
 | 
			
		||||
    T0 = rd;
 | 
			
		||||
    T1 = rm;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Worker routines for zip and unzip.  */
 | 
			
		||||
void HELPER(neon_unzip_u8)(void)
 | 
			
		||||
{
 | 
			
		||||
    uint32_t rd;
 | 
			
		||||
    uint32_t rm;
 | 
			
		||||
    rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
 | 
			
		||||
         | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
 | 
			
		||||
    rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
 | 
			
		||||
         | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
 | 
			
		||||
    T0 = rd;
 | 
			
		||||
    T1 = rm;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void HELPER(neon_zip_u8)(void)
 | 
			
		||||
{
 | 
			
		||||
    uint32_t rd;
 | 
			
		||||
    uint32_t rm;
 | 
			
		||||
    rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
 | 
			
		||||
         | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
 | 
			
		||||
    rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
 | 
			
		||||
         | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
 | 
			
		||||
    T0 = rd;
 | 
			
		||||
    T1 = rm;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void HELPER(neon_zip_u16)(void)
 | 
			
		||||
{
 | 
			
		||||
    uint32_t tmp;
 | 
			
		||||
 | 
			
		||||
    tmp = (T0 & 0xffff) | (T1 << 16);
 | 
			
		||||
    T1 = (T1 & 0xffff0000) | (T0 >> 16);
 | 
			
		||||
    T0 = tmp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -3627,24 +3627,157 @@ static inline void gen_neon_get_scalar(int size, int reg)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Emit TCG ops that de-interleave the bytes of t0 and t1 in place.
   With t0 = {a0,a1,a2,a3} and t1 = {b0,b1,b2,b3} (lowest byte first):
     t0 becomes {a0,a2,b0,b2}
     t1 becomes {a1,a3,b1,b3}  */
static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
{
    TCGv even, odd, part;

    even = new_tmp();
    odd = new_tmp();
    part = new_tmp();

    /* even = a0 | a2 << 8 | b0 << 16 | b2 << 24  */
    tcg_gen_andi_i32(even, t0, 0xff);
    tcg_gen_shri_i32(part, t0, 8);
    tcg_gen_andi_i32(part, part, 0xff00);
    tcg_gen_or_i32(even, even, part);
    tcg_gen_shli_i32(part, t1, 16);
    tcg_gen_andi_i32(part, part, 0xff0000);
    tcg_gen_or_i32(even, even, part);
    tcg_gen_shli_i32(part, t1, 8);
    tcg_gen_andi_i32(part, part, 0xff000000);
    tcg_gen_or_i32(even, even, part);

    /* odd = a1 | a3 << 8 | b1 << 16 | b3 << 24  */
    tcg_gen_shri_i32(odd, t0, 8);
    tcg_gen_andi_i32(odd, odd, 0xff);
    tcg_gen_shri_i32(part, t0, 16);
    tcg_gen_andi_i32(part, part, 0xff00);
    tcg_gen_or_i32(odd, odd, part);
    tcg_gen_shli_i32(part, t1, 8);
    tcg_gen_andi_i32(part, part, 0xff0000);
    tcg_gen_or_i32(odd, odd, part);
    tcg_gen_andi_i32(part, t1, 0xff000000);

    /* t0 and t1 are only overwritten once every read of them is done.  */
    tcg_gen_or_i32(t1, odd, part);
    tcg_gen_mov_i32(t0, even);

    dead_tmp(part);
    dead_tmp(odd);
    dead_tmp(even);
}
 | 
			
		||||
 | 
			
		||||
/* Emit TCG ops that interleave the bytes of t0 and t1 in place.
   With t0 = {a0,a1,a2,a3} and t1 = {b0,b1,b2,b3} (lowest byte first):
     t0 becomes {a0,b0,a1,b1}
     t1 becomes {a2,b2,a3,b3}  */
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
{
    TCGv rd, rm, tmp;

    rd = new_tmp();
    rm = new_tmp();
    tmp = new_tmp();

    /* rd = a0 | b0 << 8 | a1 << 16 | b1 << 24.
       Lanes 2 and 3 take byte 1 of each input, hence the shifts by 8
       and 16.  The previous shifts (16 and 24) duplicated byte 0 and
       dropped a1/b1 from the result.  */
    tcg_gen_andi_i32(rd, t0, 0xff);
    tcg_gen_shli_i32(tmp, t1, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shli_i32(tmp, t0, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shli_i32(tmp, t1, 16);
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
    tcg_gen_or_i32(rd, rd, tmp);

    /* rm = a2 | b2 << 8 | a3 << 16 | b3 << 24  */
    tcg_gen_andi_i32(rm, t1, 0xff000000);
    tcg_gen_shri_i32(tmp, t0, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t1, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t0, 16);
    tcg_gen_andi_i32(tmp, tmp, 0xff);

    /* t0 and t1 are only overwritten once every read of them is done.  */
    tcg_gen_or_i32(t1, rm, tmp);
    tcg_gen_mov_i32(t0, rd);

    dead_tmp(tmp);
    dead_tmp(rm);
    dead_tmp(rd);
}
 | 
			
		||||
 | 
			
		||||
/* Emit TCG ops that interleave the halfwords of t0 and t1 in place.
   With t0 = {a0,a1} and t1 = {b0,b1} (low halfword first):
     t0 becomes {a0,b0}
     t1 becomes {a1,b1}  */
static void gen_neon_zip_u16(TCGv t0, TCGv t1)
{
    TCGv low, part;

    low = new_tmp();
    part = new_tmp();

    /* low = a0 | b0 << 16  */
    tcg_gen_andi_i32(low, t0, 0xffff);
    tcg_gen_shli_i32(part, t1, 16);
    tcg_gen_or_i32(low, low, part);

    /* t1 = a1 | (b1 << 16), built in place; t0 is still unmodified here.  */
    tcg_gen_andi_i32(t1, t1, 0xffff0000);
    tcg_gen_shri_i32(part, t0, 16);
    tcg_gen_or_i32(t1, t1, part);

    tcg_gen_mov_i32(t0, low);

    dead_tmp(part);
    dead_tmp(low);
}
 | 
			
		||||
 | 
			
		||||
static void gen_neon_unzip(int reg, int q, int tmp, int size)
 | 
			
		||||
{
 | 
			
		||||
    int n;
 | 
			
		||||
 | 
			
		||||
    for (n = 0; n < q + 1; n += 2) {
 | 
			
		||||
        NEON_GET_REG(T0, reg, n);
 | 
			
		||||
        NEON_GET_REG(T0, reg, n + n);
 | 
			
		||||
        NEON_GET_REG(T1, reg, n + 1);
 | 
			
		||||
        switch (size) {
 | 
			
		||||
        case 0: gen_helper_neon_unzip_u8(); break;
 | 
			
		||||
        case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same.  */
 | 
			
		||||
        case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;
 | 
			
		||||
        case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same.  */
 | 
			
		||||
        case 2: /* no-op */; break;
 | 
			
		||||
        default: abort();
 | 
			
		||||
        }
 | 
			
		||||
        gen_neon_movl_scratch_T0(tmp + n);
 | 
			
		||||
        gen_neon_movl_scratch_T1(tmp + n + 1);
 | 
			
		||||
        gen_neon_movl_T0_scratch(tmp + n);
 | 
			
		||||
        gen_neon_movl_T1_scratch(tmp + n + 1);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Emit TCG ops for a byte-wise transpose of t0 and t1 in place.
   With t0 = {a0,a1,a2,a3} and t1 = {b0,b1,b2,b3} (lowest byte first):
     t0 becomes {b0,a0,b2,a2}
     t1 becomes {b1,a1,b3,a3}  */
static void gen_neon_trn_u8(TCGv t0, TCGv t1)
{
    TCGv even, part;

    even = new_tmp();
    part = new_tmp();

    /* even = ((t0 << 8) & 0xff00ff00) | (t1 & 0x00ff00ff)  */
    tcg_gen_shli_i32(even, t0, 8);
    tcg_gen_andi_i32(even, even, 0xff00ff00);
    tcg_gen_andi_i32(part, t1, 0x00ff00ff);
    tcg_gen_or_i32(even, even, part);

    /* t1 = ((t1 >> 8) & 0x00ff00ff) | (t0 & 0xff00ff00), built in place
       while t0 is still unmodified.  */
    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(part, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, part);

    tcg_gen_mov_i32(t0, even);

    dead_tmp(part);
    dead_tmp(even);
}
 | 
			
		||||
 | 
			
		||||
/* Emit TCG ops for a halfword-wise transpose of t0 and t1 in place.
   With t0 = {a0,a1} and t1 = {b0,b1} (low halfword first):
     t0 becomes {b0,a0}
     t1 becomes {b1,a1}  */
static void gen_neon_trn_u16(TCGv t0, TCGv t1)
{
    TCGv low, part;

    low = new_tmp();
    part = new_tmp();

    /* low = (t0 << 16) | (t1 & 0xffff)  */
    tcg_gen_shli_i32(low, t0, 16);
    tcg_gen_andi_i32(part, t1, 0xffff);
    tcg_gen_or_i32(low, low, part);

    /* t1 = (t1 >> 16) | (t0 & 0xffff0000), built in place while t0 is
       still unmodified.  */
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(part, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, part);

    tcg_gen_mov_i32(t0, low);

    dead_tmp(part);
    dead_tmp(low);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static struct {
 | 
			
		||||
    int nregs;
 | 
			
		||||
    int interleave;
 | 
			
		||||
@ -5256,8 +5389,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
 | 
			
		||||
                        NEON_GET_REG(T0, rd, n);
 | 
			
		||||
                        NEON_GET_REG(T1, rd, n);
 | 
			
		||||
                        switch (size) {
 | 
			
		||||
                        case 0: gen_helper_neon_zip_u8(); break;
 | 
			
		||||
                        case 1: gen_helper_neon_zip_u16(); break;
 | 
			
		||||
                        case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;
 | 
			
		||||
                        case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break;
 | 
			
		||||
                        case 2: /* no-op */; break;
 | 
			
		||||
                        default: abort();
 | 
			
		||||
                        }
 | 
			
		||||
@ -5442,8 +5575,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
 | 
			
		||||
                        case 33: /* VTRN */
 | 
			
		||||
                            NEON_GET_REG(T1, rd, pass);
 | 
			
		||||
                            switch (size) {
 | 
			
		||||
                            case 0: gen_helper_neon_trn_u8(); break;
 | 
			
		||||
                            case 1: gen_helper_neon_trn_u16(); break;
 | 
			
		||||
                            case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;
 | 
			
		||||
                            case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break;
 | 
			
		||||
                            case 2: abort();
 | 
			
		||||
                            default: return 1;
 | 
			
		||||
                            }
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user