target/sh4: Use cmpxchg for movco when parallel_cpus
As with other targets, cmpxchg isn't quite right for ll/sc, since it suffers from an ABA race, but it is sufficient to implement portable atomic operations.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Message-Id: <20170907185057.23421-2-richard.henderson@linaro.org>
[aurel32: fix whitespace]
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
		
							parent
							
								
									6d56fc6cc3
								
							
						
					
					
						commit
						f85da3081d
					
				| @ -2679,6 +2679,8 @@ void cpu_loop(CPUSH4State *env) | ||||
|     target_siginfo_t info; | ||||
| 
 | ||||
|     while (1) { | ||||
|         bool arch_interrupt = true; | ||||
| 
 | ||||
|         cpu_exec_start(cs); | ||||
|         trapnr = cpu_exec(cs); | ||||
|         cpu_exec_end(cs); | ||||
| @ -2710,13 +2712,14 @@ void cpu_loop(CPUSH4State *env) | ||||
|                 int sig; | ||||
| 
 | ||||
|                 sig = gdb_handlesig(cs, TARGET_SIGTRAP); | ||||
|                 if (sig) | ||||
|                   { | ||||
|                 if (sig) { | ||||
|                     info.si_signo = sig; | ||||
|                     info.si_errno = 0; | ||||
|                     info.si_code = TARGET_TRAP_BRKPT; | ||||
|                     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); | ||||
|                   } | ||||
|                 } else { | ||||
|                     arch_interrupt = false; | ||||
|                 } | ||||
|             } | ||||
|             break; | ||||
| 	case 0xa0: | ||||
| @ -2727,9 +2730,9 @@ void cpu_loop(CPUSH4State *env) | ||||
|             info._sifields._sigfault._addr = env->tea; | ||||
|             queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); | ||||
| 	    break; | ||||
| 
 | ||||
|         case EXCP_ATOMIC: | ||||
|             cpu_exec_step_atomic(cs); | ||||
|             arch_interrupt = false; | ||||
|             break; | ||||
|         default: | ||||
|             printf ("Unhandled trap: 0x%x\n", trapnr); | ||||
| @ -2737,6 +2740,14 @@ void cpu_loop(CPUSH4State *env) | ||||
|             exit(EXIT_FAILURE); | ||||
|         } | ||||
|         process_pending_signals (env); | ||||
| 
 | ||||
|         /* Most of the traps imply an exception or interrupt, which
 | ||||
|            implies an REI instruction has been executed.  Which means | ||||
|            that LDST (aka LOCK_ADDR) should be cleared.  But there are | ||||
|            a few exceptions for traps internal to QEMU.  */ | ||||
|         if (arch_interrupt) { | ||||
|             env->lock_addr = -1; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -188,7 +188,9 @@ typedef struct CPUSH4State { | ||||
|     tlb_t itlb[ITLB_SIZE];	/* instruction translation table */ | ||||
|     tlb_t utlb[UTLB_SIZE];	/* unified translation table */ | ||||
| 
 | ||||
|     uint32_t ldst; | ||||
|     /* LDST = LOCK_ADDR != -1.  */ | ||||
|     uint32_t lock_addr; | ||||
|     uint32_t lock_value; | ||||
| 
 | ||||
|     /* Fields up to this point are cleared by a CPU reset */ | ||||
|     struct {} end_reset_fields; | ||||
|  | ||||
| @ -171,6 +171,7 @@ void superh_cpu_do_interrupt(CPUState *cs) | ||||
|     env->spc = env->pc; | ||||
|     env->sgr = env->gregs[15]; | ||||
|     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB); | ||||
|     env->lock_addr = -1; | ||||
| 
 | ||||
|     if (env->flags & DELAY_SLOT_MASK) { | ||||
|         /* Branch instruction should be executed again before delay slot. */ | ||||
|  | ||||
| @ -69,7 +69,8 @@ static TCGv cpu_gregs[32]; | ||||
| static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t; | ||||
| static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr; | ||||
| static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl; | ||||
| static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst; | ||||
| static TCGv cpu_pr, cpu_fpscr, cpu_fpul; | ||||
| static TCGv cpu_lock_addr, cpu_lock_value; | ||||
| static TCGv cpu_fregs[32]; | ||||
| 
 | ||||
| /* internal register indexes */ | ||||
| @ -147,8 +148,12 @@ void sh4_translate_init(void) | ||||
|                                               offsetof(CPUSH4State, | ||||
|                                                        delayed_cond), | ||||
|                                               "_delayed_cond_"); | ||||
|     cpu_ldst = tcg_global_mem_new_i32(cpu_env, | ||||
| 				      offsetof(CPUSH4State, ldst), "_ldst_"); | ||||
|     cpu_lock_addr = tcg_global_mem_new_i32(cpu_env, | ||||
|                                            offsetof(CPUSH4State, lock_addr), | ||||
|                                            "_lock_addr_"); | ||||
|     cpu_lock_value = tcg_global_mem_new_i32(cpu_env, | ||||
|                                             offsetof(CPUSH4State, lock_value), | ||||
|                                             "_lock_value_"); | ||||
| 
 | ||||
|     for (i = 0; i < 32; i++) | ||||
|         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env, | ||||
| @ -1549,31 +1554,64 @@ static void _decode_opc(DisasContext * ctx) | ||||
| 	return; | ||||
|     case 0x0073: | ||||
|         /* MOVCO.L
 | ||||
| 	       LDST -> T | ||||
|                If (T == 1) R0 -> (Rn) | ||||
|                0 -> LDST | ||||
|         */ | ||||
|          *     LDST -> T | ||||
|          *     If (T == 1) R0 -> (Rn) | ||||
|          *     0 -> LDST | ||||
|          * | ||||
|          * The above description doesn't work in a parallel context. | ||||
|          * Since we currently support no smp boards, this implies user-mode. | ||||
|          * But we can still support the official mechanism while user-mode | ||||
|          * is single-threaded.  */ | ||||
|         CHECK_SH4A | ||||
|         { | ||||
|             TCGLabel *label = gen_new_label(); | ||||
|             tcg_gen_mov_i32(cpu_sr_t, cpu_ldst); | ||||
| 	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label); | ||||
|             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); | ||||
| 	    gen_set_label(label); | ||||
| 	    tcg_gen_movi_i32(cpu_ldst, 0); | ||||
| 	    return; | ||||
|             TCGLabel *fail = gen_new_label(); | ||||
|             TCGLabel *done = gen_new_label(); | ||||
| 
 | ||||
|             if ((tb_cflags(ctx->tb) & CF_PARALLEL)) { | ||||
|                 TCGv tmp; | ||||
| 
 | ||||
|                 tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8), | ||||
|                                    cpu_lock_addr, fail); | ||||
|                 tmp = tcg_temp_new(); | ||||
|                 tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value, | ||||
|                                            REG(0), ctx->memidx, MO_TEUL); | ||||
|                 tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value); | ||||
|                 tcg_temp_free(tmp); | ||||
|             } else { | ||||
|                 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail); | ||||
|                 tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); | ||||
|                 tcg_gen_movi_i32(cpu_sr_t, 1); | ||||
|             } | ||||
|             tcg_gen_br(done); | ||||
| 
 | ||||
|             gen_set_label(fail); | ||||
|             tcg_gen_movi_i32(cpu_sr_t, 0); | ||||
| 
 | ||||
|             gen_set_label(done); | ||||
|             tcg_gen_movi_i32(cpu_lock_addr, -1); | ||||
|         } | ||||
|         return; | ||||
|     case 0x0063: | ||||
|         /* MOVLI.L @Rm,R0
 | ||||
|                1 -> LDST | ||||
|                (Rm) -> R0 | ||||
|                When interrupt/exception | ||||
|                occurred 0 -> LDST | ||||
|         */ | ||||
|          *     1 -> LDST | ||||
|          *     (Rm) -> R0 | ||||
|          *     When interrupt/exception | ||||
|          *     occurred 0 -> LDST | ||||
|          * | ||||
|          * In a parallel context, we must also save the loaded value | ||||
|          * for use with the cmpxchg that we'll use with movco.l.  */ | ||||
|         CHECK_SH4A | ||||
|         tcg_gen_movi_i32(cpu_ldst, 0); | ||||
|         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); | ||||
|         tcg_gen_movi_i32(cpu_ldst, 1); | ||||
|         if ((tb_cflags(ctx->tb) & CF_PARALLEL)) { | ||||
|             TCGv tmp = tcg_temp_new(); | ||||
|             tcg_gen_mov_i32(tmp, REG(B11_8)); | ||||
|             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); | ||||
|             tcg_gen_mov_i32(cpu_lock_value, REG(0)); | ||||
|             tcg_gen_mov_i32(cpu_lock_addr, tmp); | ||||
|             tcg_temp_free(tmp); | ||||
|         } else { | ||||
|             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); | ||||
|             tcg_gen_movi_i32(cpu_lock_addr, 0); | ||||
|         } | ||||
|         return; | ||||
|     case 0x0093:		/* ocbi @Rn */ | ||||
| 	{ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Richard Henderson
						Richard Henderson