VirtualBox

source: vbox/trunk/src/recompiler/tcg/i386/tcg-target.c@38064

Last change on this file since 38064 was 38064, checked in by vboxsync, 13 years ago:

tcg-target.c: Make sure the stack is aligned on darwin (VBoxREM64 only).

  • Property svn:eol-style set to native
File size: 66.3 KB
 
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};

static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        tcg_target_long value, tcg_target_long addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}

#ifdef VBOX
/* Emits stack alignment checks for strict builds. */
DECLINLINE(void) tcg_gen_stack_alignment_check(TCGContext *s)
{
# if defined(RT_STRICT) && defined(RT_OS_DARWIN) /** @todo all OSes? */
    tcg_out8(s, 0xf7); tcg_out8(s, 0xc4);   /* test %esp, 1fh */
    tcg_out32(s, TCG_TARGET_STACK_ALIGN - 1);
    tcg_out8(s, 0x74);                      /* jz imm8 */
    tcg_out8(s, 1);                         /* $+3 (over int3) */
    tcg_out8(s, 0xcc);                      /* int3 */
# else
    NOREF(s);
# endif
}
#endif /* VBOX */
/* Maximum number of registers used for input function arguments. */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    flags &= TCG_CALL_TYPE_MASK;
    switch(flags) {
    case TCG_CALL_TYPE_STD:
        return 0;
    case TCG_CALL_TYPE_REGPARM_1:
    case TCG_CALL_TYPE_REGPARM_2:
    case TCG_CALL_TYPE_REGPARM:
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    default:
        tcg_abort();
    }
}

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x) ((x) & 7)
#else
# define LOWREGMASK(x) (x)
#endif

#define P_EXT       0x100       /* 0x0f opcode prefix */
#define P_DATA16    0x200       /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32   0x400       /* 0x67 opcode prefix */
# define P_REXW     0x800       /* Set REX.W = 1 */
# define P_REXB_R   0x1000      /* REG field as byte register */
# define P_REXB_RM  0x2000      /* R/M field as byte register */
#else
# define P_ADDR32   0
# define P_REXW     0
# define P_REXB_R   0
# define P_REXB_RM  0
#endif
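
/* These flags live above the opcode byte proper, so they can be ORed into
   an opcode value and peeled off again by tcg_out_opc().  E.g. OPC_MOVZWL
   below is (0xb7 | P_EXT), which emits the two-byte escape 0f b7; adding
   P_DATA16 or P_REXW would additionally put a 66 or REX.W prefix in front
   of it. */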

#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7
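
/* For example, tgen_arithi() below combines these with the group-1
   immediate opcodes: ARITH_SUB with an 8-bit immediate and %esp yields
   83 ec xx, i.e. "sub $imm8,%esp".  The register-register forms instead
   OR the code into the opcode byte: (0x03 | (ARITH_SUB << 3)) == 0x2b,
   the SUB Gv,Ev opcode. */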

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3. */
#define EXT3_NOT  2
#define EXT3_NEG  3
#define EXT3_MUL  4
#define EXT3_IMUL 5
#define EXT3_DIV  6
#define EXT3_IDIV 7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5. */
#define EXT5_INC_Ev   0
#define EXT5_DEC_Ev   1
#define EXT5_CALLN_Ev 2
#define EXT5_JMPN_Ev  4

/* Condition codes to be added to OPC_JCC_{long,short}. */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
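
/* Note that the signed TCG conditions map to the jcc forms that test
   SF != OF (JL/JGE/JLE/JG) while the unsigned ones map to the forms that
   test CF (JB/JAE/JBE/JA); tcg_out_cmp() establishes the flags with CMP
   or TEST before any of these are used. */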

#if defined(VBOX)
/* Calc the size of the tcg_out_opc() result. */
static inline unsigned char tcg_calc_opc_len(TCGContext *s, int opc, int r, int rm, int x)
{
    unsigned char len = 1;
# if TCG_TARGET_REG_BITS == 64
    unsigned rex;
    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
    if (rex) len++;
    if (opc & P_ADDR32) len++;
# endif
    if (opc & P_DATA16) len++;
    if (opc & P_EXT) len++;

    return len;
}
#endif

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation. */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;         /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output. */
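    /* E.g. "movzbl %sil,%eax" must be encoded 40 0f b6 c6: without the
       (otherwise empty) 40 REX prefix, ModRM register number 6 would
       select %dh rather than %sil. */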
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them. */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction. */
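/* For reference, the special ModRM encodings dodged below: mod=00 with
   rm=101 means disp32 absolute in 32-bit mode but RIP-relative in 64-bit
   mode; rm=100 escapes to a SIB byte; a SIB base of 101 under mod=00
   means "disp32 with no base"; and a SIB index of 100 means "no index".
   Hence the %esp/%r12 and %ebp/%r13 special cases in the code. */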

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding. */
#ifdef VBOX
            tcg_target_long pc = (tcg_target_long)s->code_ptr
                               + tcg_calc_opc_len(s, opc, r, 0, 0) + 1 + 4;
#else
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
#endif
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
#ifdef VBOX
                Assert(pc == (tcg_target_long)s->code_ptr);
#endif
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing. */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable. */
            tcg_abort();
        } else {
            /* Absolute address. */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing. */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form. */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format. */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format. */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index. */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

/* A simplification of the above with no index or shift. */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi. */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW. */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16. */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends. */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals. */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding. */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation. */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

#ifdef VBOX
static void tcg_out_subi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_SUB + P_REXW, reg, val, 0);
    }
}
#endif

/* Use SMALL != 0 to force a short forward branch. */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
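
/* Note on the val - 2 / val - 5 / val - 6 adjustments above: they convert
   the label-relative distance into a displacement measured from the end of
   the branch instruction, which is 2 bytes for the short forms (opcode +
   imm8), 5 bytes for e9 + rel32, and 6 bytes for the 0f 8x + rel32 long
   jcc forms. */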

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross-basic-block temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky. */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code. */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
#endif

static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
#ifdef VBOX
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr
                         - tcg_calc_opc_len(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0)
                         - 4;
#else
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
#endif

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
#ifdef VBOX
    tcg_gen_stack_alignment_check(s);
#endif
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)

#include "../../softmmu_defs.h"

static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};

static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDR_LOW_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   First argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   Second argument register is clobbered. */
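
/* A sketch of the sequence emitted below (r0/r1 denote the first two
   call-argument registers):
       mov  addrlo, r1
       mov  addrlo, r0
       shr  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r1
       and  $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), r0
       and  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r1
       lea  tlb_table[mem_index][0].which(env, r1), r1
       cmp  (r1), r0
       mov  addrlo, r0
       jne  miss                      ; displacement patched later
       add  addend-which(r1), r0      ; TLB hit: r0 is now a host address */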

static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit. */

    /* add addend(r1), r0 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
#endif

static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        if (bswap) {
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

#if defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB)

static void * const vbox_ld_helpers[] = {
    __ldub_vbox_phys,
    __lduw_vbox_phys,
    __ldul_vbox_phys,
    __ldq_vbox_phys,
    __ldb_vbox_phys,
    __ldw_vbox_phys,
    __ldl_vbox_phys,
    __ldq_vbox_phys,
};

static void * const vbox_st_helpers[] = {
    __stb_vbox_phys,
    __stw_vbox_phys,
    __stl_vbox_phys,
    __stq_vbox_phys
};

DECLINLINE(void) tcg_out_long_call(TCGContext *s, void *dst)
{
    intptr_t disp;
# ifdef VBOX
    tcg_gen_stack_alignment_check(s);
# endif
    disp = (uintptr_t)dst - (uintptr_t)s->code_ptr - 5;
    tcg_out8(s, 0xe8);      /* call disp32 */
    tcg_out32(s, disp);     /* disp32 */
}

static void tcg_out_vbox_phys_read(TCGContext *s, int index,
                                   int addr_reg,
                                   int data_reg, int data_reg2)
{
    int useReg2 = ((index & 3) == 3);

    /** @todo should we make phys address accessors fastcalls - probably not a big deal */
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));

# if 0
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_REG_EAX, addr_reg);
# endif

    tcg_out_long_call(s, vbox_ld_helpers[index]);

    /* mov %eax, data_reg */
    tcg_out_mov(s, data_reg, TCG_REG_EAX);

    /* returned 64-bit value */
    if (useReg2)
        tcg_out_mov(s, data_reg2, TCG_REG_EDX);
}

static void tcg_out_vbox_phys_write(TCGContext *s, int index,
                                    int addr_reg,
                                    int val_reg, int val_reg2)
{
    int useReg2 = ((index & 3) == 3);

# if 0
    /* out parameter (value2) */
    if (useReg2)
        tcg_out_push(s, val_reg2);
    /* out parameter (value) */
    tcg_out_push(s, val_reg);
    /* out parameter (address), note that phys address is always 64-bit */
    AssertMsg(sizeof(RTGCPHYS) == 8, ("Physical address must be 64-bits, update caller\n"));
    tcg_out8(s, 0x6a); tcg_out8(s, 0x00);   /* push $0 */
    tcg_out_push(s, addr_reg);
# else
    Assert(val_reg != TCG_REG_EAX && (!useReg2 || (val_reg2 != TCG_REG_EAX)));
    /* mov addr_reg, %eax */
    tcg_out_mov(s, TCG_REG_EAX, addr_reg);
    Assert(!useReg2 || (val_reg2 != TCG_REG_EDX));
    /* mov val_reg, %edx */
    tcg_out_mov(s, TCG_REG_EDX, val_reg);
    if (useReg2)
        tcg_out_mov(s, TCG_REG_ECX, val_reg2);

# endif
    /* call it */
    tcg_out_long_call(s, vbox_st_helpers[index]);

    /* clean stack after us */
# if 0
    tcg_out_addi(s, TCG_REG_ESP, 8 + (useReg2 ? 8 : 4));
# endif
}

#endif /* defined(VBOX) && defined(REM_PHYS_ADDR_IN_TLB) */

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit. */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss. */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* The first argument is already loaded with addrlo. */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing. */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here. */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
#else  /* VBOX */
# error "broken"
    tcg_out_vbox_phys_read(s, opc, r0, data_reg, data_reg2);
#endif
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit. */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss. */

    /* label1: */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

# if !defined(VBOX) || !defined(REM_PHYS_ADDR_IN_TLB)
#  if defined(VBOX) && defined(RT_OS_DARWIN) && ARCH_BITS == 32
#   define VBOX_16_BYTE_STACK_ALIGN
#  endif

    /* XXX: move that code at the end of the TB */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
#  ifdef VBOX_16_BYTE_STACK_ALIGN
            tcg_out_subi(s, TCG_REG_ESP, 12);
#  endif
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
#  ifdef VBOX_16_BYTE_STACK_ALIGN
            tcg_out_pushi(s, 0);
#  endif
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
#  ifdef VBOX_16_BYTE_STACK_ALIGN
            tcg_out_subi(s, TCG_REG_ESP, 12);
#  endif
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

#  ifdef VBOX_16_BYTE_STACK_ALIGN
    if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, RT_ALIGN(stack_adjust, 16));
    }
#  else
    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add. */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
    }
#  endif

# else  /* VBOX && REM_PHYS_ADDR_IN_TLB */
#  error Borked
    tcg_out_vbox_phys_write(s, opc, r0, data_reg, data_reg2);
# endif /* VBOX && REM_PHYS_ADDR_IN_TLB */

    /* label2: */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing. */

            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit. */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit. */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA. */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add. */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1946
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

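/*
 * The qemu_ld/st constraints come in three flavours, depending on how many
 * host registers a guest virtual address occupies: one on 64-bit hosts,
 * one on 32-bit hosts with 32-bit guest addresses, and two (hence the
 * extra "L" operands) on 32-bit hosts with 64-bit guest addresses.
 */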
#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },  /* sentinel terminating the list for tcg_add_target_add_op_defs */
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    /* TCG_REG_R14, */ /* Currently used for the global env. */
    TCG_REG_R15,
#else
# ifndef VBOX
    /* TCG_REG_EBP, */ /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
# else
    TCG_REG_EBP,
    TCG_REG_EBX,
    /* TCG_REG_ESI, */ /* Currently used for the global env. */
    TCG_REG_EDI,
# endif
#endif
};
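/*
 * The registers commented out above are the ones dedicated to the global
 * env pointer (see the matching comments): TCG treats env as a fixed
 * global register, so generated code never clobbers it and the prologue
 * need not save it.
 */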

/* Generate global QEMU prologue and epilogue code. */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, frame_size, push_size, stack_addend;

    /* TB prologue */

    /* Save all callee-saved registers. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }
# if defined(VBOX_STRICT) && defined(RT_ARCH_X86)
    tcg_out8(s, 0x31); /* xor %ebp, %ebp */
    tcg_out8(s, 0xed);
# endif

    /* Reserve some stack space. */
    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
    push_size *= TCG_TARGET_REG_BITS / 8;

    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE;
    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    stack_addend = frame_size - push_size;
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
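    /*
     * Illustrative arithmetic, assuming the usual values of
     * TCG_STATIC_CALL_ARGS_SIZE (128) and TCG_TARGET_STACK_ALIGN (16):
     * on a 32-bit host saving four registers, push_size = (1 + 4) * 4 = 20
     * bytes (return address plus pushes), frame_size rounds 20 + 128 = 148
     * up to 160, and stack_addend = 160 - 20 = 140, so the total
     * displacement from the caller's stack pointer is the aligned
     * frame_size.
     */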

    /* jmp *tb. */
# ifdef VBOX
    tcg_gen_stack_alignment_check(s); /* must be emitted before the jump */
# endif
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[0]);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_ESP, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);
}
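/*
 * Roughly, the code emitted above looks like this (32-bit case, purely
 * illustrative; the TB address arrives in tcg_target_call_iarg_regs[0],
 * which is EAX on 32-bit hosts):
 *
 *   prologue:   push %ebp; push %ebx; ...    ; callee-saved registers
 *               sub  $stack_addend, %esp     ; static call-args area
 *               jmp  *%eax                   ; enter the translated block
 *   epilogue:   add  $stack_addend, %esp     ; tb_ret_addr points here
 *               ...; pop %ebx; pop %ebp
 *               ret
 */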

static void tcg_target_init(TCGContext *s)
{
#if !defined(CONFIG_USER_ONLY)
    /* Fail safe: the softmmu TLB lookup code assumes this relationship. */
    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
        tcg_abort();
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
        /* The SysV AMD64 ABI additionally clobbers the remaining argument
           registers and R10/R11. */
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    /* The stack pointer must never be allocated. */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ESP);

    tcg_add_target_add_op_defs(x86_op_defs);
}