/* Win64 call and closure stubs: MASM syntax when _MSC_VER is defined, GNU as (AT&T) syntax otherwise. */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
	
/* Constants for ffi_call_win64: byte offsets from rbp, valid once the
   prologue has run (push rbp; sub rsp, 48; rbp = rsp + 32).  Offsets 32
   to 56 are the home slots into which the prologue stores rcx, rdx, r8
   and r9; 64 and 72 are the fifth and sixth parameters, which arrive on
   the stack. */
/* Local slot that holds the address of the outgoing argument area. */
#define STACK 0
/* First parameter (home slot of rcx): the prep_args function pointer. */
#define PREP_ARGS_FN 32
/* Second parameter (home slot of rdx): the extended_cif pointer. */
#define ECIF 40
/* Third parameter (home slot of r8): argument byte count, read as 32 bits. */
#define CIF_BYTES 48
/* Fourth parameter (home slot of r9): flags that select how the result is stored. */
#define CIF_FLAGS 56
/* Fifth parameter: pointer through which the result is stored. */
#define RVALUE 64
/* Sixth parameter: the function to call. */
#define FN 72

/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
                   extended_cif *ecif, unsigned bytes, unsigned flags,
                   unsigned *rvalue, void (*fn)());
 */

#ifdef _MSC_VER
PUBLIC	ffi_call_win64

EXTRN	__chkstk:NEAR
EXTRN	ffi_closure_win64_inner:NEAR

_TEXT	SEGMENT

;;; ffi_closure_win64
;;;
;;; Closure entry stub for the Microsoft x64 calling convention.
;;; It is entered with these registers set:
;;;    rax points to the closure
;;;    r11 contains a bit mask; bit N (N = 0..3) is set when register
;;;        parameter N is a float or double
;;;
;;; The stub stores each register parameter into its home slot above the
;;; return address (from the xmm register when its mask bit is set,
;;; otherwise from the integer register), calls ffi_closure_win64_inner
;;; with the closure and a pointer to the first home slot, and returns
;;; whatever that call left in rax, mirrored into xmm0.
;;;
ffi_closure_win64 PROC FRAME
	;; parameter 0 goes to [rsp+8]
	test	r11, 1
	jne	first_is_float
	mov	QWORD PTR [rsp+8], rcx
	jmp	second
first_is_float:
	movlpd	QWORD PTR [rsp+8], xmm0

second:
	;; parameter 1 goes to [rsp+16]
	test	r11, 2
	jne	second_is_float
	mov	QWORD PTR [rsp+16], rdx
	jmp	third
second_is_float:
	movlpd	QWORD PTR [rsp+16], xmm1

third:
	;; parameter 2 goes to [rsp+24]
	test	r11, 4
	jne	third_is_float
	mov	QWORD PTR [rsp+24], r8
	jmp	fourth
third_is_float:
	movlpd	QWORD PTR [rsp+24], xmm2

fourth:
	;; parameter 3 goes to [rsp+32]
	test	r11, 8
	jne	fourth_is_float
	mov	QWORD PTR [rsp+32], r9
	jmp	done
fourth_is_float:
	movlpd	QWORD PTR [rsp+32], xmm3

done:
	sub	rsp, 40		; 32 bytes of shadow space for the callee plus 8
				; so that rsp is 16-byte aligned at the call
				; (given the ABI alignment on entry)
	.ALLOCSTACK 40		; unwind directive follows the instruction it
				; describes, so the recorded prologue offset
				; is the end of that instruction
	.ENDPROLOG
	mov	rcx, rax	; closure is the first parameter
	lea	rdx, QWORD PTR [rsp+48]	; second parameter: address of the first
				; home slot (40 bytes of frame + 8 for the
				; return address)
	mov	rax, ffi_closure_win64_inner
	call	rax		; call the real closure function
	add	rsp, 40
	movd	xmm0, rax	; If the closure returned a float,
                                ; ffi_closure_win64_inner wrote it to rax
	ret	0
ffi_closure_win64 ENDP

;;; ffi_call_win64
;;;
;;; Parameters, in Microsoft x64 order: prep_args_fn, ecif, bytes, flags,
;;; rvalue, fn.  The routine reserves bytes (rounded up to a multiple of
;;; 16) of stack, has prep_args_fn fill that area from ecif, loads the
;;; first four slots into both the integer and the xmm parameter
;;; registers, calls fn and stores the result through rvalue as selected
;;; by flags.  It returns 0 in rax.
;;;
ffi_call_win64 PROC FRAME
        ;; save the four register parameters in their home slots
	mov	QWORD PTR [rsp+32], r9
	mov	QWORD PTR [rsp+24], r8
	mov	QWORD PTR [rsp+16], rdx
	mov	QWORD PTR [rsp+8], rcx
	;; Each unwind directive is placed after the instruction it describes
	;; so that the recorded prologue offset is the end of that instruction.
	push	rbp
        .PUSHREG rbp
	sub	rsp, 48					; 00000030H
        .ALLOCSTACK 48
	lea	rbp, QWORD PTR [rsp+32]
        .SETFRAME rbp, 32
        .ENDPROLOG

	;; rax = argument byte count rounded up to a multiple of 16
	mov	eax, DWORD PTR CIF_BYTES[rbp]
	add	rax, 15
	and	rax, -16
	call	__chkstk		; external stack-probe helper; size is passed in rax
	sub	rsp, rax
	lea	rax, QWORD PTR [rsp+32]	; argument area starts above 32 bytes of shadow space
	mov	QWORD PTR STACK[rbp], rax

	;; prep_args_fn (argument area, ecif)
	mov	rdx, QWORD PTR ECIF[rbp]
	mov	rcx, QWORD PTR STACK[rbp]
	call	QWORD PTR PREP_ARGS_FN[rbp]

	;; rsp = argument area; its first 32 bytes are the home slots of the
	;; four register parameters of the function about to be called
	mov	rsp, QWORD PTR STACK[rbp]

	;; load each of the first four slots into the xmm register and the
	;; integer register of the same position
	movlpd	xmm3, QWORD PTR [rsp+24]
	movd	r9, xmm3

	movlpd	xmm2, QWORD PTR [rsp+16]
	movd	r8, xmm2

	movlpd	xmm1, QWORD PTR [rsp+8]
	movd	rdx, xmm1

	movlpd	xmm0, QWORD PTR [rsp]
	movd	rcx, xmm0

	call	QWORD PTR FN[rbp]

	;; Store the result through rvalue according to the flags value.
	;; Small structs are stored at their own size.
ret_struct4b$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
 	jne	ret_struct2b$

	mov	rcx, QWORD PTR RVALUE[rbp]
	mov	DWORD PTR [rcx], eax
	jmp	ret_void$

ret_struct2b$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
 	jne	ret_struct1b$

	mov	rcx, QWORD PTR RVALUE[rbp]
	mov	WORD PTR [rcx], ax
	jmp	ret_void$

ret_struct1b$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
 	jne	ret_uint8$

	mov	rcx, QWORD PTR RVALUE[rbp]
	mov	BYTE PTR [rcx], al
	jmp	ret_void$

	;; Integer results narrower than 64 bits are zero- or sign-extended
	;; and stored as a full 8 bytes.
ret_uint8$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
 	jne	ret_sint8$

	mov	rcx, QWORD PTR RVALUE[rbp]
	movzx   rax, al
	mov	QWORD PTR [rcx], rax
	jmp	ret_void$

ret_sint8$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
 	jne	ret_uint16$

	mov	rcx, QWORD PTR RVALUE[rbp]
	movsx   rax, al
	mov	QWORD PTR [rcx], rax
	jmp	ret_void$

ret_uint16$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
 	jne	ret_sint16$

	mov	rcx, QWORD PTR RVALUE[rbp]
	movzx   rax, ax
	mov	QWORD PTR [rcx], rax
	jmp	SHORT ret_void$

ret_sint16$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
 	jne	ret_uint32$

	mov	rcx, QWORD PTR RVALUE[rbp]
	movsx   rax, ax
	mov	QWORD PTR [rcx], rax
	jmp	SHORT ret_void$

ret_uint32$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
 	jne	ret_sint32$

	mov	rcx, QWORD PTR RVALUE[rbp]
	mov     eax, eax		; writing eax zero-extends into rax
	mov	QWORD PTR [rcx], rax
	jmp	SHORT ret_void$

ret_sint32$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
 	jne	ret_float$

	mov	rcx, QWORD PTR RVALUE[rbp]
	cdqe				; sign-extend eax into rax
	mov	QWORD PTR [rcx], rax
	jmp	SHORT ret_void$

	;; Floating-point results are taken from xmm0.
ret_float$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
 	jne	SHORT ret_double$

 	mov	rax, QWORD PTR RVALUE[rbp]
 	movss	DWORD PTR [rax], xmm0
 	jmp	SHORT ret_void$

ret_double$:
 	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
 	jne	SHORT ret_sint64$

 	mov	rax, QWORD PTR RVALUE[rbp]
 	movlpd	QWORD PTR [rax], xmm0
 	jmp	SHORT ret_void$

ret_sint64$:
  	cmp	DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
  	jne	ret_void$

 	mov	rcx, QWORD PTR RVALUE[rbp]
 	mov	QWORD PTR [rcx], rax
	;; fall through

	;; Any other flags value stores nothing.
ret_void$:
	xor	eax, eax		; return 0 (writing eax clears all of rax)

	lea	rsp, QWORD PTR [rbp+16]	; rsp now points at the saved rbp
	pop	rbp
	ret	0
ffi_call_win64 ENDP
_TEXT	ENDS
END

#else

/* SYMBOL_NAME(name) yields the assembler-level spelling of a symbol:
   the name with a leading underscore when SYMBOL_UNDERSCORE is defined,
   and the name unchanged otherwise. */
#ifdef SYMBOL_UNDERSCORE
#define SYMBOL_NAME(name) _##name
#else
#define SYMBOL_NAME(name) name
#endif

.text

.extern SYMBOL_NAME(ffi_closure_win64_inner)

# ffi_closure_win64
#
# Closure entry stub for the Microsoft x64 calling convention.
# It is entered with these registers set:
#    rax points to the closure
#    r11 contains a bit mask; bit N (N = 0..3) is set when register
#        parameter N is a float or double
#
# The stub stores each register parameter into its home slot above the
# return address (from the xmm register when its mask bit is set,
# otherwise from the integer register), calls ffi_closure_win64_inner
# with the closure and a pointer to the first home slot, and returns
# whatever that call left in rax, mirrored into xmm0.
#
	.balign 16
        .globl SYMBOL_NAME(ffi_closure_win64)
SYMBOL_NAME(ffi_closure_win64):
	# parameter 0 goes to 8(%rsp)
	test	$1,%r11
	jne	.Lfirst_is_float
	mov	%rcx, 8(%rsp)
	jmp	.Lsecond
.Lfirst_is_float:
	movlpd	%xmm0, 8(%rsp)

.Lsecond:
	# parameter 1 goes to 16(%rsp)
	test	$2, %r11
	jne	.Lsecond_is_float
	mov	%rdx, 16(%rsp)
	jmp	.Lthird
.Lsecond_is_float:
	movlpd	%xmm1, 16(%rsp)

.Lthird:
	# parameter 2 goes to 24(%rsp)
	test	$4, %r11
	jne	.Lthird_is_float
	mov	%r8,24(%rsp)
	jmp	.Lfourth
.Lthird_is_float:
	movlpd	%xmm2, 24(%rsp)

.Lfourth:
	# parameter 3 goes to 32(%rsp)
	test	$8, %r11
	jne	.Lfourth_is_float
	mov	%r9, 32(%rsp)
	jmp	.Ldone
.Lfourth_is_float:
	movlpd	%xmm3, 32(%rsp)

.Ldone:
	sub	$40, %rsp	# 32 bytes of shadow space for the callee plus 8
				# so that rsp is 16-byte aligned at the call
				# (given the ABI alignment on entry);
				# MASM variant: .ALLOCSTACK 40, .ENDPROLOG
	mov	%rax, %rcx	# closure is the first parameter
	lea	48(%rsp), %rdx	# second parameter: address of the first home
				# slot (40 bytes of frame + 8 for the return
				# address)
	# Take the target address RIP-relative: an absolute immediate in a
	# mov to rax is limited to a sign-extended 32-bit value.
	lea	SYMBOL_NAME(ffi_closure_win64_inner)(%rip), %rax
	callq	*%rax		# call the real closure function
	add	$40, %rsp
	movq	%rax, %xmm0	# If the closure returned a float,
                                # ffi_closure_win64_inner wrote it to rax
	retq
.ffi_closure_win64_end:

	.balign 16
        .globl	SYMBOL_NAME(ffi_call_win64)
# ffi_call_win64
#
# Parameters, in Microsoft x64 order: prep_args_fn, ecif, bytes, flags,
# rvalue, fn.  The routine reserves bytes (rounded up to a multiple of
# 16) of stack, has prep_args_fn fill that area from ecif, loads the
# first four slots into both the integer and the xmm parameter
# registers, calls fn and stores the result through rvalue as selected
# by flags.  It returns 0 in rax.
#
SYMBOL_NAME(ffi_call_win64):
        # save the four register parameters in their home slots
	mov	%r9,32(%rsp)
	mov	%r8,24(%rsp)
	mov	%rdx,16(%rsp)
	mov	%rcx,8(%rsp)
	push	%rbp			# MASM variant: .PUSHREG rbp
	sub	$48,%rsp		# MASM variant: .ALLOCSTACK 48
	lea	32(%rsp),%rbp		# MASM variant: .SETFRAME rbp, 32

	# rax = argument byte count rounded up to a multiple of 16
	mov	CIF_BYTES(%rbp),%eax
	add	$15, %rax
	and	$-16, %rax

	# Lower rsp by rax in steps of at most 0x1000 bytes, touching the
	# stack after every step.
	cmpq	$0x1000, %rax
	jb	.Lch_done
.Lch_probe:
	subq	$0x1000,%rsp
	orl	$0x0, (%rsp)		# touch the location without changing it
	subq	$0x1000,%rax
	cmpq	$0x1000,%rax
	ja	.Lch_probe
.Lch_done:
	subq	%rax, %rsp		# remaining part, at most 0x1000 bytes
	orl	$0x0, (%rsp)
	lea	32(%rsp), %rax		# argument area starts above 32 bytes of shadow space
	mov	%rax, STACK(%rbp)

	# prep_args_fn (argument area, ecif)
	mov	ECIF(%rbp), %rdx
	mov	STACK(%rbp), %rcx
	callq	*PREP_ARGS_FN(%rbp)

	# rsp = argument area; its first 32 bytes are the home slots of the
	# four register parameters of the function about to be called
	mov	STACK(%rbp), %rsp

	# load each of the first four slots into the xmm register and the
	# integer register of the same position
	movlpd	24(%rsp), %xmm3
	movd	%xmm3, %r9

	movlpd	16(%rsp), %xmm2
	movd	%xmm2, %r8

	movlpd	8(%rsp), %xmm1
	movd	%xmm1, %rdx

	movlpd	(%rsp), %xmm0
	movd	%xmm0, %rcx

	callq	*FN(%rbp)

	# Store the result through rvalue according to the flags value.
	# Small structs are stored at their own size.
.Lret_struct4b:
 	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
 	jne .Lret_struct2b

	mov	RVALUE(%rbp), %rcx
	mov	%eax, (%rcx)
	jmp	.Lret_void

.Lret_struct2b:
	cmpl	$FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
	jne .Lret_struct1b

	mov	RVALUE(%rbp), %rcx
	mov	%ax, (%rcx)
	jmp .Lret_void

.Lret_struct1b:
	cmpl	$FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
	jne .Lret_uint8

	mov	RVALUE(%rbp), %rcx
	mov	%al, (%rcx)
	jmp .Lret_void

	# Integer results narrower than 64 bits are zero- or sign-extended
	# and stored as a full 8 bytes.
.Lret_uint8:
	cmpl	$FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
	jne .Lret_sint8

        mov     RVALUE(%rbp), %rcx
        movzbq  %al, %rax
	movq    %rax, (%rcx)
	jmp .Lret_void

.Lret_sint8:
	cmpl	$FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
	jne .Lret_uint16

        mov     RVALUE(%rbp), %rcx
        movsbq  %al, %rax
	movq    %rax, (%rcx)
	jmp .Lret_void

.Lret_uint16:
	cmpl	$FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
	jne .Lret_sint16

        mov     RVALUE(%rbp), %rcx
        movzwq  %ax, %rax
	movq    %rax, (%rcx)
	jmp .Lret_void

.Lret_sint16:
	cmpl	$FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
	jne .Lret_uint32

        mov     RVALUE(%rbp), %rcx
        movswq  %ax, %rax
	movq    %rax, (%rcx)
	jmp .Lret_void

.Lret_uint32:
	cmpl	$FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
	jne .Lret_sint32

        mov     RVALUE(%rbp), %rcx
        movl    %eax, %eax		# writing eax zero-extends into rax
	movq    %rax, (%rcx)
	jmp .Lret_void

.Lret_sint32:
 	cmpl	$FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
 	jne	.Lret_float

	mov	RVALUE(%rbp), %rcx
	cltq				# sign-extend eax into rax
	movq	%rax, (%rcx)
	jmp	.Lret_void

	# Floating-point results are taken from xmm0.
.Lret_float:
 	cmpl	$FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
 	jne	.Lret_double

 	mov	RVALUE(%rbp), %rax
 	movss	%xmm0, (%rax)
 	jmp	.Lret_void

.Lret_double:
 	cmpl	$FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
 	jne	.Lret_sint64

 	mov	RVALUE(%rbp), %rax
 	movlpd	%xmm0, (%rax)
 	jmp	.Lret_void

.Lret_sint64:
  	cmpl	$FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
  	jne	.Lret_void

 	mov	RVALUE(%rbp), %rcx
 	mov	%rax, (%rcx)
	# fall through

	# Any other flags value stores nothing.
.Lret_void:
	xor	%eax, %eax		# return 0 (writing eax clears all of rax)

	lea	16(%rbp), %rsp		# rsp now points at the saved rbp
	pop	%rbp
	retq
.ffi_call_win64_end:
#endif /* !_MSC_VER */