; ----------------------------------------------------------------------------
; satexample.asm
;
; This is a short example of parallel saturated addition using paddsw.
; It takes two 64-bit quantities
;
;   80008FFF0005FEF2
;   800020E07FFE99AA
;
; and performs saturated addition on the four 16-bit blocks in parallel,
; then writes the resulting value, in hex, to standard output.  The answer
; should be (printf's %x conversion prints the digits in lowercase)
;
;   8000B0DF7FFF989C
; ----------------------------------------------------------------------------

	extern	_printf
	global	_main
	
	section	.text
; ----------------------------------------------------------------------------
; int main(void)	(32-bit, cdecl: arguments pushed right to left,
;			 caller removes them from the stack)
;
; Adds the four signed 16-bit lanes of y to those of x with saturation,
; stores the 64-bit result back into x, and prints it as two 32-bit hex
; halves, most significant half first.
;
; Out:    eax = 0
; Clobb:  mm0, flags, plus whatever _printf clobbers
; ----------------------------------------------------------------------------
_main:
	movq	mm0, [x]
	paddsw	mm0, [y]		; Do 4 saturated additions in parallel
	movq	[x], mm0
	emms				; MMX registers alias the x87 stack: mark
					; it empty again before entering C code

	push	dword [x]		; low half, printed second (a 32-bit push
					; cannot move all 64 bits at once)
	push	dword [x+4]		; high half, printed first
	push	dword format
	call	_printf
	add	esp, 12			; drop the three dword arguments

	xor	eax, eax		; exit status 0, not printf's char count
	ret
	
	section	.data
; Operands stored as four 16-bit lanes each, least significant lane first,
; so as 64-bit values x reads 80008FFF0005FEF2 and y reads 800020E07FFE99AA.
; x is overwritten with the result by _main.
x	dw	0fef2h, 0005h, 8fffh, 8000h
y	dw	099aah, 7ffeh, 20e0h, 8000h
; Each 32-bit half is zero-padded to 8 hex digits.  A bare '%0x' carries the
; 0 flag but no field width, so leading zeros of a half would be dropped and
; the concatenated 64-bit value would come out wrong.
format	db	'%08x%08x', 10, 0

