; ----------------------------------------------------------------------------
; mmxarrayadd.asm
;
; NASM implementation of a function that adds two arrays of shorts
; element-wise, storing each sum back into the first array (a[i] += b[i]).
;
;   void add(short a[], short b[], int n)
; ----------------------------------------------------------------------------	

	global	_add

	section .text

; ----------------------------------------------------------------------------
; void add(short a[], short b[], int n)
;
; Adds b into a element-wise, in place: a[i] += b[i] for 0 <= i < n.
; Each sum wraps around at 16 bits (paddw in the MMX loop, a 16-bit add in
; the scalar loop).  Nothing is read or written when n <= 0.
;
; ABI:   32-bit x86, arguments passed on the stack, plain `ret` (the callee
;        does not pop its arguments).
; In:    at entry [esp+4] = a, [esp+8] = b, [esp+12] = n
; Out:   none; the array at a is updated in place
; Clobb: eax, ecx, edx, flags.  ebx is saved and restored.
;        When n >= 4, mm0 is overwritten and the x87/MMX state is reset by
;        emms before returning.
;
; Register roles inside the routine:
;        eax = cursor into a      edx = cursor into b
;        ecx = items remaining    bx  = scratch for the scalar loop
; ----------------------------------------------------------------------------
_add:
	push	ebx			; ebx is callee-saved; bx is our scratch

	; The push moved esp down by 4, so every argument offset is +4.
	mov	eax, [esp+8]		; eax = a
	mov	edx, [esp+12]		; edx = b
	mov	ecx, [esp+16]		; ecx = n

	cmp	ecx, 4			; signed compare: n < 4 (including n <= 0)
	jl	.tail			;   skips the MMX path entirely

.vec4:					; invariant: ecx >= 4 here
	movq	mm0, qword [eax]	; mm0 = a[0..3]
	paddw	mm0, qword [edx]	; four independent wrapping 16-bit adds
	movq	qword [eax], mm0	; a[0..3] = sums
	add	eax, 8			; advance both cursors by 4 words
	add	edx, 8
	sub	ecx, 4
	cmp	ecx, 4
	jge	.vec4

	; MMX registers alias the x87 register stack.  Without emms the FPU tag
	; word stays marked "in use" and the caller's next x87 instruction
	; would fault or yield NaN.  Only needed on this path, where MMX ran.
	emms

.tail:
	test	ecx, ecx		; 0..3 items left, or n was <= 0
	jle	.done

.scalar:				; invariant: ecx >= 1 here
	mov	bx, word [eax]
	add	bx, word [edx]
	mov	word [eax], bx		; a[0] += b[0], one word at a time
	add	eax, 2
	add	edx, 2
	dec	ecx
	jnz	.scalar

.done:
	pop	ebx
	ret
