; ===========================================================================
; Aligned fill routines: memset8_, memset16_, memset32_
;
; All three store through STOS, i.e. to ES:DI, with the element count in CX.
; Each one first brings DI up to a dword boundary with byte/word stores, then
; does the bulk of the work with REP STOSD, then writes whatever is left.
;
; The number in the trailing comment on each instruction is the cycle figure
; carried over from the original listing ("a / b" on branches presumably means
; taken / not taken, "x" the REP iteration count -- TODO confirm which CPU the
; figures were taken from).
; ===========================================================================

; ---------------------------------------------------------------------------
; memset8_ - fill CX bytes at ES:DI with the byte in AL.
;
; In:       AL    = fill byte
;           CX    = number of bytes (0 = return immediately)
;           ES:DI = destination
; Out:      DI advanced by the number of bytes stored, CX = 0
; Clobbers: EAX, flags.  BX is saved and restored.
;           DF is cleared unless CX was 0 on entry.
;
; Totals from the original listing: 60+5x / 49+5x / 33+5x
; ---------------------------------------------------------------------------
memset8_        proc    far
                jcxz    thatsIt_8               ; 8 / 5
                ; Replicate AL into all four bytes of EAX.
                push    bx                      ; 1
                mov     ah, al                  ; 1
                mov     bx, ax                  ; 1
                shl     eax, 16                 ; 2
                mov     ax, bx                  ; 1
                pop     bx                      ; 4
                cld                             ; 2
                ; Odd address: one byte brings DI to a word boundary.
                test    di, 1                   ; 1
                jz      aligned16_8             ; 3 / 1
                stosb                           ; 5
                dec     cx                      ; 1
                jz      thatsIt_8               ; 3 / 1
aligned16_8:
                ; Word- but not dword-aligned: two more bytes.
                test    di, 2                   ; 1
                jz      aligned32_8             ; 3 / 1
                stosb                           ; 5
                dec     cx                      ; 1
                jz      thatsIt_8               ; 3 / 1
                stosb                           ; 5
                dec     cx                      ; 1
                ; CX may be 0 here; both REPs below are then no-ops.
aligned32_8:
                push    cx                      ; 1
                shr     cx, 2                   ; 2   whole dwords
                rep stosd                       ; ?+5x
                pop     cx                      ; 4
                and     cx, 3                   ; 1   0..3 trailing bytes
                rep stosb                       ; ?+5x
thatsIt_8:
                ret
memset8_        endp

; ---------------------------------------------------------------------------
; memset16_ - fill CX words at ES:DI with the word in AX.
;
; In:       AX    = fill word
;           CX    = number of words (0 = return immediately)
;           ES:DI = destination (any alignment)
; Out:      DI advanced by 2*CX bytes
; Clobbers: EAX, CX, DX, flags.
;           DF is cleared unless CX was 0 on entry.
;
; DX is used as the scratch register while building the 32-bit pattern: it is
; overwritten with DI right afterwards anyway, so BX is left untouched (as in
; memset8_).
;
; Totals from the original listing: 57+5x / 42+5x / 29+5x
; ---------------------------------------------------------------------------
memset16_       proc    far
                jcxz    thatsIt_16              ; 8 / 5
                ; EAX = AX:AX
                mov     dx, ax                  ; 1
                shl     eax, 16                 ; 2
                mov     ax, dx                  ; 1
                mov     dx, di                  ; 1   remember the entry alignment
                cld                             ; 2
                ; Odd address: store the low byte, then rotate the pattern so
                ; that every following store continues the byte sequence. One
                ; word is now "split": its high byte is written at lastByte_16.
                test    di, 1                   ; 1
                jz      aligned16_16            ; 3 / 1
                stosb                           ; 5
                ror     eax, 8                  ; 2
                dec     cx                      ; 1
                jz      lastByte_16             ; 3 / 1
aligned16_16:
                test    di, 2                   ; 1
                jz      aligned32_16            ; 3 / 1
                stosw                           ; 5
                dec     cx                      ; 1
aligned32_16:
                shr     cx, 1                   ; 2   CX = dwords, CF = odd word left
                rep stosd                       ; ?+5x (STOS leaves the flags alone)
                jnc     lastByte_16             ; 3 / 1
                stosw                           ; 5
lastByte_16:
                ; Entry address was odd: finish the split word.
                test    dx, 1                   ; 1
                jz      thatsIt_16              ; 3 / 1
                stosb                           ; 5
thatsIt_16:
                ret
memset16_       endp

; ---------------------------------------------------------------------------
; memset32_ - fill CX dwords at ES:DI with the dword in DX:AX.
;
; In:       DX:AX = fill dword (DX = high word, AX = low word)
;           CX    = number of dwords (0 = return immediately)
;           ES:DI = destination (any alignment)
; Out:      DI advanced by 4*CX bytes
; Clobbers: EAX, CX, DX, flags.  DF is cleared unless CX was 0 on entry.
;
; Scheme: the head stores (0..3 bytes) bring DI to a dword boundary and rotate
; EAX by the same number of bytes; one dword is thereby "split" and its
; remaining bytes are written by the tail, which re-derives their number from
; the entry alignment kept in DX.
;
; Two defects of the earlier version are corrected here:
;   * an even DI was never tested for bit 1, so a dword-aligned destination
;     took the word-aligned path and ended up 2 bytes short;
;   * with an odd DI and CX = 1 the early exit after the first byte skipped
;     the re-alignment word, leaving DI = 4k+1 destinations 2 bytes short.
;
; Totals from the original listing (they predate the corrections above and
; have not been re-measured): 76+5x / 61+5x / 35+5x
; ---------------------------------------------------------------------------
memset32_       proc    far
                jcxz    thatsIt_32              ; 8 / 5
                cld                             ; 2
                ; EAX = DX:AX
                xchg    dx, ax                  ; 3
                shl     eax, 16                 ; 2
                mov     ax, dx                  ; 1
                mov     dx, di                  ; 1   remember the entry alignment
                test    di, 1                   ; 1
                jz      aligned16_32            ; 3 / 1
                ; Odd address: one byte, and one dword becomes the split one.
                stosb                           ; 5
                ror     eax, 8                  ; 2
                dec     cx                      ; 1
                ; No early exit when CX reaches 0: the re-alignment word below
                ; may still be owed, and REP STOSD with CX = 0 stores nothing.
                test    di, 2                   ; 1
                jz      mainCopy_32             ; 3 / 1
                stosw                           ; 5
                ror     eax, 16                 ; 2
                jmp     mainCopy_32             ; 3
aligned16_32:
                ; Even address: only a word-but-not-dword-aligned DI needs a
                ; head store; a dword-aligned DI goes straight to the main loop.
                test    di, 2                   ; 1
                jz      mainCopy_32             ; 3 / 1
                stosw                           ; 5
                ror     eax, 16                 ; 2
                dec     cx                      ; 1
                jz      doRest_32               ; 3 / 1
mainCopy_32:
                rep stosd                       ; ?+5x
doRest_32:
                ; Tail: bytes of the split dword not yet written.
                test    dx, 2                   ; 1
                jz      lastByte_32             ; 3 / 1
                stosw                           ; 5
                ror     eax, 16                 ; 2
lastByte_32:
                test    dx, 1                   ; 1
                jz      thatsIt_32              ; 3 / 1
                stosb                           ; 5
thatsIt_32:
                ret
memset32_       endp