        title  "Compute Checksum"

;/*++
;
; Copyright (c) 1992  Microsoft Corporation
;
; Module Name:
;
;    cksy.asm
;
; Abstract:
;
;    This module implements a function to compute the checksum of a buffer.
;
; Author:
;
;    David N. Cutler (davec) 27-Jan-1992
;
; Revision History:
;
;     Who         When        What
;     --------    --------    ----------------------------------------------
;     mikeab      01-22-94    Pentium optimization
;
; Environment:
;
;    Any mode.
;
; Revision History:
;
;--*/

LOOP_UNROLLING_BITS     equ     5
LOOP_UNROLLING          equ     (1 SHL LOOP_UNROLLING_BITS)

        .386
        .model  small,c

        assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
        assume fs:nothing,gs:nothing

        .xlist
        include callconv.inc
        include ks386.inc
        .list

        .code

;++
;
; ULONG
; tcpxsum(
;   IN ULONG cksum,
;   IN PUCHAR buf,
;   IN ULONG len
;   )
;
; Routine Description:
;
;    This function computes the checksum of the specified buffer.
;
; Arguments:
;
;    cksum - Suppiles the initial checksum value, in 16-bit form,
;            with the high word set to 0.
;
;    buf - Supplies a pointer to the buffer to the checksum buffer.
;
;    len - Supplies the length of the buffer in bytes.
;
; Return Value:
;
;    The computed checksum in 32-bit two-partial-accumulators form, added to
;    the initial checksum, is returned as the function value.
;
;--

cksum   equ     12                      ; stack offset to initial checksum
buf     equ     16                      ; stack offset to source address
len     equ     20                      ; stack offset to length in words

to_checksum_last_word:
        jmp     checksum_last_word

to_checksum_done:
        jmp     checksum_done

to_checksum_dword_loop_done:
        jmp     checksum_dword_loop_done

cPublicProc tcpxsum,3

        push    ebx                     ; save nonvolatile register
        push    esi                     ; save nonvolatile register

        mov     ecx,[esp + len]         ; get length in bytes
        sub     eax,eax                 ; clear computed checksum
        test    ecx,ecx                 ; any bytes to checksum at all?
        jz      short to_checksum_done  ; no bytes to checksum

;
; if the checksum buffer is not word aligned, then add the first byte of
; the buffer to the input checksum.
;

        mov     esi,[esp + buf]         ; get source address
        sub     edx,edx                 ; set up to load word into EDX below
        test    esi,1                   ; check if buffer word aligned
        jz      short checksum_word_aligned ; if zf, buffer word aligned
        mov     ah,[esi]                ; get first byte (we know we'll have
                                        ;  to swap at the end)
        inc     esi                     ; increment buffer address
        dec     ecx                     ; decrement number of bytes
        jz      short to_checksum_done  ; if zf set, no more bytes

;
; If the buffer is not an even number of of bytes, then initialize
; the computed checksum with the last byte of the buffer.
;

checksum_word_aligned:                  ;
        shr     ecx,1                   ; convert to word count
        jnc     short checksum_start    ; if nc, even number of bytes
        mov     al,[esi+ecx*2]          ; initialize the computed checksum
        jz      short to_checksum_done  ; if zf set, no more bytes

;
; Compute checksum in large blocks of dwords, with one partial word up front if
; necessary to get dword alignment, and another partial word at the end if
; needed.
;

;
; Compute checksum on the leading word, if that's necessary to get dword
; alignment.
;

checksum_start:                         ;
        test    esi,02h                 ; check if source dword aligned
        jz      short checksum_dword_aligned ; source is already dword aligned
        mov     dx,[esi]                ; get first word to checksum
        add     esi,2                   ; update source address
        add     eax,edx                 ; update partial checksum
                                        ;  (no carry is possible, because EAX
                                        ;  and EDX are both 16-bit values)
        dec     ecx                     ; count off this word (zero case gets
                                        ;  picked up below)

;
; Checksum as many words as possible by processing a dword at a time.
;

checksum_dword_aligned:
        push    ecx                     ; so we can tell if there's a trailing
                                        ;  word later
        shr     ecx,1                   ; # of dwords to checksum
        jz      short to_checksum_last_word ; no dwords to checksum

        mov     edx,[esi]               ; preload the first dword
        add     esi,4                   ; point to the next dword
        dec     ecx                     ; count off the dword we just loaded
        jz      short to_checksum_dword_loop_done
                                        ; skip the loop if that was the only
                                        ;  dword
        mov     ebx,ecx                 ; EBX = # of dwords left to checksum
        add     ecx,LOOP_UNROLLING-1    ; round up loop count
        shr     ecx,LOOP_UNROLLING_BITS ; convert from word count to unrolled
                                        ;  loop count
        and     ebx,LOOP_UNROLLING-1    ; # of partial dwords to do in first
                                        ;  loop
        jz      short checksum_dword_loop ; special-case when no partial loop,
                                          ;  because fixup below doesn't work
                                          ;  in that case (carry flag is
                                          ;  cleared at this point, as required
                                          ;  at loop entry)
        lea     esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
                                        ; adjust buffer pointer back to
                                        ;  compensate for hardwired displacement
                                        ;  at loop entry point
                                        ; ***doesn't change carry flag***
        jmp     loop_entry[ebx*4]       ; enter the loop to do the first,
                                        ; partial iteration, after which we can
                                        ; just do 64-word blocks
                                        ; ***doesn't change carry flag***

checksum_dword_loop:

DEFLAB  macro   pre,suf
pre&suf:
        endm

TEMP=0
        REPT    LOOP_UNROLLING
        deflab  loop_entry_,%TEMP
        adc     eax,edx
        mov     edx,[esi + TEMP]
TEMP=TEMP+4
        ENDM

checksum_dword_loop_end:

        lea     esi,[esi + LOOP_UNROLLING * 4]  ; update source address
                                        ; ***doesn't change carry flag***
        dec     ecx                     ; count off unrolled loop iteration
                                        ; ***doesn't change carry flag***
        jnz     checksum_dword_loop     ; do more blocks

checksum_dword_loop_done label proc
        adc     eax,edx                 ; finish dword checksum
        mov     edx,0                   ; prepare to load trailing word
        adc     eax,edx

;
; Compute checksum on the trailing word, if there is one.
; High word of EDX = 0 at this point
; Carry flag set iff there's a trailing word to do at this point
;

checksum_last_word label proc           ; "proc" so not scoped to function
        pop     ecx                     ; get back word count
        test    ecx,1                   ; is there a trailing word?
        jz      short checksum_done     ; no trailing word
        add     ax,[esi]                ; add in the trailing word
        adc     eax,0                   ;

checksum_done label proc                ; "proc" so not scoped to function
        mov     ecx,eax                 ; fold the checksum to 16 bits
        ror     ecx,16
        add     eax,ecx
        mov     ebx,[esp + buf]
        shr     eax,16
        test    ebx,1                   ; check if buffer word aligned
        jz      short checksum_combine  ; if zf set, buffer word aligned
        ror     ax,8                    ; byte aligned--swap bytes back
checksum_combine label proc             ; "proc" so not scoped to function
        add     ax,word ptr [esp + cksum] ; combine checksums
        pop     esi                     ; restore nonvolatile register
        adc     eax,0                   ;
        pop     ebx                     ; restore nonvolatile register
        stdRET  tcpxsum


REFLAB  macro   pre,suf
        dd      pre&suf
        endm

        align   4
loop_entry      label   dword
        dd      0
TEMP=LOOP_UNROLLING*4
        REPT    LOOP_UNROLLING-1
TEMP=TEMP-4
        reflab  loop_entry_,%TEMP
        ENDM

stdENDP tcpxsum

        end
