/**
 * \brief Size-optimized code for TPI
 * \file tpi.s
 * \author Sławomir Fraś
 */
#include <avr/io.h>
#include "tpi_defs.h"


/*
 * Pin assignment for the bit-banged TPI lines.
 * CLK and DATAOUT are always used; DATAIN depends on TPI_WITH_OPTO:
 *  - with TPI_WITH_OPTO the data line is read back on a separate pin (bit 4),
 *  - otherwise the same pin (bit 3) is used for both driving and sampling.
 */
#define TPI_CLK_PORT PORTB
#define TPI_CLK_DDR DDRB
#define TPI_CLK_BIT 5
#define TPI_DATAOUT_PORT PORTB
#define TPI_DATAOUT_DDR DDRB
#define TPI_DATAOUT_BIT 3
#ifdef TPI_WITH_OPTO
#       define TPI_DATAIN_PIN PINB
#       define TPI_DATAIN_DDR DDRB
/* PORT register of the DATAIN pin: tpi_init writes it to enable the pull-up. */
#       define TPI_DATAIN_PORT PORTB
#       define TPI_DATAIN_BIT 4
#else
#       define TPI_DATAIN_PIN PINB
#       define TPI_DATAIN_BIT 3
#endif

/*
 * 16-bit delay count, loaded by tpi_bit before each of its two delay loops.
 * It is only read in this file; presumably it is set by the caller before
 * any TPI routine is used - TODO confirm.
 */
.comm tpi_dly_cnt, 2


/**
 * Set up the TPI pins, then clock out 32 bits with the data line high.
 * in: -
 * lost: r21,r30-r31,T
 */
.global tpi_init
tpi_init:
        sbi     _SFR_IO_ADDR(TPI_CLK_DDR), TPI_CLK_BIT                  /* CLK: output */
#ifdef TPI_WITH_OPTO
        cbi     _SFR_IO_ADDR(TPI_DATAIN_DDR), TPI_DATAIN_BIT            /* DATAIN: input ... */
        sbi     _SFR_IO_ADDR(TPI_DATAIN_PORT), TPI_DATAIN_BIT           /* ... with pull-up */
        sbi     _SFR_IO_ADDR(TPI_DATAOUT_DDR), TPI_DATAOUT_BIT          /* DATAOUT: output ... */
        sbi     _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT         /* ... driven high */
#else
        cbi     _SFR_IO_ADDR(TPI_DATAOUT_DDR), TPI_DATAOUT_BIT          /* DATA: input ... */
        sbi     _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT         /* ... with pull-up */
#endif

        /* r21 counts the 32 clock cycles; tpi_bit_h does not touch it */
        ldi     r21, 32
.tpi_init_clk_loop:
        rcall   tpi_bit_h
        dec     r21
        brne    .tpi_init_clk_loop

        ret


/**
 * Load the 16-bit PR value: SSTPR(0) + low byte, then SSTPR(1) + high byte.
 * in: r25:r24 <= PR
 * lost: r18-r21,r24,r30-r31
 * note: there is no ret here on purpose - the high byte is sent by falling
 *       through into tpi_send_byte, which must directly follow this routine.
 */
tpi_pr_update:
        movw    r20, r24                        /* r21:r20 = PR, frees r24 for the bytes to send */
        ldi     r24, TPI_OP_SSTPR(0)
        rcall   tpi_send_byte
        mov     r24, r20                        /* PR low byte */
        rcall   tpi_send_byte
        ldi     r24, TPI_OP_SSTPR(1)
        rcall   tpi_send_byte
        mov     r24, r21                        /* PR high byte */
        /* fall through: tpi_send_byte sends r24 and returns to our caller */


/**
 * Transmit one frame: low start bit, 8 data bits (bit 0 first),
 * parity bit (XOR of the data bits), 2 high stop bits.
 * in: r24 <= byte (shifted out, so its value is consumed)
 * lost: r18-r19,r30-r31
 * note: the second stop bit is produced by falling through into tpi_bit_h,
 *       which must directly follow this routine.
 */
.global tpi_send_byte
tpi_send_byte:
        rcall   tpi_bit_l                       /* start bit */
        ldi     r18, 8                          /* r18 = data bits still to send */
        ldi     r19, 0                          /* r19 bit 0 = running parity */
.tpi_send_bit_loop:
        eor     r19, r24                        /* fold the current bit 0 into the parity */
        bst     r24, 0                          /* T = next bit to send */
        lsr     r24
        rcall   tpi_bit
        dec     r18
        brne    .tpi_send_bit_loop
        bst     r19, 0                          /* T = parity bit */
        rcall   tpi_bit
        rcall   tpi_bit_h                       /* first stop bit */
        /* second stop bit: fall through into tpi_bit_h, whose ret returns to our caller */


/**
 * Exchange of one bit: put a level on the data line, generate one clock
 * pulse and sample the data line right after the rising clock edge.
 *
 * Three entry points share this body:
 *   tpi_bit_h - sets T first, i.e. sends a high bit
 *   tpi_bit   - sends the bit currently held in T
 *   tpi_bit_l - drives the data line low regardless of T
 *
 * in: T <= bit_in (tpi_bit only)
 * out: T => bit_out (level sampled on TPI_DATAIN_PIN)
 * lost: r30-r31
 * On return the clock line is low again.
 */
tpi_bit_h:
        set
tpi_bit:
        /* TPIDATA = T */
#ifdef TPI_WITH_OPTO
        // DATAOUT = high (opto should allow TPIDATA to be pulled low by external device)
        // if(T == 0)
        //   DATAOUT = low
        sbi _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT
        brts 1f
tpi_bit_l:
                cbi _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT
1:
#else
        // DATAOUT = pull-up
        // if(T == 0)
        //   DATAOUT = low
        // (pin is first switched to input with pull-up; for a low bit it is
        //  then turned into an output driving 0)
        cbi _SFR_IO_ADDR(TPI_DATAOUT_DDR), TPI_DATAOUT_BIT
        sbi _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT
        brts 1f
tpi_bit_l:
                cbi _SFR_IO_ADDR(TPI_DATAOUT_PORT), TPI_DATAOUT_BIT
                sbi _SFR_IO_ADDR(TPI_DATAOUT_DDR), TPI_DATAOUT_BIT
1:
#endif
        /* delay(); */
        // Z = tpi_dly_cnt; count down until the subtraction borrows,
        // i.e. the loop body runs tpi_dly_cnt + 1 times
        lds r30, tpi_dly_cnt
        lds r31, tpi_dly_cnt+1
1:
                sbiw r30, 1
        brsh 1b
        /* TPICLK = 1 */
        sbi _SFR_IO_ADDR(TPI_CLK_PORT), TPI_CLK_BIT
        /* T = TPIDATA */
        // r30 is free after the delay loop and is used as scratch here
        in r30, _SFR_IO_ADDR(TPI_DATAIN_PIN)
        bst r30, TPI_DATAIN_BIT
        /* delay(); */
        // same delay as above for the high phase of the clock
        lds r30, tpi_dly_cnt
        lds r31, tpi_dly_cnt+1
1:
                sbiw r30, 1
        brsh 1b

        /* TPICLK = 0 */
        cbi _SFR_IO_ADDR(TPI_CLK_PORT), TPI_CLK_BIT
        ret


/**
 * Receive one frame: wait for a low start bit, read 8 data bits
 * (bit 0 first), check the parity bit, clock through 2 stop bits.
 * out: r24 => byte; 0 if no start bit was seen within 192 bits or the
 *      parity check failed (in both cases 26 low bits are sent first)
 * lost: r18-r19,r30-r31
 */
.global tpi_recv_byte
tpi_recv_byte:
        ldi     r18, 192                        /* r18 = bits left to wait for the start bit */
.tpi_recv_wait_start:
        rcall   tpi_bit_h
        brtc    .tpi_recv_found_start           /* line sampled low => start bit */
        dec     r18
        brne    .tpi_recv_wait_start
        /* no start bit seen: continue into the error path */

.tpi_break_ret0:
        ldi     r24, 0                          /* return value for the error case */
        ldi     r18, 26                         /* 2 breaks (24++ bits) of low level */
.tpi_recv_break_loop:
        rcall   tpi_bit_l
        dec     r18
        brne    .tpi_recv_break_loop
        rjmp    tpi_bit_h                       /* one high bit; its ret returns to our caller */

.tpi_recv_found_start:
        ldi     r18, 8                          /* r18 = data bits left */
        ldi     r19, 0                          /* r19 bit 7 = running parity */
.tpi_recv_bit_loop:
        rcall   tpi_bit_h
        lsr     r24                             /* make room, then insert the new bit at bit 7 */
        bld     r24, 7
        eor     r19, r24                        /* bit 7 of r24 is the bit just received */
        dec     r18
        brne    .tpi_recv_bit_loop

        rcall   tpi_bit_h                       /* parity bit */
        bld     r18, 7                          /* r18 is 0 after the loop, so r18 = parity << 7 */
        eor     r19, r18
        brmi    .tpi_break_ret0                 /* bit 7 set => parity mismatch */

        rcall   tpi_bit_h                       /* first stop bit */
        rjmp    tpi_bit_h                       /* second stop bit; returns to our caller */


/**
 * Read Block: set PR, then fetch bytes one at a time with SLD_INC
 * and store them through X.
 * in: r25:r24 <= PR, r23:r22 <= destination pointer, r20 <= byte count
 * lost: r18-r21,r23-r24,r26-r27,r30-r31
 * note: the count is decremented before it is tested, so a count of 0
 *       results in 256 iterations.
 */
.global tpi_read_block
tpi_read_block:
        movw    XL, r22                         /* X = destination (must precede the r23 overwrite) */
        mov     r23, r20                        /* r23 = bytes left; tpi_pr_update overwrites r20 */
        rcall   tpi_pr_update
.tpi_read_loop:
        ldi     r24, TPI_OP_SLD_INC
        rcall   tpi_send_byte
        rcall   tpi_recv_byte
        st      X+, r24
        dec     r23
        brne    .tpi_read_loop
        ret


/**
 * Write block: set PR, then for every source byte issue
 * SOUT(NVMCMD) + NVMCMD_WORD_WRITE, SST_INC + data byte, and poll
 * SIN(NVMCSR) until the NVMCSR_BSY bit reads back clear.
 * in: r25:r24 <= PR, r23:r22 <= source pointer, r20 <= byte count
 * lost: r18-r21,r23-r24,r26-r27,r30-r31
 * note: the count is decremented before it is tested, so a count of 0
 *       results in 256 iterations.
 */
.global tpi_write_block
tpi_write_block:
        movw    XL, r22                         /* X = source (must precede the r23 overwrite) */
        mov     r23, r20                        /* r23 = bytes left; tpi_pr_update overwrites r20 */
        rcall   tpi_pr_update
.tpi_write_loop:
        ldi     r24, TPI_OP_SOUT(NVMCMD)        /* select the write command ... */
        rcall   tpi_send_byte
        ldi     r24, NVMCMD_WORD_WRITE
        rcall   tpi_send_byte
        ldi     r24, TPI_OP_SST_INC             /* ... then store one byte */
        rcall   tpi_send_byte
        ld      r24, X+
        rcall   tpi_send_byte
.tpi_nvmbsy_wait:
        ldi     r24, TPI_OP_SIN(NVMCSR)
        rcall   tpi_send_byte
        rcall   tpi_recv_byte
        andi    r24, NVMCSR_BSY
        brne    .tpi_nvmbsy_wait                /* repeat while the busy bit is set */
        dec     r23
        brne    .tpi_write_loop
        ret