Source file: /~heha/Mikrocontroller/LEDs/u-wire.zip/usbdrv/usbdrvasm12.inc

/* Name: usbdrvasm12.inc
 * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
 * Author: Christian Starkjohann
 * Creation Date: 2004-12-29
 * Tabsize: 4
 * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
 * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
 * 
 * Heavily modified to be compatible with ATtiny10 - cpldcpu Jan 2014
 */

/* Do not link this file! Link usbdrvasm.S instead, which includes the
 * appropriate implementation!
 */

/*
General Description:
This file is the 12 MHz version of the assembler part of the USB driver. It
requires a 12 MHz crystal (not a ceramic resonator and not a calibrated RC
oscillator).

See usbdrv.h for a description of the entire driver.

Since almost all of this code is timing critical, don't change unless you
really know what you are doing! Many parts require not only a maximum number
of CPU cycles, but even an exact number of cycles!


Timing constraints according to spec (in bit times):
timing subject                                      min max    CPUcycles
---------------------------------------------------------------------------
EOP of OUT/SETUP to sync pattern of DATA0 (both rx) 2   16     16-128
EOP of IN to sync pattern of DATA0 (rx, then tx)    2   7.5    16-60
DATAx (rx) to ACK/NAK/STALL (tx)                    2   7.5    16-60
*/

;Software-receiver engine. Strict timing! Don't change unless you can preserve timing!
;interrupt response time: 4 cycles + insn running = 7 max if interrupts always enabled
;max allowable interrupt latency: 34 cycles -> max 25 cycles interrupt disable
;max stack usage: [ret(2), ZL, SREG, ZH, shift, x1, x2, x3, cnt, x4] = 11 bytes
;Numbers in brackets are maximum cycles since SOF.
USB_INTR_VECTOR:
;Receiver entry point. Waits for D- to go high (J), then for the J->K edge of the
;sync pattern, checks that the K lasts two bit times (end of sync), initializes
;Z, cnt, x3 and shift, samples data bit 0 and joins the receiver loop at rxbit1.
;If no K is seen in waitForK the routine leaves through a plain 'ret'.
;NOTE(review): the register saves below are commented out and the exit is 'ret',
;not 'reti' -- presumably this build enters here as an ordinary subroutine rather
;than through the hardware interrupt vector; confirm against the caller.
;order of registers pushed: ZL, SREG [sofError], ZH, shift, x1, x2, x3, cnt
;    push    ZL              ;2 [35] push only what is necessary to sync with edge ASAP
;    in      ZL, SREG        ;1 [37]
;    push    ZH              ;2 [39]
    
    ;----------------------------------------------------------------------------
; Synchronize with sync pattern:
;----------------------------------------------------------------------------
;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
;sync up with J to K edge during sync pattern -- use fastest possible loops
;The first part waits at most 1 bit long since we must be in sync pattern.
;ZL is guaranteed to be < 0x80 because I flag is clear. When we jump to
;waitForJ, ensure that this prerequisite is met.
;NOTE(review): the 'in ZL, SREG' that established this bound is commented out
;above, so ZL holds whatever the caller left in it. The loop still ends at the
;latest when 'inc ZL' wraps to zero; only the length of the timeout is affected.
waitForJ:
	inc	ZL
	sbis	USBIN, USBMINUS
	 brne	waitForJ        ; just make sure we have ANY timeout
waitForK:
;The following code results in a sampling window of 1/4 bit which meets the spec.
;Five back-to-back samples of D-; the first one that reads low jumps to foundK.
	sbis	USBIN, USBMINUS
	 rjmp	foundK
	sbis	USBIN, USBMINUS
	 rjmp	foundK
	sbis	USBIN, USBMINUS
	 rjmp	foundK
	sbis	USBIN, USBMINUS
	 rjmp	foundK
	sbis	USBIN, USBMINUS
	 rjmp	foundK
	ret   ; SOF Error -- D- never went low in the sampling window, give up
foundK:
;{3, 5} after falling D- edge, average delay: 4 cycles [we want 4 for center sampling]
;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
;are cycles from center of first sync (double K) bit after the instruction

;Altered to remove double buffering
	nop2				;2 [2]
	nop2				;2 [4]
	clr	ZH			;1 [5] value is overwritten by 'ldi ZH' below; acts as a 1-cycle filler
	ldi	ZL, lo8((usbRxBuf))	;1 [6] Z = start of receive buffer
	ldi	ZH, hi8((usbRxBuf))	;1 [7]

	sbis	USBIN, USBMINUS		;1 [8] we want two bits K [sample 1 cycle too early]
	 rjmp	haveTwoBitsK		;2 [10]
	rjmp    waitForK		;2  this was not the end of sync, retry
haveTwoBitsK:
;----------------------------------------------------------------------------
; push more registers and initialize values while we sample the first bits:
;----------------------------------------------------------------------------
;(nothing is pushed in this version; only the init instructions remain)
	ldi     cnt, USB_BUFSIZE;1 [12] [inserted init instruction] byte budget for the receive loop
	ser     x3              ;1 [13] [inserted init instruction] x3 = 0xff: no bit positions unstuffed yet
	nop2                    ;2 [14]
	nop2                    ;2 [16]

	in      x1, USBIN       ;1 [17] <-- sample bit 0
	ldi     shift, 0xff     ;1 [18]
	bst     x1, USBMINUS    ;1 [19] T = sampled D- level
	bld     shift, 0        ;1 [20] shift bit 0 = T
	nop2                    ;2 [22]
	rjmp    rxbit1          ;2 [24]
    
;----------------------------------------------------------------------------
; Receiver loop (numbers in brackets are cycles within byte after instr)
;----------------------------------------------------------------------------

;Bit-unstuffing handlers for bit positions 0..6. Each one is entered from the
;matching 'breq'/'brlo' test in the receiver loop and returns to the matching
;didUnstuffN label. The common pattern is:
;  - clear bit N in x3 (x3 records which positions were altered, see rxLoop),
;  - set bit N in shift so the "all remaining bits zero" test does not fire again,
;  - take a fresh sample of USBIN into x1 or x2 in place of the stuffed bit.
;The order of these steps differs per handler so that the 'in' lands on the
;annotated cycle; do not reorder.
unstuff0:               ;1 (branch taken)
	andi    x3, ~0x01   ;1 [15]
	mov     x1, x2      ;1 [16] x2 contains last sampled (stuffed) bit
	in      x2, USBIN   ;1 [17] <-- sample bit 1 again
	ori     shift, 0x01 ;1 [18]
	rjmp    didUnstuff0 ;2 [20]

unstuff1:               ;1 (branch taken)
	mov     x2, x1      ;1 [21] x1 contains last sampled (stuffed) bit
	andi    x3, ~0x02   ;1 [22]
	ori     shift, 0x02 ;1 [23]
	nop                 ;1 [24]
	in      x1, USBIN   ;1 [25] <-- sample bit 2 again
	rjmp    didUnstuff1 ;2 [27]

unstuff2:               ;1 (branch taken)
	andi    x3, ~0x04   ;1 [29]
	ori     shift, 0x04 ;1 [30]
	mov     x1, x2      ;1 [31] x2 contains last sampled (stuffed) bit
	nop                 ;1 [32]
	in      x2, USBIN   ;1 [33] <-- sample bit 3
	rjmp    didUnstuff2 ;2 [35]

unstuff3:               ;1 (branch taken)
	in      x2, USBIN   ;1 [34] <-- sample stuffed bit 3 [one cycle too late]
	andi    x3, ~0x08   ;1 [35]
	ori     shift, 0x08 ;1 [36]
	rjmp    didUnstuff3 ;2 [38]

unstuff4:               ;1 (branch taken)
	andi    x3, ~0x10   ;1 [40]
	in      x1, USBIN   ;1 [41] <-- sample stuffed bit 4
	ori     shift, 0x10 ;1 [42]
	rjmp    didUnstuff4 ;2 [44]

unstuff5:               ;1 (branch taken)
	andi    x3, ~0x20   ;1 [48]
	in      x2, USBIN   ;1 [49] <-- sample stuffed bit 5
	ori     shift, 0x20 ;1 [50]
	rjmp    didUnstuff5 ;2 [52]

unstuff6:               ;1 (branch taken)
	andi    x3, ~0x40   ;1 [56]
	in      x1, USBIN   ;1 [57] <-- sample stuffed bit 6
	ori     shift, 0x40 ;1 [58]
	rjmp    didUnstuff6 ;2 [60]

; extra jobs done during bit interval:
; bit 0:    store, clear [SE0 is unreliable here due to bit dribbling in hubs]
; bit 1:    se0 check
; bit 2:    overflow check
; bit 3:    recovery from delay [bit 0 tasks took too long]
; bit 4:    none
; bit 5:    none
; bit 6:    none
; bit 7:    jump, eor
;Main receive loop: one pass per received byte, 8 cycles per bit.
;Register roles as used below:
;  x1, x2 - alternate as holders of the two most recent raw USBIN samples; the
;           eor of consecutive samples is nonzero at D- when the line level changed
;  shift  - collects one bit per sample pair (copied from the D- position via T)
;  x3     - starts at 0xff per byte; unstuff handlers clear the altered positions
;  cnt    - remaining byte budget; a borrow on decrement branches to 'overflow'
;  Z      - write pointer into the receive buffer
;After every bit, 'andi shift, <mask>' / 'cpi shift, <limit>' tests whether the
;bits collected so far are all zero; if so the matching unstuff handler is taken.
;'se0' and 'overflow' are not defined in this part of the file (presumably they
;live in asmcommon.inc, included below -- confirm).
rxLoop:
	eor     x3, shift   ;1 [0] reconstruct: x3 is 0 at bit locations we changed, 1 at others
	in      x1, USBIN   ;1 [1] <-- sample bit 0
	st      z+, x3      ;2 [3] store data
#ifdef __AVR_ATtiny10__
	nop                 ;extra cycle on ATtiny10 only -- presumably 'st z+' is one cycle shorter on that core; confirm in the instruction set manual
#endif     
	ser     x3          ;1 [4] restart the "altered positions" record for the next byte
	nop                 ;1 [5]
	eor     x2, x1      ;1 [6]
	bst     x2, USBMINUS;1 [7]       
	bld     shift, 0    ;1 [8]
rxbit1:     
;entry point from haveTwoBitsK for the very first byte (bit 0 already in shift)
	in      x2, USBIN   ;1 [9] <-- sample bit 1 (or possibly bit 0 stuffed)
	andi    x2, USBMASK ;1 [10] keep only the bits selected by USBMASK; zero result = SE0
	breq    se0         ;1 [11] SE0 check for bit 1
	andi    shift, 0xf9 ;1 [12]
didUnstuff0:
	breq    unstuff0    ;1 [13]
	eor     x1, x2      ;1 [14]
	bst     x1, USBMINUS;1 [15]
	bld     shift, 1    ;1 [16]
rxbit2:
	in      x1, USBIN   ;1 [17] <-- sample bit 2 (or possibly bit 1 stuffed)
	andi    shift, 0xf3 ;1 [18]
	breq    unstuff1    ;1 [19] do remaining work for bit 1
didUnstuff1:
	subi    cnt, 1      ;1 [20]
	brcs    overflow    ;1 [21] loop control
	eor     x2, x1      ;1 [22]
	bst     x2, USBMINUS;1 [23]
	bld     shift, 2    ;1 [24]
	in      x2, USBIN   ;1 [25] <-- sample bit 3 (or possibly bit 2 stuffed)
	andi    shift, 0xe7 ;1 [26]
	breq    unstuff2    ;1 [27]
didUnstuff2:
	eor     x1, x2      ;1 [28]
	bst     x1, USBMINUS;1 [29]
	bld     shift, 3    ;1 [30]
didUnstuff3:
	andi    shift, 0xcf ;1 [31]
	breq    unstuff3    ;1 [32]
	in      x1, USBIN   ;1 [33] <-- sample bit 4
	eor     x2, x1      ;1 [34]
	bst     x2, USBMINUS;1 [35]
	bld     shift, 4    ;1 [36]
didUnstuff4:
	andi    shift, 0x9f ;1 [37]
	breq    unstuff4    ;1 [38]
	nop2                ;2 [40]
	in      x2, USBIN   ;1 [41] <-- sample bit 5
	eor     x1, x2      ;1 [42]
	bst     x1, USBMINUS;1 [43]
	bld     shift, 5    ;1 [44]
didUnstuff5:
	andi    shift, 0x3f ;1 [45]
	breq    unstuff5    ;1 [46]
	nop2                ;2 [48]
	in      x1, USBIN   ;1 [49] <-- sample bit 6
	eor     x2, x1      ;1 [50]
	bst     x2, USBMINUS;1 [51]
	bld     shift, 6    ;1 [52]
didUnstuff6:
	cpi     shift, 0x02 ;1 [53] shift < 2 means bits 1..7 are all zero
	brlo    unstuff6    ;1 [54]
	nop2                ;2 [56]
	in      x2, USBIN   ;1 [57] <-- sample bit 7
	eor     x1, x2      ;1 [58]
	bst     x1, USBMINUS;1 [59]
	bld     shift, 7    ;1 [60]
didUnstuff7:
	cpi     shift, 0x04 ;1 [61] shift >= 4 means at least one of bits 2..7 is set
	brsh    rxLoop      ;2 [63] loop control
unstuff7:
;fall-through case of the test above: same pattern as unstuff0..6 for bit 7
	andi    x3, ~0x80   ;1 [63]
	ori     shift, 0x80 ;1 [64]
	in      x2, USBIN   ;1 [65] <-- sample stuffed bit 7
	nop                 ;1 [66]
	rjmp    didUnstuff7 ;2 [68]

#include "asmcommon.inc"

;----------------------------------------------------------------------------
; Transmitting data
;----------------------------------------------------------------------------

;Head of the transmit loop. Placed before usbSendAndReti in the file; it is
;entered by the backward branches 'brcs txBitloop' and 'brne txByteLoop' there.
;Shifts the next bit of 'shift' into carry:
;  carry clear -> doExorN1 toggles the port mirror x1 and reloads x4 with 6
;  carry set   -> line level is kept; x4 counts these down, and when it reaches
;                 zero the bit is put back with 'lsl' and one bit time is spent
;                 going round stuffN1Delay again (the inserted stuff bit)
txByteLoop:
txBitloop:
stuffN1Delay:                   ;     [03]
	ror     shift               ;[-5] [11] [59]
	brcc    doExorN1            ;[-4]      [60]
	subi    x4, 1               ;[-3]
	brne    commonN1            ;[-2]
	lsl     shift               ;[-1] compensate ror after rjmp stuffDelay
	nop                         ;[00] stuffing consists of just waiting 8 cycles
	rjmp    stuffN1Delay        ;[01] after ror, C bit is reliably clear

;Handshake senders. Each entry point puts the byte to transmit into x3 and
;reaches usbSendX3, which sets cnt = 2 (sync byte + one byte) and points Z at a
;memory location holding that byte, then falls through into usbSendAndReti.
;  sendNakAndReti: x3 = USBPID_NAK
;  sendAckAndReti: x3 = USBPID_ACK
;  sendCntAndReti: x3 = cnt (caller supplies the byte in cnt)
;NOTE(review): the bracketed cycle numbers were not re-derived for the ATtiny10
;path, which executes an additional 'sts' -- verify if timing is touched.
sendNakAndReti:                 ;0 [-19] 19 cycles until SOP
	ldi     x3, USBPID_NAK      ;1 [-18]
	rjmp    usbSendX3           ;2 [-16]
sendAckAndReti:                 ;0 [-19] 19 cycles until SOP
	ldi     x3, USBPID_ACK      ;1 [-18]
	rjmp    usbSendX3           ;2 [-16]
sendCntAndReti:                 ;0 [-17] 17 cycles until SOP
	mov     x3, cnt             ;1 [-16]
usbSendX3:                      ;0 [-16]
	ldi     cnt, 2              ;1 [-13]
    
; Attiny10 does not have its registers mapped to the memory space    
; so x3 cannot be read back through Z as a data address there. The ATtiny10
; path copies x3 into the receive buffer and transmits it from that location;
; ZH = 0 relies on usbRxBuf+9 lying below address 0x100.
#ifdef __AVR_ATtiny10__   
    sts     usbRxBuf+9,x3   ; overwrite CRC, but this is not checked anyways
    ldi     ZL, lo8(usbRxBuf+9) ;1 [-15] 'x3' is R20
    ldi     ZH, 0               ;1 [-14]
#else
    ldi     ZL, 20              ;1 [-15] 'x3' is R20
    ldi     ZH, 0               ;1 [-14]
#endif    
    
;   rjmp    usbSendAndReti      fallthrough

; USB spec says:
; idle = J
; J = (D+ = 0), (D- = 1) or USBOUT = 0x01
; K = (D+ = 1), (D- = 0) or USBOUT = 0x02
; Spec allows 7.5 bit times from EOP to SOP for replies (= 60 cycles)

;usbSend:
;pointer to data in 'Z'
;number of bytes in 'cnt' -- including sync byte
;uses: x1...x4, shift, cnt, Z [x1 = mirror USBOUT, x2 = USBMASK, x3 = scratch at end, x4 = bitstuff cnt]
;Numbers in brackets are time since first bit of sync pattern is sent (start of instruction)
;Transmit 'cnt' bytes (the sync byte counts as one) starting with a sync byte
;of 0x40 generated here, followed by the bytes read through Z, then send EOP
;(SE0 followed by J) and release the bus.
;In:   Z   = address of the first byte after sync
;      cnt = number of bytes including sync
;Uses: x1 (mirror of USBOUT), x2 (USBMASK, later scratch), x3 (scratch at end),
;      x4 (bit-stuff counter, reloaded with 6 after every level change), shift
;Loop structure: bits are sent in pairs by stuffN1Delay/commonN1 and
;stuffN2Delay/commonN2; 'subi cnt, 171' is executed once per pair and its carry
;decides whether another pair follows. After three pairs bits 6 and 7 are sent
;by the stuff6/stuff7 code, the next byte is fetched and 'tst cnt' ends the loop.
;Returns with 'ret'.
;NOTE(review): the name says "Reti" but the exit is 'ret' -- presumably this
;build does not run the driver from the hardware interrupt; confirm.
usbSendAndReti:
    in      x2, USBDDR          ;[-12] 12 cycles until SOP
    ori     x2, USBMASK         ;[-11]
#ifdef __AVR_ATtiny10__
 ;   nop
#endif    
    sbi     USBOUT, USBMINUS    ;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
    out     USBDDR, x2          ;[-8] <--- acquire bus
    in      x1, USBOUT          ;[-7] port mirror for tx loop
    ldi     shift, 0x40         ;[-6] sync byte is first byte sent (we enter loop after ror)
    ldi     x2, USBMASK         ;[-5]
    nop2;push   x4                  ;[-4] two filler cycles in place of the removed push
doExorN1:
    eor     x1, x2              ;[-2] [06] [62] toggle both data lines in the mirror
    ldi     x4, 6               ;[-1] [07] [63]
commonN1:
stuffN2Delay:
    out     USBOUT, x1          ;[00] [08] [64] <--- set bit
    ror     shift               ;[01]
    brcc    doExorN2            ;[02]
    subi    x4, 1               ;[03]
    brne    commonN2            ;[04]
    lsl     shift               ;[05] compensate ror after rjmp stuffDelay
    rjmp    stuffN2Delay        ;[06] after ror, C bit is reliably clear
doExorN2:
    eor     x1, x2              ;[04] [12]
    ldi     x4, 6               ;[05] [13]
commonN2:
    nop                         ;[06] [14]
    subi    cnt, 171            ;[07] [15] trick: (3 * 171) & 0xff = 1
    out     USBOUT, x1          ;[08] [16] <--- set bit
    brcs    txBitloop           ;[09]      [25] [41]

stuff6Delay:
    ror     shift               ;[42] [50]
    brcc    doExor6             ;[43]
    subi    x4, 1               ;[44]
    brne    common6             ;[45]
    lsl     shift               ;[46] compensate ror after rjmp stuffDelay
    nop                         ;[47] stuffing consists of just waiting 8 cycles
    rjmp    stuff6Delay         ;[48] after ror, C bit is reliably clear
doExor6:
    eor     x1, x2              ;[45] [53]
    ldi     x4, 6               ;[46]
common6:
stuff7Delay:
    ror     shift               ;[47] [55]
    out     USBOUT, x1          ;[48] <--- set bit
    brcc    doExor7             ;[49]
    subi    x4, 1               ;[50]
    brne    common7             ;[51]
    lsl     shift               ;[52] compensate ror after rjmp stuffDelay
    rjmp    stuff7Delay         ;[53] after ror, C bit is reliably clear
doExor7:
    eor     x1, x2              ;[51] [59]
    ldi     x4, 6               ;[52]
common7:
    ld      shift, z+           ;[53] fetch next byte to send, advance Z
    
    tst     cnt                 ;[55]
    out     USBOUT, x1          ;[56] <--- set bit
    brne    txByteLoop          ;[57]

;make SE0:
    cbr     x1, USBMASK         ;[58] prepare SE0 [spec says EOP may be 15 to 18 cycles]
    lds     x2, usbNewDeviceAddr;[59]
#ifdef __AVR_ATtiny10__
    nop                         ;extra cycle on ATtiny10 only -- presumably 'lds' is one cycle shorter on that core; confirm
#endif     
    lsl     x2                  ;[61] we compare with left shifted address
;Z has advanced by one per byte sent. For a handshake sent via usbSendX3
;(cnt = 2) with the non-ATtiny10 start value of 20, Z is now 22 and the
;subtraction below yields zero, so the address store is skipped.
;NOTE(review): on the ATtiny10 path usbSendX3 starts Z at usbRxBuf+9, so the
;result is zero only if usbRxBuf+9 happens to equal 20. It looks like the
;address is then also stored after ACK/NAK on ATtiny10 -- confirm whether that
;is intended before relying on the comment on the next line.
    subi    ZL, 2 + 20          ;[62] Only assign address on data packets, not ACK/NAK in x3
    sbci    ZH, 0               ;[63]
    out     USBOUT, x1          ;[00] <-- out SE0 -- from now 2 bits = 16 cycles until bus idle
;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
;set address only after data packet was sent, not after handshake
    breq    skipAddrAssign      ;[01] flags still come from subi/sbci above ('out' leaves them alone)
    sts     usbDeviceAddr, x2   ; if not skipped: SE0 is one cycle longer
#ifdef __AVR_ATtiny10__
    nop                         ;extra cycle on ATtiny10 only -- presumably compensates a shorter 'sts'; confirm
#endif     
skipAddrAssign:
;end of usbDeviceAddress transfer
    ldi     x2, 1<<USB_INTR_PENDING_BIT;[03] int0 occurred during TX -- clear pending flag
    USB_STORE_PENDING(x2)       ;[04]
    ori     x1, USBIDLE         ;[05] x1 = port value for J (idle)
    in      x2, USBDDR          ;[06]
    cbr     x2, USBMASK         ;[07] set both pins to input
    mov     x3, x1              ;[08]
    cbr     x3, USBMASK         ;[09] configure no pullup on both pins
    nop2;pop    x4                   ;[10] two filler cycles in place of the removed pop
    nop2                        ;[12]
    nop2                        ;[14]
    out     USBOUT, x1          ;[16] <-- out J (idle) -- end of SE0 (EOP signal)
    out     USBDDR, x2          ;[17] <-- release bus now
    out     USBOUT, x3          ;[18] <-- ensure no pull-up resistors are active
    ret
Detected encoding: ASCII (7 bit)2