Rev Author Line No. Line
3471 miho 1 /* Name: usbdrvasm20.inc
2 * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
3 * Author: Jeroen Benschop
4 * Based on usbdrvasm16.inc from Christian Starkjohann
5 * Creation Date: 2008-03-05
6 * Tabsize: 4
7 * Copyright: (c) 2008 by Jeroen Benschop and OBJECTIVE DEVELOPMENT Software GmbH
8 * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
9 * Revision: $Id: usbdrvasm20.inc 740 2009-04-13 18:23:31Z cs $
10 */
11  
12 /* Do not link this file! Link usbdrvasm.S instead, which includes the
13 * appropriate implementation!
14 */
15  
16 /*
17 General Description:
18 This file is the 20 MHz version of the assembler part of the USB driver. It
19 requires a 20 MHz crystal (not a ceramic resonator and not a calibrated RC
20 oscillator).
21  
22 See usbdrv.h for a description of the entire driver.
23  
24 Since almost all of this code is timing critical, don't change unless you
25 really know what you are doing! Many parts require not only a maximum number
26 of CPU cycles, but even an exact number of cycles!
27 */
28  
; leap2 is an alias for register x3. The receive loop uses it as the counter
; that inserts a leap cycle once every three stuff bits.
; nextInst is a branch target used with brcc/brcs where the cycle annotations
; show a conditional extra cycle ("[10 or 11]"). Presumably it denotes the
; instruction following the branch (TODO confirm for both assemblers); the
; spelling differs between the IAR assembler and other assemblers.
29 #define leap2 x3
30 #ifdef __IAR_SYSTEMS_ASM__
31 #define nextInst $+2
32 #else
33 #define nextInst .+0
34 #endif
35  
36 ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
37 ;nominal frequency: 20 MHz -> 13.333333 cycles per bit, 106.666667 cycles per byte
38 ; Numbers in brackets are clocks counted from center of last sync bit
39 ; when instruction starts
40 ;register use in receive loop:
41 ; shift assembles the byte currently being received
42 ; x1 holds the D+ and D- line state
43 ; x2 holds the previous line state
44 ; x4 (leap) is used to add a leap cycle once every three bytes received
45 ; x3 (leap2) is used to add a leap cycle once every three stuff bits received
46 ; bitcnt is used to determine when a stuff bit is due
47 ; cnt holds the number of bytes left in the receive buffer
48  
; Interrupt entry point. Saves YL, SREG and YH only, then falls through into
; the sync-pattern search (waitForJ). The remaining registers are pushed later,
; once the edge of the sync pattern has been found.
49 USB_INTR_VECTOR:
50 ;order of registers pushed: YL, SREG, YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
51 push YL ;[-28] push only what is necessary to sync with edge ASAP
52 in YL, SREG ;[-26] copy status register into YL so it can be pushed
53 push YL ;[-25] save SREG (held in YL)
54 push YH ;[-23] save YH
55 ;----------------------------------------------------------------------------
56 ; Synchronize with sync pattern:
57 ;----------------------------------------------------------------------------
58 ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
59 ;sync up with J to K edge during sync pattern -- use fastest possible loops
60 ;The first part waits at most 1 bit long since we must be in sync pattern.
61 ;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to
62 ;waitForJ, ensure that this prerequisite is met.
; Wait until D- is high (J state) or until YL wraps around to zero (timeout).
; In both cases execution continues at waitForK.
63 waitForJ:
64 inc YL ; timeout counter; sets Z when YL wraps to 0
65 sbis USBIN, USBMINUS ; D- high: skip the branch and leave the loop
66 brne waitForJ ; just make sure we have ANY timeout
; Poll D- nine times in a row for the J to K edge (D- low) and jump to foundK
; as soon as it is seen. If D- never goes low, optionally count a start of
; frame and run the user hook, then leave through sofError (label not defined
; in this part of the file).
67 waitForK:
68 ;The following code results in a sampling window of < 1/4 bit which meets the spec.
69 sbis USBIN, USBMINUS ;[-19]
70 rjmp foundK ;[-18]
71 sbis USBIN, USBMINUS
72 rjmp foundK
73 sbis USBIN, USBMINUS
74 rjmp foundK
75 sbis USBIN, USBMINUS
76 rjmp foundK
77 sbis USBIN, USBMINUS
78 rjmp foundK
79 sbis USBIN, USBMINUS
80 rjmp foundK
81 sbis USBIN, USBMINUS
82 rjmp foundK
83 sbis USBIN, USBMINUS
84 rjmp foundK
85 sbis USBIN, USBMINUS
86 rjmp foundK
; no K state seen in any of the samples above
87 #if USB_COUNT_SOF
88 lds YL, usbSofCount
89 inc YL
90 sts usbSofCount, YL
91 #endif /* USB_COUNT_SOF */
92 #ifdef USB_SOF_HOOK
93 USB_SOF_HOOK
94 #endif
95 rjmp sofError
; A K state was seen. Use one bit time to save bitcnt and shift and to set up
; the receive loop (Y = usbRxBuf + usbInputBufOffset, shift marker, bitcnt),
; then sample D- again. If it is still K this was the double K at the end of
; the sync pattern and reception continues at haveTwoBitsK. Otherwise the two
; pushes are undone and the search restarts at waitForK.
96 foundK: ;[-16]
97 ;{3, 5} after falling D- edge, average delay: 4 cycles
98 ;bit0 should be at 34 for center sampling. Currently at 4 so 30 cycles till bit 0 sample
99 ;use 1 bit time for setup purposes, then sample again. Numbers in brackets
100 ;are cycles from center of first sync (double K) bit after the instruction
101 push bitcnt ;[-16]
102 ; [---] ;[-15]
103 lds YL, usbInputBufOffset;[-14]
104 ; [---] ;[-13]
105 clr YH ;[-12]
106 subi YL, lo8(-(usbRxBuf));[-11] [rx loop init] subtracting the negated address adds usbRxBuf (low byte)
107 sbci YH, hi8(-(usbRxBuf));[-10] [rx loop init] high byte with borrow: Y = usbRxBuf + offset
108 push shift ;[-9]
109 ; [---] ;[-8]
110 ldi shift,0x40 ;[-7] preset marker bit (0x40, bit 6); it reaches carry after 7 shifts so processing bit7 can be detected
111 nop2 ;[-6]
112 ; [---] ;[-5]
113 ldi bitcnt, 5 ;[-4] [rx loop init]
114 sbis USBIN, USBMINUS ;[-3] we want two bits K (sample 3 cycles too early)
115 rjmp haveTwoBitsK ;[-2]
116 pop shift ;[-1] undo the push from before
117 pop bitcnt ;[1]
118 rjmp waitForK ;[3] this was not the end of sync, retry
119 ; The entire loop from waitForK until rjmp waitForK above must not exceed two
120 ; bit times (= 27 cycles).
121  
122 ;----------------------------------------------------------------------------
123 ; push more registers and initialize values while we sample the first bits:
124 ;----------------------------------------------------------------------------
; End of sync found. Save the remaining working registers (x1, x2, x3, x4, cnt)
; and initialize the leap counters, the byte counter and the previous line
; state while waiting for the sample point of data bit 0.
125 haveTwoBitsK:
126 push x1 ;[0]
127 push x2 ;[2]
128 push x3 ;[4] (leap2)
129 ldi leap2, 0x55 ;[6] add leap cycle on 2nd,5th,8th,... stuff bit
130 push x4 ;[7] == leap
131 ldi leap, 0x55 ;[9] skip leap cycle on 2nd,5th,8th,... byte received
132 push cnt ;[10]
133 ldi cnt, USB_BUFSIZE ;[12] [rx loop init]
134 ldi x2, 1<<USBPLUS ;[13] current line state is K state. D+=="1", D-=="0"
; bit0: sample the first bit of a byte, then join the common bit handling.
135 bit0:
136 in x1, USBIN ;[0] sample line state
137 andi x1, USBMASK ;[1] filter only D+ and D- bits
138 rjmp handleBit ;[2] make bit0 14 cycles long
139  
140 ;----------------------------------------------------------------------------
141 ; Process bit7. However, bit 6 still may need unstuffing.
142 ;----------------------------------------------------------------------------
143  
; b6checkUnstuff: reached after a "1" was received as bit 6. Counts it in
; bitcnt and goes to unstuff6 when a stuff bit is due, else falls into bit7.
; bit7: samples the last bit of the byte, stores the finished byte through Y,
; re-arms shift with the 0x40 marker and applies the per-byte leap cycle.
; The "1" path is handled here, the "0" path in b7handle0.
144 b6checkUnstuff:
145 dec bitcnt ;[9]
146 breq unstuff6 ;[10]
147 bit7:
148 subi cnt, 1 ;[11] cannot use dec because it does not affect the carry flag
149 brcs overflow ;[12] Too many bytes received. Ignore packet
150 in x1, USBIN ;[0] sample line state
151 andi x1, USBMASK ;[1] filter only D+ and D- bits
152 cpse x1, x2 ;[2] when previous line state equals current line state, handle "1"
153 rjmp b7handle0 ;[3] when line state differs, handle "0"
154 sec ;[4]
155 ror shift ;[5] shift "1" into the data
156 st y+, shift ;[6] store the data into the buffer
157 ldi shift, 0x40 ;[7] reset data for receiving the next byte
158 subi leap, 0x55 ;[9] trick to introduce a leap cycle every 3 bytes
159 brcc nextInst ;[10 or 11] it will fail after 85 bytes. However low speed can only receive 11
160 dec bitcnt ;[11 or 12]
161 brne bit0 ;[12 or 13]
162 ldi x1, 1 ;[13 or 14] unstuffing bit 7
163 in bitcnt, USBIN ;[0] sample stuff bit
164 rjmp unstuff ;[1]
165  
; Bit 7 was a "0" (line state changed). Record the new line state, restart the
; stuff counter, store the finished byte and continue with bit0 of the next
; byte. Both final branches go to bit0; the path through rjmp is one cycle
; longer, which is how the per-byte leap cycle is inserted here.
166 b7handle0:
167 mov x2,x1 ;[5] Set x2 to current line state
168 ldi bitcnt, 6 ;[6]
169 lsr shift ;[7] shift "0" into the data
170 st y+, shift ;[8] store data into the buffer
171 ldi shift, 0x40 ;[10] reset data for receiving the next byte
172 subi leap, 0x55 ;[11] trick to introduce a leap cycle every 3 bytes
173 brcs bit0 ;[12] it will fail after 85 bytes. However low speed can only receive 11
174 rjmp bit0 ;[13]
175  
176  
177 ;----------------------------------------------------------------------------
178 ; Handle unstuff
179 ; x1==0xFF indicate unstuffing bit6
180 ;----------------------------------------------------------------------------
181  
; Discard a stuff bit. On entry at unstuff, bitcnt holds the raw port sample of
; the stuff bit and x1 tells which data bit preceded it:
;   x1 == 0xFF  stuff bit after bit 6    (entry through unstuff6, resumes at bit7)
;   x1 == 0     stuff bit after bits 0-5 (resumes at bitloop)
;   x1 == 1     stuff bit after bit 7    (falls through, samples bit0 here)
; The sampled state becomes the new previous line state in x2 and the stuff
; counter is reset to 6. leap2 adds one cycle every three stuff bits.
182 unstuff6:
183 ldi x1,0xFF ;[12] indicate unstuffing bit 6
184 in bitcnt, USBIN ;[0] sample stuff bit
185 nop ;[1] fix timing
186 unstuff: ;b0-5 b6 b7
187 mov x2,bitcnt ;[3] [2] [3] Set x2 to match line state
188 subi leap2, 0x55 ;[4] [3] [4] delay loop
189 brcs nextInst ;[5] [4] [5] add one cycle every three stuff bits
190 sbci leap2,0 ;[6] [5] [6]
191 ldi bitcnt,6 ;[7] [6] [7] reset bit stuff counter
192 andi x2, USBMASK ;[8] [7] [8] only keep D+ and D-
193 cpi x1,0 ;[9] [8] [9]
194 brmi bit7 ;[10] [9] [10] finished unstuffing bit6 When x1<0
195 breq bitloop ;[11] --- [11] finished unstuffing bit0-5 when x1=0
196 nop ;--- --- [12]
197 in x1, USBIN ;--- --- [0] sample line state for bit0
198 andi x1, USBMASK ;--- --- [1] filter only D+ and D- bits
199 rjmp handleBit ;--- --- [2] make bit0 14 cycles long
200  
201 ;----------------------------------------------------------------------------
202 ; Receiver loop (numbers in brackets are cycles within byte after instr)
203 ;----------------------------------------------------------------------------
; Sample one data bit (bits 1..6 enter at bitloop, bit 0 enters at handleBit
; with x1 already sampled). An unchanged line state is a "1", a changed one is
; a "0" (handled in handle0). Both lines low leaves the loop through se0
; (label not defined in this part of the file). After a "1", carry set by the
; shift means the 0x40 marker has been shifted out and bit 7 is next;
; otherwise bitcnt is decremented and a stuff bit is read when it reaches zero.
204 bitloop:
205 in x1, USBIN ;[0] sample line state
206 andi x1, USBMASK ;[1] filter only D+ and D- bits
207 breq se0 ;[2] both lines are low so handle se0
208 handleBit:
209 cpse x1, x2 ;[3] when previous line state equals current line state, handle "1"
210 rjmp handle0 ;[4] when line state differs, handle "0"
211 sec ;[5]
212 ror shift ;[6] shift "1" into the data
213 brcs b6checkUnstuff ;[7] When after shift C is set, next bit is bit7
214 nop2 ;[8]
215 dec bitcnt ;[10]
216 brne bitloop ;[11]
217 ldi x1,0 ;[12] indicate unstuff for bit other than bit6 or bit7
218 in bitcnt, USBIN ;[0] sample stuff bit
219 rjmp unstuff ;[1]
220  
; A "0" was received for one of bits 0..6: remember the new line state, restart
; the stuff counter and shift a 0 into the byte. Carry set by the shift means
; the marker bit dropped out, so the next bit to sample is bit 7.
221 handle0:
222 mov x2, x1 ;[6] Set x2 to current line state
223 ldi bitcnt, 6 ;[7] reset unstuff counter.
224 lsr shift ;[8] shift "0" into the data
225 brcs bit7 ;[9] When after shift C is set, next bit is bit7
226 nop ;[10]
227 rjmp bitloop ;[11]
228  
229 ;----------------------------------------------------------------------------
230 ; End of receive loop. Now start handling EOP
231 ;----------------------------------------------------------------------------
232  
; Restores cnt, x4, x3, x2, x1, shift and bitcnt, i.e. the registers pushed in
; foundK and haveTwoBitsK, in reverse push order.
233 macro POP_STANDARD ; 14 cycles
234 pop cnt
235 pop x4
236 pop x3
237 pop x2
238 pop x1
239 pop shift
240 pop bitcnt
241 endm
; Restores YH, SREG and YL, i.e. what USB_INTR_VECTOR pushed on entry, in
; reverse order. The return instruction itself is not part of this macro.
242 macro POP_RETI ; 7 cycles
243 pop YH
244 pop YL ; saved SREG value
245 out SREG, YL
246 pop YL ; original YL
247 endm
248  
249  
250  
251 #include "asmcommon.inc"
252  
253 ; USB spec says:
254 ; idle = J
255 ; J = (D+ = 0), (D- = 1)
256 ; K = (D+ = 1), (D- = 0)
257 ; Spec allows 7.5 bit times from EOP to SOP for replies
258 ; 7.5 bit times is 100 cycles. This implementation arrives a bit later at se0
259 ; than specified in the include file but there is plenty of time
260  
; Transmit a stuff bit after one of bits 0..6: toggle the line state in the
; port mirror x1, clear the bit-stuff history x2, output the toggled state and
; rejoin the bit loop at didStuffN.
261 bitstuffN:
262 eor x1, x4 ;[8]
263 ldi x2, 0 ;[9]
264 nop2 ;[10]
265 out USBOUT, x1 ;[12] <-- out
266 rjmp didStuffN ;[0]
267  
; Transmit a stuff bit after bit 7: toggle the line state in x1, clear the
; bit-stuff history x2 and jump back to didStuff7, which performs the out.
; The code at didStuff7 executes "ror shift" again, so shift is rotated left
; here first to cancel that.
268 bitstuff7:
269 eor x1, x4 ;[6]
270 ldi x2, 0 ;[7] Carry is zero due to brcc
271 rol shift ;[8] compensate for ror shift at branch destination
272 nop2 ;[9]
273 rjmp didStuff7 ;[11]
274  
; Entry points for sending a one-byte handshake packet. Each one puts the PID
; byte into x3, points Y at register x3 (data address 20) and sets cnt to 2
; (sync byte plus PID), then falls through into usbSendAndReti.
;   sendNakAndReti: sends USBPID_NAK
;   sendAckAndReti: sends USBPID_ACK
;   sendCntAndReti: sends the PID passed in cnt
;   sendX3AndReti:  sends the PID passed in x3
275 sendNakAndReti:
276 ldi x3, USBPID_NAK ;[-18]
277 rjmp sendX3AndReti ;[-17]
278 sendAckAndReti:
279 ldi cnt, USBPID_ACK ;[-17]
280 sendCntAndReti:
281 mov x3, cnt ;[-16]
282 sendX3AndReti:
283 ldi YL, 20 ;[-15] x3==r20 address is 20
284 ldi YH, 0 ;[-14]
285 ldi cnt, 2 ;[-13]
286 ; rjmp usbSendAndReti fallthrough
286 ; rjmp usbSendAndReti fallthrough
287  
288 ;usbSend:
289 ;pointer to data in 'Y'
290 ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
291 ;uses: x1...x4, bitcnt, shift, cnt, Y
292 ;Numbers in brackets are time since first bit of sync pattern is sent
293 ;We don't match the transfer rate exactly (don't insert leap cycles every third
294 ;byte) because the spec demands only 1.5% precision anyway.
; Transmit cnt bytes: first the sync byte 0x80, then cnt-1 bytes read through
; Y. Bits are sent LSb first by toggling D+ and D- (exor with x4) for every 0
; bit. x2 collects the bits sent so far; when it reaches 0xfc or above a stuff
; bit is inserted (bitstuffN, bitstuff7). bitcnt starts at 0x49 and is shifted
; right once per bit; it separates bits 0..6 (txBitLoop) from bit 7, which
; also loads the next byte. After the last byte an SE0 is driven, the pending
; interrupt flag is cleared, the bus is released and execution leaves through
; doReturn (label not defined in this part of the file).
295 usbSendAndReti: ; 12 cycles until SOP
296 in x2, USBDDR ;[-12]
297 ori x2, USBMASK ;[-11]
298 sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
299 in x1, USBOUT ;[-8] port mirror for tx loop
300 out USBDDR, x2 ;[-7] <- acquire bus
301 ; need not init x2 (bitstuff history) because sync starts with 0
302 ldi x4, USBMASK ;[-6] exor mask
303 ldi shift, 0x80 ;[-5] sync byte is first byte sent
304 txByteLoop:
305 ldi bitcnt, 0x49 ;[-4] [10] binary 01001001
306 txBitLoop:
307 sbrs shift, 0 ;[-3] [10] [11]
308 eor x1, x4 ;[-2] [11] [12]
309 out USBOUT, x1 ;[-1] [12] [13] <-- out N
310 ror shift ;[0] [13] [14]
311 ror x2 ;[1]
312 didStuffN:
313 nop2 ;[2]
314 nop ;[4]
315 cpi x2, 0xfc ;[5]
316 brcc bitstuffN ;[6]
317 lsr bitcnt ;[7]
318 brcc txBitLoop ;[8]
319 brne txBitLoop ;[9]
320  
321 sbrs shift, 0 ;[10]
322 eor x1, x4 ;[11]
323 didStuff7:
324 out USBOUT, x1 ;[-1] [13] <-- out 7
325 ror shift ;[0] [14]
326 ror x2 ;[1]
327 nop ;[2]
328 cpi x2, 0xfc ;[3]
329 brcc bitstuff7 ;[4]
330 ld shift, y+ ;[5]
331 dec cnt ;[7]
332 brne txByteLoop ;[8]
333 ;make SE0:
334 cbr x1, USBMASK ;[9] prepare SE0 [spec says EOP may be 25 to 30 cycles]
335 lds x2, usbNewDeviceAddr;[10]
336 lsl x2 ;[12] we compare with left shifted address
337 out USBOUT, x1 ;[13] <-- out SE0 -- from now 2 bits (about 27 cycles at 20 MHz) until bus idle
338 subi YL, 20 + 2 ;[0] Only assign address on data packets, not ACK/NAK in x3
339 sbci YH, 0 ;[1] Y == 22 (result zero) exactly when the packet was sent from x3
340 ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
341 ;set address only after data packet was sent, not after handshake
342 breq skipAddrAssign ;[2]
343 sts usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
344 skipAddrAssign:
345 ;end of usbDeviceAddress transfer
346 ldi x2, 1<<USB_INTR_PENDING_BIT;[4] int0 occurred during TX -- clear pending flag
347 USB_STORE_PENDING(x2) ;[5]
348 ori x1, USBIDLE ;[6]
349 in x2, USBDDR ;[7]
350 cbr x2, USBMASK ;[8] set both pins to input
351 mov x3, x1 ;[9]
352 cbr x3, USBMASK ;[10] configure no pullup on both pins
353 ldi x4, 5 ;[11]
354 se0Delay:
355 dec x4 ;[12] [15] [18] [21] [24]
356 brne se0Delay ;[13] [16] [19] [22] [25]
357 out USBOUT, x1 ;[26] <-- out J (idle) -- end of SE0 (EOP signal)
358 out USBDDR, x2 ;[27] <-- release bus now
359 out USBOUT, x3 ;[28] <-- ensure no pull-up resistors are active
360 rjmp doReturn