/* Copyright 2017 by Robert Evans (rrevans@gmail.com)
*1903xx Make this source file more readable,
USB VID+PID bound to Atmels genuine boot loader "dfu" (Atmel Flip)
+1904xx Added "ubaboot" string descriptor, added signature, fuse and lock data
+1904xx Created Windows programmer as replacement for the python script,
it reads ELF files
+190514 Added short-circuit for EEPROM write: Write on change only (save time)
-190515 Failed to program first 128 bytes
+210407 Added app_spm subroutine at end to allow application software
to self-modify flash out of RWW section with own idle procedure
(needed for Leica/Wild GRM card)
Support for AT90USB(8|16)2, ATmega(8|16|32)U2, ATmega(16|32)U4
Binary compatible to 190515 code
Features/commands:
Write and read flash memory
Write and read eeprom memory
Read signature bytes
Read lock/fuse bits
Reboot (jump) into user program
These are implemented as a vendor-defined protocol. See README for details.
The included sample "pyusb" driver can upload and verify programs.
Booting disables the watchdog and clears MCUSR which is preserved in r2.
Jumps directly to user program at $0000 for power-on reset, watchdog reset,
brownout reset, or USB CPU reset.
User code can enter the bootloader by
- disabling interrupts
- resetting the USB and PLL registers to reset values
- setting SPL, SPH to the top of SRAM
- setting MCUSR to zero
- jumping to the beginning of the bootloader
Implementation notes:
- heavily optimized for size not speed
- USB registers accessed via indirect addressing: LDD Y+d, STD Y+d
- many branches fall-through instead of jumping
- no interrupts (a vector table takes too much space)
- zero register is moved to YH vs. gcc's usual r1
*/
#define __SFR_OFFSET 0
#include <avr/io.h>
// avr-gcc does not define the signature bytes for the AT90USBxx devices,
// so they are supplied here. SIGNATURE_0 and SIGNATURE_2 are common to both
// parts; only SIGNATURE_1 (which encodes the flash size) differs.
#if defined(__AVR_AT90USB82__) || defined(__AVR_AT90USB162__)
# define SIGNATURE_0 0x1E
# if defined(__AVR_AT90USB82__)
#  define SIGNATURE_1 0x93 // deviates from the data sheet: 8K Flash is encoded here
# else
#  define SIGNATURE_1 0x94
# endif
# define SIGNATURE_2 0x82
#endif
// Signature bytes for the programmer, emitted as SIGNATURE_2, SIGNATURE_1, SIGNATURE_0
.section .signature,"",@progbits
.byte SIGNATURE_2
.byte SIGNATURE_1
.byte SIGNATURE_0
// Fuse bytes in the order low, high, extended.
// Low and high fuse are the same for all supported devices; only the extended
// fuse depends on the device family.
.section .fuse,"",@progbits
.byte 0xDE // lfuse: CKDIV8: no, CKSEL: Low power Quartz crystal 16 MHz
.byte 0xDF // hfuse: BOOTSZ: Minimum (512 Byte); ATmega(16|32)U4: JTAG: no; others: Reset: yes
#if defined(__AVR_ATmega16U4__) || defined(__AVR_ATmega32U4__)
.byte 0xC3 // efuse: HWBE, BODLEVEL (bits 4+5 seem to be fixed to 0 by ATmega32U4)
#else
.byte 0xF4 // efuse: HWBE, BODLEVEL
#endif
// Lock byte
.section .lock,"",@progbits
.byte 0x2F // SPM cannot overwrite boot loader section (bits 6+7 seem to be fixed to 0)
.text
// The bootloader occupies the last 0x200 (512) bytes of flash.
// The flash size is deduced from the SIGNATURE_1 byte:
// 1<<(SIGNATURE_1-0x86) gives 8K for 0x93 and 16K for 0x94 (one more
// doubling per increment of SIGNATURE_1).
.type appspace,@common // don't disassemble
appspace:
// Advance the location counter to the bootloader start; the skipped
// application space is filled with 0xFF.
.org (1<<(SIGNATURE_1-0x86))-0x200,0xFF
// The better solution than generating a huge space filled with 0xFF
// would be passing the section start to the linker.
// But I don't know how (from this assembly source).
// USB module I/O uses indirect addressing to save program words. The Y register
// is used for this and always points to base of the USB module address space.
// (YL is loaded with YBASE and YH is zero during hardware initialization.)
#define YBASE 0xD7
// sty a,b: store register b to USB register a.
// The register address a is converted into a displacement relative to Y.
.macro sty a,b
std y+\a-YBASE,\b
.endm
// ldy b,a: load register b from USB register a.
// Note the operand order: destination register first, i.e. reversed
// with respect to sty.
.macro ldy b,a
ldd \b,y+\a-YBASE
.endm
// Fallback definition: avr-libc 2.0.0 seems to be missing the USBRF
// definition for the ATmega32U4, so define the bit number here if the
// device header did not.
#ifndef USBRF
# define USBRF 5
#endif
// USB commands
//
// The SETUP handling code exploits the fact that for all requests the upper
// four bits of bRequest are zero and the lower four bits of bmRequestType are
// also zero. So the two values can be bit-wise OR'ed and compared at the same
// time against a single command value. This saves space vs. separate compares.
//
// Note that this requires all requests to be DEVICE requests because the lower
// four bits of bmRequestType are used to identify the interface or endpoint for
// other transaction types.
// bmRequestType part of the command value.
// Bit 7 set selects a control read (RD_DATA state), clear selects a control write.
#define CMD_DIR_IN 0xC0
#define CMD_DIR_OUT 0x40
// Command value = bmRequestType | bRequest
#define CMD_GET_SIGRD (CMD_DIR_IN | 1)
#define CMD_GET_PMEM (CMD_DIR_IN | 2)
#define CMD_SET_PMEM (CMD_DIR_OUT | 3)
#define CMD_REBOOT (CMD_DIR_OUT | 4)
#define CMD_GET_EEPROM (CMD_DIR_IN | 5)
#define CMD_SET_EEPROM (CMD_DIR_OUT | 6)
#define CMD_GET_LOCK (CMD_DIR_IN | 7)
// USB control endpoint state machine values
//
// Each state reacts to a subset of UEINTX bits.
//
// State description and active UEINTX bits
// ----- ---------------------------
// SETUP waiting for SETUP token
// WR_DATA waiting for OUT tokens for host-to-device DATA stage
// RXOUTI -> handle data from host
// NAKINI -> start STATUS stage (new state = WR_STATUS)
// WR_STATUS waiting for IN token for host-to-device STATUS data
// TXINI -> transaction done (new state = SETUP)
// RD_DATA waiting for IN/OUT token
// TXINI -> buffer DATA stage to send to host
// sets state = SETUP when all bytes sent
//
// RXSTPI (not shown above) resets the state machine in all states.
// Other UEINTX bits not listed for a given state are ignored in that state.
//
// The main loop processes UEINTX bits with this equivalent C code:
//
// uint8_t intx = UEINTX;
// intx &= state; // clear all inactive UEINTX bits
// intx = intx & -intx; // find lowest asserted UEINTX bit
// intx = ~intx; // complement the result for reset
// uint8_t ms = state & intx; // selector value (see below)
// switch (ms) {
// ...
// // maybe update intx
// ...
// }
// UEINTX = intx; // reset handled UEINTX bit(s)
//
// When some active UEINTX bit is asserted, the selector value (ms) equals the
// state value with the lowest asserted UEINTX bit **CLEARED**.
//
// For example, if ms == WR_DATA & ~_BV(RXOUTI) then RXOUTI has been triggered
// in state WR_DATA.
//
// This uniquely identifies both state and each possible asserted UEINTX bit if
// every state value is at least Hamming distance 2 from all other state values.
//
// To meet this criteria some extra bits must be added; any unused bits suffice.
// Below the state values use STALLEDI and FIFOCON for this purpose.
//
// These extra bit values have no effect because the corresponding selector
// values are never tested in the main loop.
//
// RXSTPI is handled separately and does not appear in any of the state values.
//
// The SETUP pseudo-state is zero since no UEINTX bits are active in that state.
//
// Summary:
// WR_DATA = 01000100b
// WR_STATUS = 00000011b
// RD_DATA = 10000001b
//
// See also ch. 22 of the datasheet.
// State values: the UEINTX bits a state reacts to, plus one unused bit where
// needed to keep the state values distinguishable.
#define WR_DATA (_BV(RXOUTI) | _BV(NAKINI))
#define WR_STATUS (_BV(TXINI) | _BV(STALLEDI))
#define RD_DATA (_BV(TXINI) | _BV(FIFOCON))
// Program entry
// Save and clear MCUSR
// WDRF must be cleared to disable the watchdog timer.
// Falls through into the main loop after hardware initialization.
bootspace:
// Original MCUSR is preserved in r2 for user program
in r2,MCUSR
clr YH // YH serves as the zero register from here on
out MCUSR,YH
clr r18 // r18 = 0: new WDTCSR value, and start value of the delay loop below
rcall set_wdt // Disable watchdog
// Busy loop pause for USB detach during reset.
// This ensures that the host detects detach before restart. Typically the
// oscillator/PLL startup delays will exceed the specified USB max detach
// detection timing (2.5 us), but this is here anyway for robustness.
1: dec r18 // 0 on entry
brne 1b // loops 256 times * 3 cycles = 768 cycles
// Jump to user code if reset was:
// - brown-out
// - watchdog (except if external reset also set)
// - power-on
// - USB reset
//
// Watchdog + external reset triggers the bootloader in case WDTON is set
// since the watchdog may fire while the reset button is being held down.
//
// User code can enter the bootloader by triggering any other reset,
// or by following the instructions at the top of this file.
mov r16, r2 // work on a copy, r2 must stay intact for the user program
cpi r16,1<<EXTRF|1<<WDRF
breq 1f // exactly external + watchdog reset: stay in bootloader
andi r16,1<<BORF|1<<WDRF|1<<PORF|1<<USBRF
breq 1f // none of the "start user code" reset sources: stay in bootloader
jmp 0 // jump to user program
// Enable watchdog for bootloader, 16 ms timeout
1: ldi r18,1<<WDE
rcall set_wdt
// Hardware initialization
// PLL initialization
// PDIV3:0 = 0100 (equals reset value)
#if (F_CPU == 8000000)
ldi r16,1<<PLLE
#else
# ifndef PINDIV // AT90USBxx, xxU2: no PINDIV bit, use PLLP0 instead
# define PINDIV PLLP0
# endif
ldi r16,1<<PINDIV|1<<PLLE
#endif
out PLLCSR,r16
1: in r0,PLLCSR // loop_until_bit_is_set(PLLCSR, PLOCK)
sbrs r0,PLOCK
rjmp 1b
// Setup Y register for indirect addressing, YH is already zero
ldi YL,YBASE
// USB initialization
#ifdef UHWCON // xxU4
ldi r16,1<<UVREGE
sty UHWCON,r16 // set UVREGE
// The first store does not set OTGPADE because clock is not enabled.
// Using the same value for both stores saves program space.
ldi r16,1<<USBE|1<<OTGPADE
sty USBCON,r16 // set USBE
#else // AT90USBxx, xxU2
// Three padding words keep this branch the same size as the xxU4 branch
nop
nop
nop // free space
ldi r16,1<<USBE
#endif
sty USBCON,r16 // set OTGPADE (xxU4: second store of the same value)
sty UDCON,YH // set DETACH=0
// Main loop
// Exits only by watchdog reset triggered by REBOOT command.
// Register assignments in all states:
// r2 cmd see above
// r3 state see above
// X len length of current transaction
// Y YBASE Y-register always equals YBASE
// Z ptr memory pointer (varies by command type)
// Loop entry and initialization
// state = SETUP
clr r3
// Main loop body
// Every handler returns here with "rjmp loop"; each pass resets the watchdog.
loop:
wdr // Clear watchdog
// Check for USB reset
// if (UDINT & _BV(EORSTI)) {
ldy r0,UDINT
sbrs r0,EORSTI
rjmp intx // no end-of-reset event: go straight to endpoint handling
// reset USB module and setup endpoint
ldi r24,1<<EPEN
sty UECONX,r24 // UECONX = EPEN;
ldi r24,1<<EPSIZE1|1<<EPSIZE0|1<<ALLOC
sty UECFG1X,r24 // setup EP0 for 64 byte FIFO size, one bank
sty UDINT,YH // clear interrupts
sty UEINTX,YH
// } falls through into endpoint handling (intx)
// Endpoint handling
// Register assignment:
// r16 = intx (clobbered by SETUP handling)
intx:
// Check for USB endpoint events
ldy r16,UEINTX
// Check for SETUP token
sbrs r16, RXSTPI
rjmp handle_state // if not got SETUP token
// Handle SETUP token
// The 8 byte SETUP token is copied into r2 through r9.
// r2 = bmRequestType
// r3 = bRequest
// r4 = wValueL
// r5 = wValueH
// r6 = wIndexL
// r7 = wIndexH
// r8 = wLengthL
// r9 = wLengthH
// While parsing, the new values are built in
// r24 = command
// r25 = state
// These are copied to r2:r3 upon setup completion
// The copy uses Z as a data-space pointer into the register file itself.
ldi ZL,2 // copy 8 bytes from UEDATX to r2:r9
ldi ZH,0 // ptr = 0x0002 (r2 in data space)
1: ldy r0,UEDATX
st Z+,r0
cpi ZL,10 // 8 bytes
brne 1b
sty UEINTX,YH // clear interrupts
// Parse setup packet
// STALL if bmRequestType has any bit 0-5 set
// or bRequest has any bit 4-7 set
// The command value is the bit-wise OR of these two values where
// bits 0-3 = bits 0-3 of bRequest
// bits 4-5 = 0
// bits 6-7 = bits 6-7 of bmRequestType
movw r24, r2 // r24 = bmRequestType, r25 = bRequest (scratch copies for the checks)
andi r24, 0x3f // if (bmRequestType & 0x3f) goto stall
brne stall
andi r25, 0xf0 // if (bRequest & 0xf0) goto stall
brne stall
movw ZL,r4 // Most commands want Z = wValue, so setup memory pointer here
// See USB commands above
or r2,r3 // cmd = bmRequestType | bRequest
mov r24,r2
// Descriptor handling
// cmd 0x86 = device-to-host request with bRequest 6
cpi r24,0x86
brne setup_set_addr
ldi ZL,lo8(dev_desc)
ldi ZH,hi8(dev_desc) // all descriptors share the same high address
dec r5 // wValueH = descriptor type
breq 2f // device descriptor (wValueH==1)
ldi ZL,lo8(conf_desc)
ldi XL,9+9 // sizeof(conf_desc)
dec r5
breq 3f // configuration descriptor (wValueH==2)
dec r5
brne stall // not a string descriptor either (wValueH!=3): stall
ldi ZL,lo8(string_desc0)
tst r4 // wValueL = string ID
breq 2f // deliver language list when zero
ldi ZL,lo8(string_desc1) // otherwise, deliver "ubaboot" string
// Descriptor reads can be short because host may read a prefix of either
// descriptor during enumeration (e.g. for bMaxPacketSize0)
2: lpm XL,Z // get length from descriptor start
3: cp r8,XL // if (wLength < len) {
cpc r9,YH
// NOTE(review): when wLength >= descriptor length this branch skips
// "movw XL,r8", so only XL has been loaded and XH keeps its previous value.
// No instruction visible in this file clears XH on this path -- confirm that
// XH is guaranteed to be zero here (e.g. after a write with wLength >= 256).
brcc 2f
// Set cmd = 0x86 for program memory reads
// which are implemented exactly the same as descriptor reads
setup_get_pmem_done:
ldi r24,0x86
// Common SETUP token finalization
// For non-descriptor control reads the host must always request
// exact correct length or buffer overrun error occurs.
setup_done:
// len = wLength;
movw XL,r8
2:
// state = cmd & 0x80 ? RD_DATA : WR_DATA
// (r2 still holds bmRequestType | bRequest at this point)
ldi r25,WR_DATA
sbrc r2,7
ldi r25,RD_DATA
// cmd = r24
// state = r25
movw r2, r24
rjmp loop
// Bad request: STALL endpoint and return to the SETUP state.
// r3 must be cleared explicitly because it still holds bRequest from the
// SETUP packet copy.
stall: // Bad request: STALL endpoint
ldi r24,1<<STALLRQ|1<<EPEN
sty UECONX,r24
clr r3 // state = SETUP
rjmp loop
// The following commands are no-ops during setup
// See state machine handling below for specific behavior
// Entered with r24 = cmd, Z = wValue.
setup_set_addr:
cpi r24,0x05 // Set address (applied in the status stage)
breq setup_done
cpi r24,0x09 // accepted without action (presumably standard SET_CONFIGURATION -- confirm)
breq setup_done
cpi r24,CMD_REBOOT
breq setup_done
cpi r24,CMD_GET_EEPROM
breq setup_done
cpi r24,CMD_SET_EEPROM
breq setup_done
// Signature read
// This reads the bytes directly into UEDATX during setup
// The state machine read loop is not used
cpi r24,CMD_GET_SIGRD
brne setup_get_lock
// read signature row via SIGRD bit
// 0000 = signature[0]
// 0002 = signature[1]
// 0004 = signature[2]
ldi r16,1<<SIGRD|1<<SPMEN
ldi r17,2
ldi r18,6
// fall-through to setup_rd_spm
// read special bytes through SPMCSR/LPM
// r16 = SPMCSR value
// r17 = lo(Z) step
// r18 = lo(Z) limit
// Reading always starts at Z = 0, the wValue copy in Z is discarded.
setup_rd_spm:
ldi ZL,0
ldi ZH,0
1: // UEDATX = load byte from special SPM row
out SPMCSR,r16 // arm the special read; the LPM must follow immediately
lpm r0,Z
sty UEDATX,r0
add ZL,r17
cp ZL,r18
brne 1b
rjmp setup_done
// Lock/fuse read
// This also reads directly into UEDATX during setup
// The state machine read loop is not used
// Only sets up the parameters (SPMCSR value, step 1, limit 4) and shares
// the read loop with the signature read.
setup_get_lock:
cpi r24, CMD_GET_LOCK
brne setup_get_pmem
// read fuse/lock bytes via BLBSET bit
// 0000 = low fuse
// 0001 = lock byte
// 0002 = ext fuse
// 0003 = high fuse
ldi r16,1<<BLBSET|1<<SPMEN
ldi r17,1
ldi r18,4
rjmp setup_rd_spm
// Read from program memory
// Nothing to do except set cmd = 0x86, so that the data stage is served by
// the same code as descriptor reads (Z = wValue is already set up).
setup_get_pmem:
cpi r24, CMD_GET_PMEM
breq setup_get_pmem_done
// Write to program memory
// Enforces that pointer (wValue, already in Z) and length (wLength) are
// page-aligned, i.e. have none of the lower 7 bits set. Anything else, and
// any unknown command, is answered with STALL.
// The length must be taken from r8 (wLengthL): X is loaded with wLength
// only at setup_done, so XL still holds the leftover length of the
// previous transaction at this point.
cpi r24,CMD_SET_PMEM
brne stall
mov r17,r8 // length or address must not have lower bits set
or r17,ZL
andi r17,0x7f
brne stall
rcall do_spm_rwwsre // reset temporary page
rjmp setup_done
// Endpoint state machine handling
//
// Each loop iteration handles at most one UEINTX bit.
// See USB control endpoint state machine above.
//
// At the very end this stores intx to UEINTX to clear handled bits
// The endpoint handling code may clear bits in intx as required
// Entered with r16 = UEINTX, r2 = cmd, r3 = state.
handle_state:
// Compute next bit to process
// Note that when state == SETUP the following code does nothing
// because r16 = 0xff and r25 == 0
// so this falls-through to set UEINTX = 0xff which has no effect
// intx &= state
and r16, r3
// intx = ~(intx & -intx)
mov r17, r16
neg r17
and r16, r17
com r16
// r24 = cmd
// r25 = intx & state
movw r24, r2
and r25, r16
// Control read TXINI: write data for host to UEDATX
// Common loop contains command-specific handling
// Implements flash/eeprom memory reads
// Other reads already filled UEDATX and this is a no-op
// (for those the loop only counts len down to zero)
cpi r25, RD_DATA & ~_BV(TXINI)
brne 3f // not ready to put data to USB
ldi r17,64 // EP0 FIFO size
1: adiw XL,0 // test len for zero
breq state_end // nothing to put
// Flash memory reads, same as descriptor reads
cpi r24,0x86
brne 2f
lpm r0,Z+
sty UEDATX,r0
2: // EEPROM memory reads
cpi r24,CMD_GET_EEPROM
brne 2f
rcall readEE // r0 = EEPROM byte at Z, Z incremented
sty UEDATX,r0
2: sbiw XL,1
dec r17
brne 1b
// FIFO full but len not exhausted: falls through. None of the following
// selector compares matches in this state, so control ends up at mask_intx
// which clears TXINI and leaves the state unchanged.
// Control write RXOUTI: handle data from host in UEDATX
// Unlike reads no common outer loop; each command implements its own.
// Implements flash/eeprom memory writes
3: cpi r25, WR_DATA & ~_BV(RXOUTI)
brne wr_status_begin // not got USB data
// nb = UEBCLX
ldy r18,UEBCLX // number of bytes in FIFO (0..64)
// Flash memory writes
// The temporary buffer is filled from the payload one *word* at a time.
// Writes are always a multiple of page size and aligned to page boundaries.
// Each OUT token comprises one half of the page temporary buffer.
// The page is erased and written after every second token.
cpi r24,CMD_SET_PMEM
brne 2f
1: subi r18,2 // two payload bytes per word
brcs 1f // FIFO exhausted
ldy r0,UEDATX // r1:r0 = data word for SPM
ldy r1,UEDATX
ldi r19,1<<SPMEN
rcall do_spm // fill temporary buffer
adiw ZL,2
rjmp 1b
1: // Erase and write the page if buffer filled.
// Z points BEYOND current page
mov r18,ZL
andi r18,0x7f // if (lo(ptr) & 0x7f == 0) {
brne mask_intx // page not complete yet, wait for next token
sbiw ZL,2 // go back to current page (PCWORD bits are ignored)
ldi r19,1<<PGERS|1<<SPMEN
rcall do_spm
ldi r19,1<<PGWRT|1<<SPMEN
rcall do_spm
adiw ZL,2 // go to next page
rcall do_spm_rwwsre // re-enable read-while-write section
// } falls through; the compare below cannot match for CMD_SET_PMEM
2: cpi r24,CMD_SET_EEPROM
brne mask_intx
// EEPROM memory writes.
// The hardware allows atomic byte-wise erase+write so this is easy.
// Loops over the token payload writing each byte.
1: subi r18,1
brcs mask_intx // FIFO exhausted
rcall readEE // wait and read value at address Z
// readEE leaves the EEPROM address register at the byte just read,
// so the write below goes to that same address
ldy r1,UEDATX
cp r0,r1
breq 1b // don't change (faster)
out EEDR,r1
sbi EECR,EEMPE
sbi EECR,EEPE // write changed byte, don't wait for completion here
rjmp 1b
// Control write NAKINI: write finished
// The host asked for the status stage: switch to state WR_STATUS and
// additionally clear TXINI in the value stored to UEINTX at mask_intx.
// Entered with r24 = cmd, r25 = selector, r16 = value to store to UEINTX.
wr_status_begin:
cpi r25,WR_DATA & ~_BV(NAKINI)
brne wr_status_end
andi r16,~_BV(TXINI)
ldi r25,WR_STATUS // r25 now equals WR_STATUS, so the compare below cannot match
mov r3,r25
// Control write TXINI in state WR_STATUS: transaction done.
// Commands that take effect after the status stage are executed here.
wr_status_end:
cpi r25,WR_STATUS & ~_BV(TXINI)
brne mask_intx
cpi r24,0x05 // Set address
brne do_reboot
sty UDADDR,ZL // first store the address (Z = wValue) ...
ori ZL,0x80
sty UDADDR,ZL // ... then the same address with bit 7 set
// Reboot to user code
do_reboot:
cpi r24,CMD_REBOOT
breq .-2 // stay here until watchdog reset
// Transaction finished: state = SETUP
state_end:
clr r3
// Store intx to UEINTX to clear the handled bit(s), then continue polling
mask_intx:
sty UEINTX,r16
rjmp loop
// EEPROM read, increment Z pointer
// Waits for a pending EEPROM write to finish first, resetting the watchdog
// while waiting.
// inputs: Z = address
// outputs: Z = address + 1, r0 = byte
// clobbers: none
// Note: the entry point is readEE; the "1: wdr" line in front of it is
// only reached from the wait loop.
1: wdr
readEE: sbic EECR,EEPE // Wait until EEPROM ready
rjmp 1b
out EEARL,ZL
out EEARH,ZH
adiw ZL,1 // EEAR keeps the un-incremented address
sbi EECR,EERE
in r0,EEDR
ret
// Watchdog setup
// Writes WDCE|WDE to WDTCSR first, then immediately the new value.
// The two stores must stay back-to-back (timed sequence).
// inputs: r18 = new WDTCSR (0 = watchdog off, 1<<WDE = on with shortest timeout)
// clobbers: r19
set_wdt:
ldi r19,1<<WDCE|1<<WDE
sts WDTCSR,r19
sts WDTCSR,r18
ret
// Re-enable read-while-write section, also resets temporary page
// inputs: none (Z and r1:r0 as left by the caller)
// clobbers: r0, r19
do_spm_rwwsre:
ldi r19,1<<RWWSRE | 1<<SPMEN
// intentional fall-through to do_spm
// SPM subroutine
// Issues the SPM operation selected by r19 and waits until SPMEN reads
// back as zero, resetting the watchdog while waiting.
// inputs: r19 = spmctrl = SPMCSR value
// (callers set Z and, for buffer fill, r1:r0 beforehand)
// clobbers: r0
do_spm:
out SPMCSR,r19
spm // must directly follow the SPMCSR store
1: wdr
in r0,SPMCSR // r0 is overwritten only after SPM has been issued
sbrc r0,SPMEN
rjmp 1b
ret
// USB descriptors, read by the descriptor handling via LPM.
// The first byte of dev_desc, string_desc0 and string_desc1 is the length
// that is delivered; for conf_desc the length 9+9 is hard-coded in the
// SETUP code. All descriptors must share the same high address byte.
#define W(x) (x)&0xFF,(x)>>8 // define unaligned 16-bit LSBfirst quantities
.type dev_desc,@common // don't disassemble
dev_desc: // device descriptor
.byte 18 //bLength
.byte 1 //bDescriptorType = Device
.byte W(0x0200) //bcdUSB
.byte 0xFF //bDeviceClass (libusb)
.byte 1 //bDeviceSubClass
.byte 0 //bDeviceProtocol
.byte 64 //bMaxPacketSize0
.byte W(0x03EB) //idVendor = Atmel
.byte W(0x2FF4) //idProduct = ATmega32U4 DFU
.byte W(0x2104) //bcdDevice (Year/Month)
.byte 1 //iManufacturer (string 1, same string as iProduct)
.byte 1 //iProduct
.byte 0 //iSerialNumber
.byte 1 //bNumConfigurations
.type conf_desc,@common // don't disassemble
conf_desc: // configuration + interface (+ endpoint) descriptors
.byte 9 //bLength
.byte 2 //bDescriptorType = Configuration,
.byte W(9+9) //wTotalLength (configuration + interface descriptor)
.byte 1 //bNumInterfaces
.byte 1 //bConfigurationValue
.byte 0 //iConfiguration
.byte 0x80 //bmAttributes
.byte 50 //MaxPower: 100mA
.byte 9 //bLength
.byte 4 //bDescriptorType = Interface
.byte 0 //bInterfaceNumber
.byte 0 //bAlternateSetting
.byte 0 //bNumEndpoints (control endpoint only)
.byte 0 //bInterfaceClass
.byte 0 //bInterfaceSubClass
.byte 0 //bInterfaceProtocol
.byte 0 //iInterface
.type string_desc0,@common // don't disassemble
string_desc0: // string ID 0: language list
.byte 4,3,W(0x0409) //language(english)
.type string_desc1,@common // don't disassemble
string_desc1: // any non-zero string ID
.byte 16,3 //L"ubaboot" (2 header bytes + 7 characters * 2 bytes)
.byte 'u',0,'b',0,'a',0,'b',0,'o',0,'o',0,'t',0
#undef W
// SPM entry point for application software, located at the end of the
// bootloader. It allows an application to self-modify flash out of the RWW
// section with its own idle procedure (see change log at the top of this file).
// Executes only the SPM instruction and returns; it neither writes SPMCSR
// nor waits for completion.
// NOTE(review): presumably the caller stores SPMCSR immediately before the
// call and sets up Z / r1:r0 itself -- confirm against the application code.
app_spm:
spm
ret
// End of file