#include "ws2812.h"
/*
This routine writes an array of bytes with RGB values to the Dataout pin
using the fast 800kHz clockless WS2811/2812 protocol.
The description of the protocol in the datasheet is somewhat confusing and
it appears that some timing values have been rounded.
The order of the color-data is GRB 8:8:8. Serial data transmission begins
with the most significant bit in each byte.
The total length of each bit is 1.25µs (20 cycles @ 16MHz)
* At 0µs the dataline is pulled high.
* To send a zero the dataline is pulled low after 0.375µs (6 cycles).
* To send a one the dataline is pulled low after 0.625µs (10 cycles).
After the entire bitstream has been written, the dataout pin has to remain low
for at least 50µs (reset condition).
*/
/*
Bit-bangs `datlen` bytes starting at `data` onto the output pin using
cycle-counted inline assembly selected by F_CPU.

data   - bytes to transmit, sent in memory order, MSB first within each byte.
datlen - number of bytes to transmit; 0 sends nothing.

The port value is sampled once on entry; the other pins of the port are
rewritten with that sampled value on every edge.
NOTE(review): interrupts are not masked here. An interrupt that fires during
transmission stretches a bit - presumably callers are expected to disable
interrupts around this call; confirm against the callers.

All registers that the assembly modifies are declared as read-write ("+")
operands so the compiler does not assume they still hold their old values.
*/
void ws2812::sendarray(const uint8_t*data,uint16_t datlen) {
    const uint8_t pinmask=static_cast<uint8_t>(1<<pin);
    const uint8_t hi=ws2812_port|pinmask;                 // port value with the data pin high
    const uint8_t lo=static_cast<uint8_t>(hi&~pinmask);   // port value with the data pin low
#if F_CPU==16000000
    /*
    Due to the loop overhead there is a slight timing error: The loop will execute
    in 21 cycles for the last bit write. This does not cause any issues though,
    as only the timing between the rising and the falling edge seems to be critical.
    Some quick experiments have shown that the bitstream has to be delayed by
    more than 3us until it cannot be continued (3us=48 cycles).
    */
    while (datlen--) {
        uint8_t curbyte=*data++;
        uint8_t bitcnt=8;   // one pass of the asm loop per bit
        asm volatile(
        "1: out %[port],%[hi] \n" // 1
        " lsl %[byte] \n" // 2   next bit -> carry
        " dec %[cnt] \n" // 3   sets Z on the last bit, leaves carry untouched
        " rjmp . \n" // 5
        " brcs 2f \n" // 6l / 7h
        " out %[port],%[lo] \n" // 7l / -
        "2: rjmp . \n" // 9
        " nop \n" // 10
        " out %[port],%[lo] \n" // 11
        " breq 3f \n" // 12 nt. 13 taken
        " rjmp . \n" // 14
        " rjmp . \n" // 16
        " rjmp . \n" // 18
        " rjmp 1b \n" // 20
        "3: \n"
        : [byte]"+r"(curbyte),[cnt]"+r"(bitcnt)
        : [port]"I"(_SFR_IO_ADDR(ws2812_port)),[lo]"r"(lo),[hi]"r"(hi));
    }
#elif F_CPU==12000000
    /*
    The total length of each bit is 1.25us (15 cycles @ 12MHz)
    * At 0us the dataline is pulled high. (cycle 1+0)
    * To send a zero the dataline is pulled low after 0.333us (1+4=5 cycles).
    * To send a one the dataline is pulled low after 0.666us (1+8=9 cycles).
    Total loop timing is correct, but the timing for the falling edge can
    not be accurately reached as the correct 0.375us (4.5 cyc.) and 0.625us (7.5 cyc)
    timings fall in between cycles.
    Final timing:
    * 15 cycles for bits 7-1
    * 16 cycles for bit 0
    - The bit 0 timing exceeds the 1.25us bit-timing by one cycle (about 83ns),
    which is still within datasheet tolerances (600ns)
    */
    // Bit counter: subtracting 0x20 eight times wraps back to 0 and sets Z.
    uint8_t c=0;
    asm volatile(
    "1: subi %A[len],1 \n" // 12
    " sbci %B[len],0 \n" // 13
    " brcs 4f \n" // 14  borrow -> all bytes sent
    " ld __tmp_reg__,%a[ptr]+\n" // 15
    "2: out %[port],%[hi] \n" // 1
    " lsl __tmp_reg__ \n" // 2
    " nop \n" // 3
    " brcs 3f \n" // 4nt / 5t
    " out %[port],%[lo] \n" // 5
    "3: subi %[cnt],0x20 \n" // 6
    " rjmp . \n" // 8
    " out %[port],%[lo] \n" // 9
    " breq 1b \n" // 10nt / 11t
    " nop \n" // 11
    " rjmp . \n" // 13
    " rjmp 2b \n" // 15
    "4: \n" //
    : [ptr]"+e"(data),[len]"+d"(datlen),[cnt]"+d"(c)
    : [port]"I"(_SFR_IO_ADDR(ws2812_port)),[lo]"r"(lo),[hi]"r"(hi)
    : "memory");   // the asm reads the buffer through the pointer
#elif F_CPU==9600000
    /*
    The total length of each bit is 1.25us (12 cycles @ 9.6MHz)
    * At 0us the dataline is pulled high. (cycle 1)
    * To send a zero the dataline is pulled low after 0.312us (1+3=4 cycles) (error 0.06us)
    * To send a one the dataline is pulled low after 0.625us (1+6=7 cycles) (no error).
    12 cycles can not be reached for bit 0 write. However since the timing
    between the rising and falling edge is correct, it seems to be acceptable
    to slightly increase bit timing
    Final timing:
    * 12 cycles for bits 7-1
    * 15 cycles for bit 0
    - The bit 0 timing exceeds the 1.25us timing by 312ns, which is still within
    datasheet tolerances (600ns).
    */
    // Bit counter: subtracting 0x20 eight times wraps back to 0 and sets Z.
    uint8_t c=0;
    asm volatile(
    "1: subi %A[len],1 \n" // 10
    " sbci %B[len],0 \n" // 11
    " brcs 4f \n" // 12  borrow -> all bytes sent
    " ld __tmp_reg__,%a[ptr]+\n" // 14
    "2: out %[port],%[hi] \n" // 1
    " lsl __tmp_reg__ \n" // 2
    " brcs 3f \n" // 3nt / 4t
    " out %[port],%[lo] \n" // 4
    "3: subi %[cnt],0x20 \n" // 5
    " nop \n" // 6
    " out %[port],%[lo] \n" // 7
    " breq 1b \n" // 8nt / 9t
    " rjmp . \n" // 10
    " rjmp 2b \n" // 12
    "4: \n"
    // subi/sbci only accept r16..r31, hence the "d" constraint on the length.
    : [ptr]"+e"(data),[len]"+d"(datlen),[cnt]"+d"(c)
    : [port]"I"(_SFR_IO_ADDR(ws2812_port)),[lo]"r"(lo),[hi]"r"(hi)
    : "memory");   // the asm reads the buffer through the pointer
#elif F_CPU==8000000
    /*
    Timing optimized for 8MHz AVR (excl. XMEGA and reduced instruction set)
    The total length of each bit is 1.25us (10 cycles @ 8MHz)
    * At 0us the dataline is pulled high. (cycle 1+0=1)
    * To send a zero the dataline is pulled low after 0.375us (1+3=4 cycles).
    * To send a one the dataline is pulled low after 0.625us (1+5=6 cycles).
    Final timing:
    * 10 cycles for bits 7-1
    * 14 cycles for bit 0
    - The bit 0 timing exceeds the 1.25us bit-timing by 500ns, which is still
    within datasheet tolerances (600ns)
    */
    // Bit counter: subtracting 0x20 eight times wraps back to 0 and sets Z.
    uint8_t c=0;
    asm volatile(
    "1: subi %A[len],1 \n" // 9
    " sbci %B[len],0 \n" // 10
    " brcs 4f \n" // 11  borrow -> all bytes sent
    " ld __tmp_reg__,%a[ptr]+\n" // 13
    "2: out %[port],%[hi] \n" // 1
    " lsl __tmp_reg__ \n" // 2
    " brcs 3f \n" // 3nt / 4t
    " out %[port],%[lo] \n" // 4
    "3: subi %[cnt],0x20 \n" // 5
    " out %[port],%[lo] \n" // 6
    " breq 1b \n" // 7nt / 8t
    " nop \n" // 8
    " rjmp 2b \n" // 10
    "4: \n"
    : [ptr]"+e"(data),[len]"+d"(datlen),[cnt]"+d"(c)
    : [port]"I"(_SFR_IO_ADDR(ws2812_port)),[lo]"r"(lo),[hi]"r"(hi)
    : "memory");   // the asm reads the buffer through the pointer
#elif F_CPU==4000000
    /*
    The total length of each bit is 1.25us (5 cycles @ 4MHz)
    * At 0us the dataline is pulled high. (cycle 0+1)
    * To send a zero the dataline is pulled low after 0.5us (spec: 0.375us) (2+1=3 cycles).
    * To send a one the dataline is pulled low after 0.75us (spec: 0.625us) (3+1=4 cycles).
    The timing of this implementation is slightly off, however it seems to
    work empirically.
    Final timing:
    * 5 cycles for bits 7-1
    * 6 cycles for bit 0
    - The bit 0 timing exceeds the 1.25us timing by 250ns, which is still within
    the tolerances stated in the datasheet (600ns).
    */
    // The unrolled loop tests the byte count only after a byte has been sent,
    // so an empty buffer has to be rejected up front.
    if (datlen!=0) {
        asm volatile(
        " .macro bit n \n"
        " out %[port],%[hi] \n" // 1
        " sbrs __tmp_reg__,\\n \n" // 2
        " out %[port],%[lo] \n" // 3
        " out %[port],%[lo] \n" // 4
        " .endm \n"
        "1: ld __tmp_reg__,%a[ptr]+\n"
        " bit 7 \n"
        " subi %A[len],1 \n" // 5   16-bit decrement of the remaining byte count,
        " bit 6 \n"
        " sbci %B[len],0 \n" // 5   split over two filler slots; Z = count reached 0
        " bit 5 \n"
        " nop \n" // 5
        " bit 4 \n"
        " nop \n" // 5
        " bit 3 \n"
        " nop \n" // 5
        " bit 2 \n"
        " nop \n" // 5
        " bit 1 \n"
        " nop \n" // 5
        " bit 0 \n"
        " brne 1b \n" // 6   Z is still the one left by sbci
        : [ptr]"+e"(data),[len]"+d"(datlen)
        : [port]"I"(_SFR_IO_ADDR(ws2812_port)),[lo]"r"(lo),[hi]"r"(hi)
        : "memory");   // the asm reads the buffer through the pointer
    }
#else
# error "Invalid F_CPU for ws2812 library!"
#endif
}
// Detected encoding: ANSI (CP1252)