Source file: /~heha/Mikrocontroller/LEDs/WS2812.zip/Ramsch/ws2812.cpp

#include "ws2812.h"

/*
This routine writes an array of bytes with RGB values to the Dataout pin
using the fast 800kHz clockless WS2811/2812 protocol.

The description of the protocol in the datasheet is somewhat confusing and
it appears that some timing values have been rounded. 

The order of the color-data is GRB 8:8:8. Serial data transmission begins 
with the most significant bit in each byte.

The total length of each bit is 1.25µs (20 cycles @ 16MHz)
* At 0µs the dataline is pulled high.
* To send a zero the dataline is pulled low after 0.375µs (6 cycles).
* To send a one the dataline is pulled low after 0.625µs (10 cycles).

After the entire bitstream has been written, the dataout pin has to remain low
for at least 50µs (reset condition).
*/
/*
Sends datlen bytes starting at data to the WS2811/WS2812 data pin, most
significant bit first, using a cycle-counted loop selected by F_CPU.

 data    pointer to the bytes to transmit (read only)
 datlen  number of bytes to transmit; 0 sends nothing

The port register is sampled once on entry; during the transmission the
whole port is written with that snapshot, with only the data pin toggled.

Notes on the inline assembly:
* Every register the assembly modifies is declared as a read-write ("+")
  operand, so the compiler does not assume it still holds its old value.
* The variants that walk the buffer themselves carry a "memory" clobber so
  that pending stores to the buffer are completed before the asm reads it.
* __tmp_reg__ (r0) is used as scratch register.

NOTE(review): interrupts are not masked in this routine; an interrupt that
hits during transmission presumably distorts the bit timing - confirm that
the caller disables interrupts if that matters.
*/
void ws2812::sendarray(const uint8_t*data,uint16_t datlen) {
 const uint8_t mask=1<<pin;
 const uint8_t hi=ws2812_port|mask;	// port value with data pin high
 const uint8_t lo=hi&~mask;		// port value with data pin low

#if F_CPU==16000000
/*
One bit takes 20 cycles inside the assembly loop. The last bit of each byte
is followed by the C loop overhead (counter update, byte fetch), so it is
somewhat longer. According to the original author this does not cause any
issues, as only the timing between the rising and the falling edge seems to
be critical; quick experiments showed that the bitstream has to be delayed
by more than 3us (48 cycles) until it cannot be continued.
*/
 while (datlen--) {
  uint8_t curbyte=*data++;
  uint8_t bitcnt=8;		// bits left in curbyte
  asm volatile(
"1:	out	%[port],%[hi]	\n"	// 1
"	lsl	%[byte]		\n"	// 2	C = bit to send
"	dec	%[cnt]		\n"	// 3	Z = last bit (dec keeps C)
"	rjmp	.		\n"	// 5
"	brcs	2f		\n"	// 6l / 7h
"	out	%[port],%[lo]	\n"	// 7l / -
"2:	rjmp	.		\n"	// 9
"	nop			\n"	// 10
"	out	%[port],%[lo]	\n"	// 11
"	breq	3f		\n"	// 12      nt. 13 taken
"	rjmp	.		\n"	// 14
"	rjmp	.		\n"	// 16
"	rjmp	.		\n"	// 18
"	rjmp	1b		\n"	// 20
"3:				\n"
: [byte] "+r"(curbyte), [cnt] "+r"(bitcnt)
: [port] "I"(_SFR_IO_ADDR(ws2812_port)), [lo] "r"(lo), [hi] "r"(hi)
: "cc");
 }

#elif F_CPU==12000000
/*
The total length of each bit is 1.25us (15 cycles @ 12MHz)
* At 0us the dataline is pulled high.  (cycle 1+0)
* To send a zero the dataline is pulled low after 0.333us (1+4=5 cycles).
* To send a one the dataline is pulled low after 0.666us (1+8=9 cycles).

Total loop timing is correct, but the timing for the falling edge can
not be accurately reached as the correct 0.375us (4.5 cyc.) and 0.625us
(7.5 cyc.) timings fall in between cycles.

Final timing:
* 15 cycles for bits 7-1
* 16 cycles for bit 0
- The bit 0 timing exceeds the 1.25us bit-timing by one cycle (83.3ns),
  which is still within datasheet tolerances (600ns)

bitphase is decremented by 0x20 per bit and therefore wraps to zero after
exactly 8 bits; the zero flag of that subi marks the end of a byte.
*/
 uint8_t bitphase=0;
 asm volatile(
"1:	subi	%A[len],1	\n"	// 12
"	sbci	%B[len],0	\n"	// 13
"	brcs	4f		\n"	// 14	no bytes left
"	ld	__tmp_reg__,%a[ptr]+\n"	// 15
"2:	out	%[port],%[hi]	\n"	// 1
"	lsl	__tmp_reg__	\n"	// 2
"	nop			\n"	// 3
"	brcs	3f		\n"	// 4nt / 5t
"	out	%[port],%[lo]	\n"	// 5
"3:	subi	%[ph],0x20	\n"	// 6
"	rjmp	.		\n"	// 8
"	out	%[port],%[lo]	\n"	// 9
"	breq	1b		\n"	// 10nt  / 11t
"	nop			\n"	// 11
"	rjmp	.		\n"	// 13
"	rjmp	2b		\n"	// 15
"4:				\n"	//
: [ptr] "+e"(data), [len] "+d"(datlen), [ph] "+d"(bitphase)
: [port] "I"(_SFR_IO_ADDR(ws2812_port)), [lo] "r"(lo), [hi] "r"(hi)
: "memory", "cc");

#elif F_CPU==9600000
/*
The total length of each bit is 1.25us (12 cycles @ 9.6MHz)
* At 0us the dataline is pulled high.  (cycle 1)
* To send a zero the dataline is pulled low after 0.312us (1+3=4 cycles) (error 0.06us)
* To send a one the dataline is pulled low after 0.625us (1+6=7 cycles) (no error).

12 cycles can not be reached for bit 0 write. However since the timing
between the rising and falling edge is correct, it seems to be acceptable
to slightly increase bit timing

Final timing (as documented by the original author):
* 12 cycles for bits 7-1
* 15 cycles for bit 0

- The bit 0 timing exceeds the 1.25us timing by 312ns, which is still within
  datasheet tolerances (600ns).

datlen must live in r16..r31 ("d") because subi/sbci only accept the upper
registers.
*/
 uint8_t bitphase=0;
 asm volatile(
"1:	subi	%A[len],1	\n"	// 10
"	sbci	%B[len],0	\n"	// 11
"	brcs	4f		\n"	// 12	no bytes left
"	ld	__tmp_reg__,%a[ptr]+\n"	// 14
"2:	out	%[port],%[hi]	\n"	// 1
"	lsl	__tmp_reg__	\n"	// 2
"	brcs	3f		\n"	// 3nt / 4t
"	out	%[port],%[lo]	\n"	// 4
"3:	subi	%[ph],0x20	\n"	// 5
"	nop			\n"	// 6
"	out	%[port],%[lo]	\n"	// 7
"	breq	1b		\n"	// 8nt  / 9t
"	rjmp	.		\n"	// 10
"	rjmp	2b		\n"	// 12
"4:				\n"
: [ptr] "+e"(data), [len] "+d"(datlen), [ph] "+d"(bitphase)
: [port] "I"(_SFR_IO_ADDR(ws2812_port)), [lo] "r"(lo), [hi] "r"(hi)
: "memory", "cc");

#elif F_CPU==8000000
/*
Timing optimized for 8MHz AVR (excl. XMEGA and reduced instruction set)

The total length of each bit is 1.25us (10 cycles @ 8MHz)
* At 0us the dataline is pulled high.  (cycle 1+0=1)
* To send a zero the dataline is pulled low after 0.375us (1+3=4 cycles).
* To send a one the dataline is pulled low after 0.625us (1+5=6 cycles).

Final timing (as documented by the original author):
* 10 cycles for bits 7-1
* 14 cycles for bit 0

- The bit 0 timing exceeds the 1.25us bit-timing by 500ns, which is still
  within datasheet tolerances (600ns)
*/
 uint8_t bitphase=0;
 asm volatile(
"1:	subi	%A[len],1	\n"	// 9
"	sbci	%B[len],0	\n"	// 10
"	brcs	4f		\n"	// 11	no bytes left
"	ld	__tmp_reg__,%a[ptr]+\n"	// 13
"2:	out	%[port],%[hi]	\n"	// 1
"	lsl	__tmp_reg__	\n"	// 2
"	brcs	3f		\n"	// 3nt / 4t
"	out	%[port],%[lo]	\n"	// 4
"3:	subi	%[ph],0x20	\n"	// 5
"	out	%[port],%[lo]	\n"	// 6
"	breq	1b		\n"	// 7nt  / 8t
"	nop			\n"	// 8
"	rjmp	2b		\n"	// 10
"4:				\n"
: [ptr] "+e"(data), [len] "+d"(datlen), [ph] "+d"(bitphase)
: [port] "I"(_SFR_IO_ADDR(ws2812_port)), [lo] "r"(lo), [hi] "r"(hi)
: "memory", "cc");

#elif F_CPU==4000000
/*
The total length of each bit is 1.25us (5 cycles @ 4MHz)
* At 0us the dataline is pulled high.  (cycle 0+1)
* To send a zero the dataline is pulled low after 0.5us (spec: 0.375us)  (2+1=3 cycles).
* To send a one the dataline is pulled low after 0.75us  (spec: 0.625us) (3+1=4 cycles).

The timing of this implementation is slightly off, however it seems to
work empirically.

The eight bits of a byte are unrolled with an assembler macro. The 16-bit
byte counter is decremented in the spare cycle after bits 7 and 6; none of
the following instructions (out, sbrs, nop) touch the status register, so
the zero flag is still valid at the final brne.

Final timing:
* 5 cycles for bits 7-1
* bit 0 is followed by brne and ld instead of a single filler cycle.
  NOTE(review): the original comment states 6 cycles (250ns over, within the
  600ns datasheet tolerance); counting brne taken (2) plus ld (2) suggests
  8 cycles on classic cores - verify on hardware.

The loop is a do-while, hence the explicit guard for datlen==0. The macro
is purged afterwards so it neither leaks into the rest of the translation
unit nor collides if this asm statement is emitted more than once.
*/
 if (datlen) {
  asm volatile(
"	.macro	ws2812_bit n	\n"
"	out	%[port],%[hi]	\n"	// 1
"	sbrs	__tmp_reg__,\\n	\n"	// 2	skip early low for a one
"	out	%[port],%[lo]	\n"	// 3
"	out	%[port],%[lo]	\n"	// 4
"	.endm			\n"
"1:	ld	__tmp_reg__,%a[ptr]+\n"
"	ws2812_bit	7	\n"
"	subi	%A[len],1	\n"	// 5
"	ws2812_bit	6	\n"
"	sbci	%B[len],0	\n"	// 5	Z = no bytes left
"	ws2812_bit	5	\n"
"	nop			\n"	// 5
"	ws2812_bit	4	\n"
"	nop			\n"	// 5
"	ws2812_bit	3	\n"
"	nop			\n"	// 5
"	ws2812_bit	2	\n"
"	nop			\n"	// 5
"	ws2812_bit	1	\n"
"	nop			\n"	// 5
"	ws2812_bit	0	\n"
"	brne	1b		\n"	// 6
"	.purgem	ws2812_bit	\n"
: [ptr] "+e"(data), [len] "+d"(datlen)
: [port] "I"(_SFR_IO_ADDR(ws2812_port)), [lo] "r"(lo), [hi] "r"(hi)
: "memory", "cc");
 }

#else
# error "Invalid F_CPU for ws2812 library!"
#endif
}
Detected encoding: ANSI (CP1252)4
Wrong umlauts? - Assume file is ANSI (CP1252) encoded