util.c

// Normally _alloca(size_t) would be a simple "sub esp,eax".
// But the Win32 memory layout gets in the way: it requires the stack to be
// claimed downwards one page at a time.
// Otherwise an access violation occurs.
// Alternatively, a generous stack commit could be set with a linker switch. (Is that possible??)
// Therefore the compiler generates a call to this function for _alloca() and for
// functions with more than 4 KB of local variables.
// In principle it could execute eax/4 push instructions instead …
// In:       eax = number of bytes to reserve
// Out:      esp = start of the reserved memory area
// Clobbers: eax

// PROC: declaration prefix shared by every helper in this file — a void,
// cdecl, "naked" function, i.e. the body consists solely of the hand-written
// _asm block and must perform its own return.
#define PROC void _declspec(naked) _cdecl

// Stack probe / allocation helper.
// Rounds the requested size up to a multiple of 4, touches the stack one
// 4096-byte page at a time while walking downwards, and returns with esp
// pointing at the start of the new area (the return address is consumed).
// In:       eax = number of bytes to reserve
// Out:      esp = start of the reserved area
// Clobbers: eax, flags (ecx is saved and restored)
PROC _alloca_probe() {_asm{
	push	ecx
	 add	eax,3
	 sbb	ecx,ecx		// ecx = -1 if the rounding addition wrapped, else 0
	 or	eax,ecx		// saturate a wrapped size so the probe loop faults
				// instead of "succeeding" with a 0-byte allocation
	 and	al,~3		// make the size divisible by 4
	 lea	ecx,[esp+8]	// ecx = end address on the stack (caller's esp)
	 jmp	short testpg
onepg:	 sub	ecx,4096
	 sub	eax,4096
	 test	dword ptr [ecx],eax	// trigger a memory read cycle for this page
testpg:	 cmp	eax,4096
	 jae	short onepg
	 sub	ecx,eax		// ecx = start address on the stack
	 mov	eax,esp		// eax = pointer to the saved ecx
	 test	dword ptr [ecx],eax	// one more read cycle for the last partial page
	 mov	esp,ecx		// esp = start address (the return value)
	mov	ecx,[eax]	// restore ecx
	jmp	dword ptr[eax+4]// return to the caller (esp has moved, so no ret)
}}

// _chkstk is the same routine under a second name. An explicit jump is used
// instead of an empty body that falls through, because falling through only
// works while both functions are emitted back to back without padding.
PROC _chkstk() {_asm{
	jmp	_alloca_probe
}}

// Unsigned 64-bit logical right shift.
// In:       edx:eax = value, cl = shift count
// Out:      edx:eax = value >> count; 0 for any count >= 64
// Clobbers: ecx (bit 5 may be cleared), flags
PROC _aullshr() {_asm{
	cmp	cl,64
	jae	short l2	// count >= 64: every bit is shifted out
	btr	ecx,5		// CF = (count >= 32), count reduced to 0..31
	jc	short l1	// shift lengths >= 32 need no shrd
	shrd	eax,edx,cl
	shr	edx,cl
	ret
l2:	xor	edx,edx		// clear the source so the path below yields 0:0
l1:	mov	eax,edx		// high dword moves down by 32 bits ...
	xor	edx,edx
	shr	eax,cl		// ... then by the remaining count
	ret
}}

// 64-bit left shift.
// In:       edx:eax = value, cl = shift count
// Out:      edx:eax = value << count; 0 for any count >= 64
// Clobbers: ecx (bit 5 may be cleared), flags
PROC _allshl() {_asm{
	cmp	cl,64
	jae	short l2	// count >= 64: every bit is shifted out
	btr	ecx,5		// CF = (count >= 32), count reduced to 0..31
	jc	short l1	// shift lengths >= 32 need no shld
	shld	edx,eax,cl
	shl	eax,cl
	ret
l2:	xor	eax,eax		// clear the source so the path below yields 0:0
l1:	mov	edx,eax		// low dword moves up by 32 bits ...
	xor	eax,eax
	shl	edx,cl		// ... then by the remaining count
	ret
}}

// 64-bit multiply, low 64 bits of the product.
// Stack:    [esp+4] = lo(a), [esp+8] = hi(a), [esp+12] = lo(b), [esp+16] = hi(b)
// Out:      edx:eax = a * b (mod 2^64); the 16 argument bytes are popped here
// Clobbers: ecx, flags
PROC _allmul() {_asm{
	mov	ecx,[esp+12]	// ecx = lo(b), needed on both paths
	mov	eax,[esp+8]	// eax = hi(a)
	mov	edx,[esp+16]	// edx = hi(b), scratch only
	or	edx,eax		// are both high halves zero?
	jnz	short wide
	mov	eax,[esp+4]	// lo(a)
	mul	ecx		// edx:eax = lo(a) * lo(b), already the full result
	ret	16
wide:	push	ebx		// argument offsets grow by 4 from here on
	 mul	ecx		// eax = low dword of hi(a) * lo(b)
	 mov	ebx,eax
	 mov	eax,[esp+8]	// lo(a)
	 mul	dword ptr[esp+20]// eax = low dword of lo(a) * hi(b)
	 add	ebx,eax		// ebx = sum of both cross products
	 mov	eax,[esp+8]	// lo(a) again
	 mul	ecx		// edx:eax = lo(a) * lo(b)
	 add	edx,ebx		// fold the cross products into the high dword
	pop	ebx
	ret	16
}}

// Unsigned 64-bit division, full 64-bit divisor supported.
// Stack:    [esp+4] = lo(n), [esp+8] = hi(n), [esp+12] = lo(d), [esp+16] = hi(d)
// Out:      edx:eax = n / d; the 16 argument bytes are popped here
// Clobbers: ecx, flags (ebx and esi are saved and restored)
// A zero divisor raises the CPU divide error.
PROC _aulldiv() {_asm{
	mov	ecx,[esp+16]	// hi(d)
	test	ecx,ecx
	jnz	short dv64
	// Divisor fits in 32 bits: two chained divisions.
	mov	ecx,[esp+12]	// divisor
	mov	eax,[esp+8]	// high part of the dividend
	xor	edx,edx
	div	ecx		// eax = high quotient, edx = remainder
	push	eax
	 mov	eax,[esp+8]	// low part of the dividend
	 div	ecx		// eax = low quotient
	pop	edx		// replace the remainder with the high quotient
	ret	16
	// Divisor >= 2^32: the quotient fits in 32 bits. Shift divisor and
	// dividend right until the divisor fits in 32 bits, divide, then
	// correct the estimate, which is either exact or one too large.
dv64:	push	ebx
	push	esi		// argument offsets grow by 8 from here on
	 mov	ebx,[esp+20]	// ecx:ebx = divisor
	 mov	edx,[esp+16]	// edx:eax = dividend
	 mov	eax,[esp+12]
dvnorm:	 shr	ecx,1
	 rcr	ebx,1
	 shr	edx,1
	 rcr	eax,1
	 test	ecx,ecx
	 jnz	short dvnorm
	 div	ebx		// eax = quotient estimate
	 mov	esi,eax
	 mul	dword ptr[esp+24]// eax = estimate * hi(d), fits in 32 bits
	 mov	ecx,eax
	 mov	eax,[esp+20]
	 mul	esi		// edx:eax = estimate * lo(d)
	 add	edx,ecx		// edx:eax = estimate * d, carry = exceeds 64 bits
	 jc	short qhigh
	 cmp	edx,[esp+16]	// compare the product with the dividend
	 ja	short qhigh
	 jb	short qdone
	 cmp	eax,[esp+12]
	 jbe	short qdone
qhigh:	 dec	esi		// estimate was one too large
qdone:	 mov	eax,esi
	 xor	edx,edx		// high dword of the quotient is always 0 here
	pop	esi
	pop	ebx
	ret	16
}}

// Reverses the byte order of a 64-bit value.
// Stack: [esp+4] = low dword, [esp+8] = high dword of the argument
// Out:   edx:eax = byte-reversed value (the caller removes the argument)
PROC _byteswap_uint64() {_asm{
	mov	eax,[esp+8]	// old high dword becomes the new low dword
	mov	edx,[esp+4]	// old low dword becomes the new high dword
	bswap	eax
	bswap	edx
	ret
}}

// Reverses the byte order of a 32-bit value.
// Stack: [esp+4] = argument
// Out:   eax = byte-reversed value (the caller removes the argument)
PROC _byteswap_ulong() {_asm{
	mov	eax,dword ptr [esp+4]	// fetch the only argument
	bswap	eax			// swap its four bytes in place
	ret
}}

/* Firefox source code (allegedly):
static FORCEINLINE PRUint32 FASTCALL 
swap4b(PRUint32 dwd) 
{
    __asm {
    	mov   eax,dwd
	bswap eax
    }
}
*/
Detected encoding: ANSI (CP1252)
Wrong umlauts? -
Assume file is ANSI (CP1252) encoded
Assume file is OEM (CP437) encoded