From 9f00417d8e68c078ba41c844845188e135162ce3 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 1 Jun 2021 16:05:30 -0400 Subject: [PATCH 001/142] Improve cpp_tireal_pi_montecarlo --- .../cpp_tireal_pi_montecarlo/src/main.cpp | 51 ++++++++++--------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/examples/standalone/cpp_tireal_pi_montecarlo/src/main.cpp b/examples/standalone/cpp_tireal_pi_montecarlo/src/main.cpp index 1f9352002..998141916 100644 --- a/examples/standalone/cpp_tireal_pi_montecarlo/src/main.cpp +++ b/examples/standalone/cpp_tireal_pi_montecarlo/src/main.cpp @@ -2,54 +2,57 @@ #include #include -#define ITER_MAX 15000 - using namespace ti::literals; -static char buf[24] = "PI is about "; -#define buf_offset 12 - int main(void) { - int count = 0; /* points in the unit circle's first quadrant */ - /* Clear the screen */ os_ClrHomeFull(); /* Set the random seed based off the real time clock */ srand(rtc_Time()); - os_SetCursorPos(0, 0); + unsigned i = 0; + constexpr unsigned iMax = 10'000; + unsigned count = 0; /* points in the unit circle's first quadrant */ + + auto print = [&]() + { + const auto piApprox = ti::real(count) / i * 4; + + char buf[24] = "PI is about "; + constexpr size_t bufOffset = 12; + piApprox.toCString(buf + bufOffset); - for (int i = 0; i < ITER_MAX; i++) + os_PutStrFull(buf); + os_NewLine(); + }; + + while (++i <= iMax) { - const ti::real x = ti::real(rand()) / RAND_MAX; - const ti::real y = ti::real(rand()) / RAND_MAX; - const ti::real z = x*x + y*y; + const auto x = ti::real(rand()) / RAND_MAX; + const auto y = ti::real(rand()) / RAND_MAX; + const auto z = x*x + y*y; if (z <= 1) { count++; } - if (i % 150 == 0) // Just to print some things along the way... 
- { - (ti::real(count) / ITER_MAX * 4).toCString(buf+buf_offset); - os_PutStrFull(buf); - os_NewLine(); - } + if (os_GetCSC()) { break; } + + if (i % 100 == 0) + { + print(); + } } - (ti::real(count) / ITER_MAX * 4).toCString(buf+buf_offset); - os_PutStrFull(buf); - os_NewLine(); + print(); - os_NewLine(); os_PutStrFull("Press any key to quit"); - os_NewLine(); - while (!os_GetCSC()); + return 0; } From 48f64cb1a61ba7b2d5bed1d3f0a5520e4d6c5f41 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 1 Jun 2021 16:06:53 -0400 Subject: [PATCH 002/142] Provide optimized static versions of __smuls and __smulu --- src/std/linked/linked.src | 7 ++++++- src/std/shared/shared.src | 4 +--- src/std/static/smulu.src | 30 ++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 src/std/static/smulu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index ec745fb82..8f9a99949 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -1,4 +1,5 @@ public __u_flt_info, __u_flt_rnd, _acosf, _asinf, _atanf, _atan2f, _atoff, _ceilf, _cosf, _coshf, _expf, _fabsf, _floorf, _fmodf, _frexpf, _ldexpf, _logf, _log10f, _modff, _powf, _sinf, _sinhf, _sqrtf, _strtodf, _tanf, _tanhf, _acos, _asin, _atan, _atan2, _atof, _ceil, _cos, _cosh, _exp, _fabs, _floor, _fmod, _frexp, _ldexp, _log, _log10, _modf, _pow, _sin, _sinh, _sqrt, _strtod, _tan, _tanh + __u_flt_info := 022128h __u_flt_rnd := 02212Ch _acosf := 022100h @@ -52,4 +53,8 @@ _strtod := 0220E0h _tan := 022120h _tanh := 022138h - +; integer math + public __smuls +__smuls := 0000224h + public __smulu +__smulu := 0000228h diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 53272bed5..158bd74b9 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, 
__frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __imuls, __imulu, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lmuls, __lmulu, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __smuls, __smulu, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __imuls, __imulu, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lmuls, __lmulu, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, 
__srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -91,8 +91,6 @@ __seqcaseD := 000214h __setflag := 000218h __sldix := 00021Ch __sldiy := 000220h -__smuls := 000224h -__smulu := 000228h __sneg := 00022Ch __snot := 000230h __sor := 000234h diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src new file mode 100644 index 000000000..0d7f4f30f --- /dev/null +++ b/src/std/static/smulu.src @@ -0,0 +1,30 @@ + assume adl=1 + + public __smuls +__smuls: + public __smulu +__smulu: + +; Multiplies HL by BC and returns the 16-bit product hl. +; I: BC=multiplier, HL=multiplicand +; O: a=h, ubc=C*H, ude=B*L, hl=DE*HL, hlu=0 +; FO: sz(h), p/v=?, c=? 
+; CC: 16*r(PC)+(ADL?3*r(SPL):2*r(SPS))+13 + ; a ubc ude uhl +Mul_HL_BC_HL: ;----------------------+-------+-------+------------------------ + ld d,b + ld e,l ; de=BL + ld b,h ; bc=HC + ld h,c ; hl=CL + mlt bc ; ubc=C*H + mlt de ; ude=B*L + mlt hl ; uhl=C*L + ld a,h ; a=(C*L>>8) + add a,c ; a=(C*L>>8)+(C*H) + add a,e ; a=(C*L>>8)+(C*H)+(B*L) + ld h,a ; hl=C*L+((C*H)+(B*L)<<8) + ret + +; (HL*BC)&0xFFFF=(C*L+((B*L)+(C*H)<<8)+(B*H<<16))&0xFFFF +; =((C*L)&0xFFFF)+((B*L)+(C*H)<<8&0xFFFF)+(B*H<<16&0xFFFF) +; =C*L+((B*L)+(C*H)<<8&0xFFFF) From d61333fb49be347af13b2196d619f5d5d7198baa Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 1 Jun 2021 17:22:00 -0400 Subject: [PATCH 003/142] Touch up smulu.src --- src/std/static/smulu.src | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src index 0d7f4f30f..07be79da3 100644 --- a/src/std/static/smulu.src +++ b/src/std/static/smulu.src @@ -1,5 +1,3 @@ - assume adl=1 - public __smuls __smuls: public __smulu @@ -7,7 +5,7 @@ __smulu: ; Multiplies HL by BC and returns the 16-bit product hl. ; I: BC=multiplier, HL=multiplicand -; O: a=h, ubc=C*H, ude=B*L, hl=DE*HL, hlu=0 +; O: a=h, ubc=H*C, ude=L*B, hl=HL*DE, hlu=0 ; FO: sz(h), p/v=?, c=? 
; CC: 16*r(PC)+(ADL?3*r(SPL):2*r(SPS))+13 ; a ubc ude uhl @@ -16,15 +14,15 @@ Mul_HL_BC_HL: ;----------------------+-------+-------+------------------------ ld e,l ; de=BL ld b,h ; bc=HC ld h,c ; hl=CL - mlt bc ; ubc=C*H - mlt de ; ude=B*L - mlt hl ; uhl=C*L - ld a,h ; a=(C*L>>8) - add a,c ; a=(C*L>>8)+(C*H) - add a,e ; a=(C*L>>8)+(C*H)+(B*L) - ld h,a ; hl=C*L+((C*H)+(B*L)<<8) + mlt bc ; ubc=H*C + mlt de ; ude=L*B + mlt hl ; uhl=L*C + ld a,h ; a=L*C>>8 + add a,c ; a=(L*C>>8)+H*C + add a,e ; a=(L*C>>8)+H*C+L*B + ld h,a ; hl=L*C+((H*C)+(L*B)<<8) ret -; (HL*BC)&0xFFFF=(C*L+((B*L)+(C*H)<<8)+(B*H<<16))&0xFFFF -; =((C*L)&0xFFFF)+((B*L)+(C*H)<<8&0xFFFF)+(B*H<<16&0xFFFF) -; =C*L+((B*L)+(C*H)<<8&0xFFFF) +; HL*BC&0xFFFF=C*L+(H*C+B*L<<8)+(H*B<<16)&0xFFFF +; =(C*L&0xFFFF)+(B*L+H*C<<8&0xFFFF)+(B*H<<16&0xFFFF) +; =C*L+(B*L+H*C<<8&0xFFFF) From 8db0d939d048757b2ee64782232ce5b23f02af8f Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 1 Jun 2021 18:36:05 -0400 Subject: [PATCH 004/142] Provide optimized static versions of __imuls and __imulu --- src/std/linked/linked.src | 4 +++ src/std/shared/shared.src | 4 +-- src/std/static/imulu.src | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 src/std/static/imulu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 8f9a99949..8f9c7fbc4 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -54,6 +54,10 @@ _tan := 022120h _tanh := 022138h ; integer math + public __imuls +__imuls := 0000154h + public __imulu +__imulu := 0000158h public __smuls __smuls := 0000224h public __smulu diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 158bd74b9..070b28bdb 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, 
__frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __imuls, __imulu, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lmuls, __lmulu, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lmuls, __lmulu, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, 
__sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -38,8 +38,6 @@ __idvrmu := 000144h __ildix := 000148h __ildiy := 00014Ch __imul_b := 000150h -__imuls := 000154h -__imulu := 000158h __indcall := 00015Ch __ineg := 000160h __inot := 000164h diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src new file mode 100644 index 000000000..22acae1b7 --- /dev/null +++ b/src/std/static/imulu.src @@ -0,0 +1,55 @@ + assume adl=1 + + public __imuls +__imuls: + public __imulu +__imulu: + +; Multiplies UHL by UBC and returns the 24-bit product uhl. +; I: UBC=multiplier, UHL=multiplicand, ABL=1 +; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC +; FO: sz(a), vc(UHL*UBC-L*C&0xFFFFFF)+L*C) +; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 +; ; a ubc ude uhl u(sp) +Mul_UHL_UBC_UHL: ;--------------+-------+-------+-----------------------+-------------------------------- + ld d,b + ld e,h ; de=BH + mlt de ; ude=H*B + ld a,e ; a=H*B + dec sp + push hl + push bc + inc sp ; ?(sp)=[HLU]H + ; ?(sp)=[BCU]B + pop de ; de=[BCU]B x + ld e,l ; de=[BCU]L + mlt de ; ude=L*BCU + add a,e ; a=H*B+L*BCU + pop de ; de=[HLU]H x + ld e,c ; de=[HLU]C + mlt de ; ude=HLU*C + add a,e ; a=HLU*C+H*B+L*BCU + ld d,c + ld e,l ; de=CL + ld l,d ; hl=HC + ld c,e ; bc=BL + mlt bc ; ubc=L*B + mlt de ; ude=L*C + mlt hl ; uhl=H*C + add hl,bc ; uhl=H*C+L*B + add a,h ; a=H*C+L*B>>8+HLU*C+H*B+L*BCU + ld h,a ; hl=H*C+L*B+(HLU*C+H*B+L*BCU<<8) + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + ret + +; 
(UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF +; =(L*C&0xFFFFFF)+(H*C+L*B<<8&0xFFFFFF)+(HLU*C+H*B+L*BCU<<16&0xFFFFFF)+(HLU*B+H*BCU<<24&0xFFFFFF)+(HLU*BCU<<32&0xFFFFFF) +; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16&0xFFFFFF) From 508fa7a2ab9df02a9e188c6584269909a279336a Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 1 Jun 2021 23:35:12 -0400 Subject: [PATCH 005/142] Rework doc in smulu.src and imulu.src --- src/std/static/imulu.src | 47 ++++++++++++++++++++-------------------- src/std/static/smulu.src | 17 +++++++-------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index 22acae1b7..5d1f45931 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -8,37 +8,37 @@ __imulu: ; Multiplies UHL by UBC and returns the 24-bit product uhl. ; I: UBC=multiplier, UHL=multiplicand, ABL=1 ; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC -; FO: sz(a), vc(UHL*UBC-L*C&0xFFFFFF)+L*C) +; FO: sz(a), vc((UHL*UBC-L*C&0xFFFFFF)+L*C) ; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 -; ; a ubc ude uhl u(sp) -Mul_UHL_UBC_UHL: ;--------------+-------+-------+-----------------------+-------------------------------- +; ; a ubc ude uhl u(sp) +Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------+---------------- ld d,b - ld e,h ; de=BH - mlt de ; ude=H*B + ld e,h ; de=BH + mlt de ; ude=H*B ld a,e ; a=H*B dec sp push hl push bc - inc sp ; ?(sp)=[HLU]H - ; ?(sp)=[BCU]B - pop de ; de=[BCU]B x - ld e,l ; de=[BCU]L - mlt de ; ude=L*BCU + inc sp ; (sp)=[HLU]H + ; (sp)=[BCU]B + pop de ; de=[BCU]B ------ + ld e,l ; de=[BCU]L + mlt de ; ude=L*BCU add a,e ; a=H*B+L*BCU - pop de ; de=[HLU]H x - ld e,c ; de=[HLU]C - mlt de ; ude=HLU*C + pop de ; de=[HLU]H ------ + ld e,c ; de=[HLU]C + mlt de ; ude=HLU*C add a,e ; a=HLU*C+H*B+L*BCU ld d,c - ld e,l ; de=CL - ld l,d ; hl=HC + ld e,l ; de=CL + ld l,d ; hl=HC ld c,e ; bc=BL mlt bc ; ubc=L*B - mlt de 
; ude=L*C - mlt hl ; uhl=H*C - add hl,bc ; uhl=H*C+L*B - add a,h ; a=H*C+L*B>>8+HLU*C+H*B+L*BCU - ld h,a ; hl=H*C+L*B+(HLU*C+H*B+L*BCU<<8) + mlt de ; ude=L*C + mlt hl ; uhl=H*C + add hl,bc ; uhl=H*C+L*B + add a,h ; a=(H*C+L*B>>8)+HLU*C+H*B+L*BCU + ld h,a ; hl=H*C+L*B+(HLU*C+H*B+L*BCU<<8) add hl,hl add hl,hl add hl,hl @@ -46,10 +46,9 @@ Mul_UHL_UBC_UHL: ;--------------+-------+-------+-----------------------+------- add hl,hl add hl,hl add hl,hl - add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) ret ; (UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF -; =(L*C&0xFFFFFF)+(H*C+L*B<<8&0xFFFFFF)+(HLU*C+H*B+L*BCU<<16&0xFFFFFF)+(HLU*B+H*BCU<<24&0xFFFFFF)+(HLU*BCU<<32&0xFFFFFF) -; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16&0xFFFFFF) +; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)&0xFFFFFF diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src index 07be79da3..f857a15bb 100644 --- a/src/std/static/smulu.src +++ b/src/std/static/smulu.src @@ -8,21 +8,20 @@ __smulu: ; O: a=h, ubc=H*C, ude=L*B, hl=HL*DE, hlu=0 ; FO: sz(h), p/v=?, c=? 
; CC: 16*r(PC)+(ADL?3*r(SPL):2*r(SPS))+13 - ; a ubc ude uhl -Mul_HL_BC_HL: ;----------------------+-------+-------+------------------------ + ; a ubc ude uhl +Mul_HL_BC_HL: ;--------------+---------------+---------------+---------------- ld d,b ld e,l ; de=BL - ld b,h ; bc=HC - ld h,c ; hl=CL - mlt bc ; ubc=H*C + ld b,h ; bc=HC + ld h,c ; hl=CL + mlt bc ; ubc=H*C mlt de ; ude=L*B - mlt hl ; uhl=L*C + mlt hl ; uhl=L*C ld a,h ; a=L*C>>8 add a,c ; a=(L*C>>8)+H*C add a,e ; a=(L*C>>8)+H*C+L*B - ld h,a ; hl=L*C+((H*C)+(L*B)<<8) + ld h,a ; hl=L*C+((H*C)+(L*B)<<8) ret ; HL*BC&0xFFFF=C*L+(H*C+B*L<<8)+(H*B<<16)&0xFFFF -; =(C*L&0xFFFF)+(B*L+H*C<<8&0xFFFF)+(B*H<<16&0xFFFF) -; =C*L+(B*L+H*C<<8&0xFFFF) +; =C*L+(B*L+H*C<<8)&0xFFFF From 51130a77883f8ea4d4238fa04803c9b7f4e8cfea Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 13:12:03 -0400 Subject: [PATCH 006/142] Mark static versions of __smuls and __smulu as "fast" --- .../{static/smulu.src => shared/smulu_fast.src} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename src/std/{static/smulu.src => shared/smulu_fast.src} (61%) diff --git a/src/std/static/smulu.src b/src/std/shared/smulu_fast.src similarity index 61% rename from src/std/static/smulu.src rename to src/std/shared/smulu_fast.src index f857a15bb..1c2e526ad 100644 --- a/src/std/static/smulu.src +++ b/src/std/shared/smulu_fast.src @@ -1,15 +1,15 @@ - public __smuls -__smuls: - public __smulu -__smulu: + public __smuls_fast +__smuls_fast: + public __smulu_fast +__smulu_fast: ; Multiplies HL by BC and returns the 16-bit product hl. ; I: BC=multiplier, HL=multiplicand -; O: a=h, ubc=H*C, ude=L*B, hl=HL*DE, hlu=0 +; O: a=h, ubc=H*C, ude=L*B, hl=HL*BC, hlu=0 ; FO: sz(h), p/v=?, c=? 
; CC: 16*r(PC)+(ADL?3*r(SPL):2*r(SPS))+13 ; a ubc ude uhl -Mul_HL_BC_HL: ;--------------+---------------+---------------+---------------- +Mul_HL_BC_HL_Fast: ;--------------+---------------+---------------+---------------- ld d,b ld e,l ; de=BL ld b,h ; bc=HC @@ -23,5 +23,5 @@ Mul_HL_BC_HL: ;--------------+---------------+---------------+---------------- ld h,a ; hl=L*C+((H*C)+(L*B)<<8) ret -; HL*BC&0xFFFF=C*L+(H*C+B*L<<8)+(H*B<<16)&0xFFFF -; =C*L+(B*L+H*C<<8)&0xFFFF +; HL*BC&0xFFFF=L*C+(H*C+L*B<<8)+(H*B<<16)&0xFFFF +; =L*C+(H*C+L*B<<8)&0xFFFF From 4dc950ec9f7ccaca6a3d1f967004c2889b676bf6 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 13:12:03 -0400 Subject: [PATCH 007/142] Reimplement proper static versions of __smuls and __smulu --- src/std/static/smulu.src | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 src/std/static/smulu.src diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src new file mode 100644 index 000000000..925339872 --- /dev/null +++ b/src/std/static/smulu.src @@ -0,0 +1,30 @@ + public __smuls +__smuls: + public __smulu +__smulu: + +; Multiplies HL by BC and returns the 16-bit product hl. 
+; I: BC=multiplier, HL=multiplicand +; O: hl=HL*BC, hlu=ADL&&(L*C+(H*C+L*B<<8&0xFFFF)>>16) +; FO: c=!ADL&&(L*C+(H*C+L*B<<8&0xFFFF)>>16) +; CC: 20*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+13 + ; ude uhl u(sp) +Mul_HL_BC_HL: ;--------------+---------------+---------------- + push de ; u(sp)=UDE + ld d,h ; d =H + ld e,c ; de=HC + mlt de ; ude=H*C + ld d,l ; d=L + ld h,b ; hl=BL + mlt hl ; uhl=L*B + add hl,de ; uhl=(L<<8)+(H*C&0xFF)+L*B + ld h,l ; + ld l,0 ; uhl=H*C+L*B<<8&0xFFFF + ld e,c ; ude=LC + mlt de ; ude=L*C + add hl,de ; uhl=L*C+(H*C+L*B<<8&0xFFFF) + pop de ; ude=UDE ------ + ret + +; HL*BC&0xFFFF=L*C+(H*C+L*B<<8)+(H*B<<16)&0xFFFF +; =L*C+(H*C+L*B<<8)&0xFFFF From 76980b9baf3b8f14b01a5a561059a432014724d9 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 15:31:10 -0400 Subject: [PATCH 008/142] Mark static versions of __imuls and __imulu as "fast" --- .../{static/imulu.src => shared/imulu_fast.src} | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) rename src/std/{static/imulu.src => shared/imulu_fast.src} (83%) diff --git a/src/std/static/imulu.src b/src/std/shared/imulu_fast.src similarity index 83% rename from src/std/static/imulu.src rename to src/std/shared/imulu_fast.src index 5d1f45931..e8c968767 100644 --- a/src/std/static/imulu.src +++ b/src/std/shared/imulu_fast.src @@ -1,9 +1,9 @@ assume adl=1 - public __imuls -__imuls: - public __imulu -__imulu: + public __imuls_fast +__imuls_fast: + public __imulu_fast +__imulu_fast: ; Multiplies UHL by UBC and returns the 24-bit product uhl. 
; I: UBC=multiplier, UHL=multiplicand, ABL=1 @@ -11,7 +11,7 @@ __imulu: ; FO: sz(a), vc((UHL*UBC-L*C&0xFFFFFF)+L*C) ; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 ; ; a ubc ude uhl u(sp) -Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------+---------------- +Mul_UHL_UBC_UHL_Fast: ;--------------+---------------+---------------+---------------+---------------- ld d,b ld e,h ; de=BH mlt de ; ude=H*B @@ -21,11 +21,11 @@ Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+--------------- push bc inc sp ; (sp)=[HLU]H ; (sp)=[BCU]B - pop de ; de=[BCU]B ------ + pop de ; de=[BCU]B -- ld e,l ; de=[BCU]L mlt de ; ude=L*BCU add a,e ; a=H*B+L*BCU - pop de ; de=[HLU]H ------ + pop de ; de=[HLU]H -- ld e,c ; de=[HLU]C mlt de ; ude=HLU*C add a,e ; a=HLU*C+H*B+L*BCU From 76f23665ce8513c74a517d44b347194c34e4fa24 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 15:31:10 -0400 Subject: [PATCH 009/142] Reimplement proper static versions of __imuls and __imulu --- src/std/static/imulu.src | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/std/static/imulu.src diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src new file mode 100644 index 000000000..b8c072152 --- /dev/null +++ b/src/std/static/imulu.src @@ -0,0 +1,60 @@ + assume adl=1 + + public __imuls +__imuls: + public __imulu +__imulu: + +; Multiplies UHL by UBC and returns the 24-bit product uhl. 
+; I: UBC=multiplier, UHL=multiplicand, ABL=1 +; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC +; FO: -- +; CC: 49*r(PC)+15*r(SPL)+12*w(SPL)+25 +; ; a ude uhl u(sp) +Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------- + push de ; u(sp)=UDE + push af ; u(sp)=AF + ld d,b + ld e,h ; de=BH + mlt de ; ude=H*B + ld a,e ; a=H*B + dec sp + push hl + push bc + inc sp ; (sp)=[HLU]H + ; (sp)=[BCU]B + pop de ; de=[BCU]B -- + ld e,l ; de=[BCU]L + mlt de ; ude=L*BCU + add a,e ; a=H*B+L*BCU + pop de ; de=[HLU]H -- + ld e,c ; de=[HLU]C + mlt de ; ude=HLU*C + add a,e ; a=HLU*C+H*B+L*BCU + ld e,l ; e=L + ld l,c ; hl=HC + mlt hl ; uhl=H*C + add a,h ; a=(H*C>>8)+HLU*C+H*B+L*BCU + ld h,a ; uhl=H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF + ld a,e ; a=L + ld d,b ; de=BL + mlt de ; ude=L*B + add hl,de ; uhl=L*B+(H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF) + ld d,c + ld e,a ; de=CL + mlt de ; ude=L*C + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl + add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + pop af ; a=A -- + pop de ; ude=UDE -- + ret + +; (UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF +; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)&0xFFFFFF From 370c07f560007771598ffb3b48dbfb9e71ab71f2 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 15:50:57 -0400 Subject: [PATCH 010/142] Fix some docs --- src/std/shared/imulu_fast.src | 2 +- src/std/static/imulu.src | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/std/shared/imulu_fast.src b/src/std/shared/imulu_fast.src index e8c968767..a087b62aa 100644 --- a/src/std/shared/imulu_fast.src +++ b/src/std/shared/imulu_fast.src @@ -6,7 +6,7 @@ __imuls_fast: __imulu_fast: ; Multiplies UHL by UBC and returns the 24-bit product uhl. 
-; I: UBC=multiplier, UHL=multiplicand, ABL=1 +; I: UBC=multiplier, UHL=multiplicand, ADL=1 ; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC ; FO: sz(a), vc((UHL*UBC-L*C&0xFFFFFF)+L*C) ; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index b8c072152..f5afdf86c 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -6,8 +6,8 @@ __imuls: __imulu: ; Multiplies UHL by UBC and returns the 24-bit product uhl. -; I: UBC=multiplier, UHL=multiplicand, ABL=1 -; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC +; I: UBC=multiplier, UHL=multiplicand, ADL=1 +; O: uhl=UHL*UBC ; FO: -- ; CC: 49*r(PC)+15*r(SPL)+12*w(SPL)+25 ; ; a ude uhl u(sp) From 41aaf2ce8a527d2aab688f1259c024660dc089d7 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 15:58:12 -0400 Subject: [PATCH 011/142] Reformat asm --- src/std/linked/linked.src | 16 +++--- src/std/shared/imulu_fast.src | 80 +++++++++++++++--------------- src/std/shared/smulu_fast.src | 30 ++++++------ src/std/static/imulu.src | 92 +++++++++++++++++------------------ src/std/static/smulu.src | 36 +++++++------- 5 files changed, 127 insertions(+), 127 deletions(-) diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 8f9c7fbc4..e9be7f851 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -54,11 +54,11 @@ _tan := 022120h _tanh := 022138h ; integer math - public __imuls -__imuls := 0000154h - public __imulu -__imulu := 0000158h - public __smuls -__smuls := 0000224h - public __smulu -__smulu := 0000228h + public __imuls +__imuls := 0000154h + public __imulu +__imulu := 0000158h + public __smuls +__smuls := 0000224h + public __smulu +__smulu := 0000228h diff --git a/src/std/shared/imulu_fast.src b/src/std/shared/imulu_fast.src index a087b62aa..2f21bef5c 100644 --- a/src/std/shared/imulu_fast.src +++ b/src/std/shared/imulu_fast.src @@ -1,8 +1,8 @@ assume adl=1 - public __imuls_fast + public __imuls_fast 
__imuls_fast: - public __imulu_fast + public __imulu_fast __imulu_fast: ; Multiplies UHL by UBC and returns the 24-bit product uhl. @@ -10,44 +10,44 @@ __imulu_fast: ; O: a=UHL*UBC-L*C>>16, ubc=L*B, ude=L*C, uhl=UHL*UBC ; FO: sz(a), vc((UHL*UBC-L*C&0xFFFFFF)+L*C) ; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 -; ; a ubc ude uhl u(sp) -Mul_UHL_UBC_UHL_Fast: ;--------------+---------------+---------------+---------------+---------------- - ld d,b - ld e,h ; de=BH - mlt de ; ude=H*B - ld a,e ; a=H*B - dec sp - push hl - push bc - inc sp ; (sp)=[HLU]H - ; (sp)=[BCU]B - pop de ; de=[BCU]B -- - ld e,l ; de=[BCU]L - mlt de ; ude=L*BCU - add a,e ; a=H*B+L*BCU - pop de ; de=[HLU]H -- - ld e,c ; de=[HLU]C - mlt de ; ude=HLU*C - add a,e ; a=HLU*C+H*B+L*BCU - ld d,c - ld e,l ; de=CL - ld l,d ; hl=HC - ld c,e ; bc=BL - mlt bc ; ubc=L*B - mlt de ; ude=L*C - mlt hl ; uhl=H*C - add hl,bc ; uhl=H*C+L*B - add a,h ; a=(H*C+L*B>>8)+HLU*C+H*B+L*BCU - ld h,a ; hl=H*C+L*B+(HLU*C+H*B+L*BCU<<8) - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) +; ; a ubc ude uhl u(sp) +Mul_UHL_UBC_UHL_Fast: ;--------------+---------------+---------------+---------------+---------------- + ld d, b + ld e, h ; de=BH + mlt de ; ude=H*B + ld a, e ; a=H*B + dec sp + push hl + push bc + inc sp ; (sp)=[HLU]H + ; (sp)=[BCU]B + pop de ; de=[BCU]B -- + ld e, l ; de=[BCU]L + mlt de ; ude=L*BCU + add a, e ; a=H*B+L*BCU + pop de ; de=[HLU]H -- + ld e, c ; de=[HLU]C + mlt de ; ude=HLU*C + add a, e ; a=HLU*C+H*B+L*BCU + ld d, c + ld e, l ; de=CL + ld l, d ; hl=HC + ld c, e ; bc=BL + mlt bc ; ubc=L*B + mlt de ; ude=L*C + mlt hl ; uhl=H*C + add hl, bc ; uhl=H*C+L*B + add a, h ; a=(H*C+L*B>>8)+HLU*C+H*B+L*BCU + ld h, a ; hl=H*C+L*B+(HLU*C+H*B+L*BCU<<8) + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl ; 
uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add hl, de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) ret ; (UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF diff --git a/src/std/shared/smulu_fast.src b/src/std/shared/smulu_fast.src index 1c2e526ad..4a1c286a4 100644 --- a/src/std/shared/smulu_fast.src +++ b/src/std/shared/smulu_fast.src @@ -1,6 +1,6 @@ - public __smuls_fast + public __smuls_fast __smuls_fast: - public __smulu_fast + public __smulu_fast __smulu_fast: ; Multiplies HL by BC and returns the 16-bit product hl. @@ -8,19 +8,19 @@ __smulu_fast: ; O: a=h, ubc=H*C, ude=L*B, hl=HL*BC, hlu=0 ; FO: sz(h), p/v=?, c=? ; CC: 16*r(PC)+(ADL?3*r(SPL):2*r(SPS))+13 - ; a ubc ude uhl -Mul_HL_BC_HL_Fast: ;--------------+---------------+---------------+---------------- - ld d,b - ld e,l ; de=BL - ld b,h ; bc=HC - ld h,c ; hl=CL - mlt bc ; ubc=H*C - mlt de ; ude=L*B - mlt hl ; uhl=L*C - ld a,h ; a=L*C>>8 - add a,c ; a=(L*C>>8)+H*C - add a,e ; a=(L*C>>8)+H*C+L*B - ld h,a ; hl=L*C+((H*C)+(L*B)<<8) + ; a ubc ude uhl +Mul_HL_BC_HL_Fast: ;--------------+---------------+---------------+---------------- + ld d, b + ld e, l ; de=BL + ld b, h ; bc=HC + ld h, c ; hl=CL + mlt bc ; ubc=H*C + mlt de ; ude=L*B + mlt hl ; uhl=L*C + ld a, h ; a=L*C>>8 + add a, c ; a=(L*C>>8)+H*C + add a, e ; a=(L*C>>8)+H*C+L*B + ld h, a ; hl=L*C+((H*C)+(L*B)<<8) ret ; HL*BC&0xFFFF=L*C+(H*C+L*B<<8)+(H*B<<16)&0xFFFF diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index f5afdf86c..f29cb6a09 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -1,8 +1,8 @@ assume adl=1 - public __imuls + public __imuls __imuls: - public __imulu + public __imulu __imulu: ; Multiplies UHL by UBC and returns the 24-bit product uhl. 
@@ -10,50 +10,50 @@ __imulu: ; O: uhl=UHL*UBC ; FO: -- ; CC: 49*r(PC)+15*r(SPL)+12*w(SPL)+25 -; ; a ude uhl u(sp) -Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------- - push de ; u(sp)=UDE - push af ; u(sp)=AF - ld d,b - ld e,h ; de=BH - mlt de ; ude=H*B - ld a,e ; a=H*B - dec sp - push hl - push bc - inc sp ; (sp)=[HLU]H - ; (sp)=[BCU]B - pop de ; de=[BCU]B -- - ld e,l ; de=[BCU]L - mlt de ; ude=L*BCU - add a,e ; a=H*B+L*BCU - pop de ; de=[HLU]H -- - ld e,c ; de=[HLU]C - mlt de ; ude=HLU*C - add a,e ; a=HLU*C+H*B+L*BCU - ld e,l ; e=L - ld l,c ; hl=HC - mlt hl ; uhl=H*C - add a,h ; a=(H*C>>8)+HLU*C+H*B+L*BCU - ld h,a ; uhl=H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF - ld a,e ; a=L - ld d,b ; de=BL - mlt de ; ude=L*B - add hl,de ; uhl=L*B+(H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF) - ld d,c - ld e,a ; de=CL - mlt de ; ude=L*C - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl - add hl,hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - add hl,de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - pop af ; a=A -- - pop de ; ude=UDE -- +; ; a ude uhl u(sp) +Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------- + push de ; u(sp)=UDE + push af ; u(sp)=AF + ld d, b + ld e, h ; de=BH + mlt de ; ude=H*B + ld a, e ; a=H*B + dec sp + push hl + push bc + inc sp ; (sp)=[HLU]H + ; (sp)=[BCU]B + pop de ; de=[BCU]B -- + ld e, l ; de=[BCU]L + mlt de ; ude=L*BCU + add a, e ; a=H*B+L*BCU + pop de ; de=[HLU]H -- + ld e, c ; de=[HLU]C + mlt de ; ude=HLU*C + add a, e ; a=HLU*C+H*B+L*BCU + ld e, l ; e=L + ld l, c ; hl=HC + mlt hl ; uhl=H*C + add a, h ; a=(H*C>>8)+HLU*C+H*B+L*BCU + ld h, a ; uhl=H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF + ld a, e ; a=L + ld d, b ; de=BL + mlt de ; ude=L*B + add hl, de ; uhl=L*B+(H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF) + ld d, c + ld e, a ; de=CL + mlt de ; ude=L*C + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + add 
hl, de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + pop af ; a=A -- + pop de ; ude=UDE -- ret ; (UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src index 925339872..9f604a156 100644 --- a/src/std/static/smulu.src +++ b/src/std/static/smulu.src @@ -1,6 +1,6 @@ - public __smuls + public __smuls __smuls: - public __smulu + public __smulu __smulu: ; Multiplies HL by BC and returns the 16-bit product hl. @@ -8,22 +8,22 @@ __smulu: ; O: hl=HL*BC, hlu=ADL&&(L*C+(H*C+L*B<<8&0xFFFF)>>16) ; FO: c=!ADL&&(L*C+(H*C+L*B<<8&0xFFFF)>>16) ; CC: 20*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+13 - ; ude uhl u(sp) -Mul_HL_BC_HL: ;--------------+---------------+---------------- - push de ; u(sp)=UDE - ld d,h ; d =H - ld e,c ; de=HC - mlt de ; ude=H*C - ld d,l ; d=L - ld h,b ; hl=BL - mlt hl ; uhl=L*B - add hl,de ; uhl=(L<<8)+(H*C&0xFF)+L*B - ld h,l ; - ld l,0 ; uhl=H*C+L*B<<8&0xFFFF - ld e,c ; ude=LC - mlt de ; ude=L*C - add hl,de ; uhl=L*C+(H*C+L*B<<8&0xFFFF) - pop de ; ude=UDE ------ + ; ude uhl u(sp) +Mul_HL_BC_HL: ;--------------+---------------+---------------- + push de ; u(sp)=UDE + ld d, h ; d =H + ld e, c ; de=HC + mlt de ; ude=H*C + ld d, l ; d=L + ld h, b ; hl=BL + mlt hl ; uhl=L*B + add hl, de ; uhl=(L<<8)+(H*C&0xFF)+L*B + ld h, l + ld l, 0 ; uhl=H*C+L*B<<8&0xFFFF + ld e, c ; ude=LC + mlt de ; ude=L*C + add hl, de ; uhl=L*C+(H*C+L*B<<8&0xFFFF) + pop de ; ude=UDE ------ ret ; HL*BC&0xFFFF=L*C+(H*C+L*B<<8)+(H*B<<16)&0xFFFF From 0e7425dea7796fb89fcd9ece62746602a57d1435 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 16:03:01 -0400 Subject: [PATCH 012/142] This looks nicer --- src/std/shared/imulu_fast.src | 2 +- src/std/static/imulu.src | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/shared/imulu_fast.src b/src/std/shared/imulu_fast.src index 2f21bef5c..99ee2750f 100644 --- a/src/std/shared/imulu_fast.src +++ 
b/src/std/shared/imulu_fast.src @@ -11,7 +11,7 @@ __imulu_fast: ; FO: sz(a), vc((UHL*UBC-L*C&0xFFFFFF)+L*C) ; CC: 43*r(PC)+9*r(SPL)+6*w(SPL)+25 ; ; a ubc ude uhl u(sp) -Mul_UHL_UBC_UHL_Fast: ;--------------+---------------+---------------+---------------+---------------- +Mul_UHL_UBC_UHL_Fast: ;--------------+---------------+---------------+---------------+------------------------ ld d, b ld e, h ; de=BH mlt de ; ude=H*B diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index f29cb6a09..76c7800bd 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -11,7 +11,7 @@ __imulu: ; FO: -- ; CC: 49*r(PC)+15*r(SPL)+12*w(SPL)+25 ; ; a ude uhl u(sp) -Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------- +Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------------------------------- push de ; u(sp)=UDE push af ; u(sp)=AF ld d, b From 857a45c5d863849c17d7602d700755e15bc56430 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 16:16:36 -0400 Subject: [PATCH 013/142] Implement __lmuls_fast and __lmulu_fast --- src/std/shared/lmulu_fast.src | 82 +++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 src/std/shared/lmulu_fast.src diff --git a/src/std/shared/lmulu_fast.src b/src/std/shared/lmulu_fast.src new file mode 100644 index 000000000..d426c5c70 --- /dev/null +++ b/src/std/shared/lmulu_fast.src @@ -0,0 +1,82 @@ + assume adl=1 + + public __lmuls_fast +__lmuls_fast: + public __lmulu_fast +__lmulu_fast: + +; Multiplies EUHL by AUBC and returns the 32-bit product euhl. 
+; I: AUBC=multiplier, EUHL=multiplicand, ADL=1 +; O: a=EUHL*AUBC-L*C>>24, ubc=L*C, deu=0, d=H*C>>8, euhl=EUHL*AUBC +; FO: sz(e), vc((EUHL*AUBC-L*C&0xFFFFFFFF)+L*C) +; CC: (86+2*cf)*r(PC)+9*w(SPL)+9*r(SPL)+42 +; ; a ubc ude uhl uix u(sp) +Mul_EUHL_AUBC_EUHL_Fast: ;--------------+---------------+---------------+---------------+---------------+------------------------ + push ix ; u(sp)=UIX + ld d, c ; de=CE + mlt de ; ude=E*C + ld d, a ; d =A + ld a, e ; a=E*C + ld e, l ; de=AL + ld ixl, e ; ixl=L + mlt de ; ude=L*A + add a, e ; a=E*C+L*A + dec sp + push bc + inc sp + inc sp + push hl + inc sp + inc sp ; (sp)=[BCU][HLU] + ld d, h ; d =H + ld ixh, d ; ix=HL + pop hl ; hl=[BCU][HLU] -- + ld e, h ; de=H[BCU] + mlt de ; ude=H*BCU + add a, e ; a=E*C+H*BCU+L*A + ld d, b ; d =B + ld e, l ; de=B[HLU] + mlt de ; ude=HLU*B + add a, e ; a=E*C+HLU*B+H*BCU+L*A + ld d, h ; d =BCU + ld e, ixl ; de=[BCU]L + ld h, c ; hl=C[HLU] + mlt de ; ude=L*BCU + mlt hl ; uhl=HLU*C + add hl, de ; uhl=HLU*C+L*BCU + ld d, b ; d =B + ld e, ixh ; de=BH + mlt de ; ude=H*B + add hl, de ; uhl=HLU*C+H*B+L*BCU + add a, h ; a=(HLU*C+H*B+L*BCU>>8)+E*C+HLU*B+H*BCU+L*A + ld h, l ; h =HLU*C+H*B+L*BCU + ld l, 0 ; hl=HLU*C+H*B+L*BCU<<8 + ld d, b ; d =B + ld e, ixl ; de=BL + mlt de ; ude=L*B + add.s hl, de ; uhl=L*B+(HLU*C+H*B+L*BCU<<8)&0xFFFF + adc a, 0 ; a=L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + ld d, ixh ; d =H + ld e, c ; de=HC + mlt de ; ude=H*C + add hl, de ; uhl=H*C+(L*B+(HLU*C+H*B+L*BCU<<8)&0xFFFF) + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + adc a, l ; a=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + ld b, ixl ; bc =LC + mlt bc ; ubc=L*C + add hl, bc ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + ld e, a ; e=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + pop ix ; uix=UIX -- + ret nc ; nc => e=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)>>24 
+ inc e ; e=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)>>24 + ret + +; (EUHL*AUBC)&0xFFFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)+(E*B+HLU*BCU+H*A<<32)+(E*BCU+HLU*A<<40)+(E*A<<48)&0xFFFFFFFF +; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)&0xFFFFFFFF From 9326b145ee2353c361943d8f27b7a72e59f66496 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 18:21:10 -0400 Subject: [PATCH 014/142] Provide optimized static versions of __lmuls and __lmulu --- src/std/linked/linked.src | 4 ++ src/std/shared/lmulu_fast.src | 4 +- src/std/shared/shared.src | 2 +- src/std/static/imulu.src | 14 +++--- src/std/static/lmulu.src | 86 +++++++++++++++++++++++++++++++++++ 5 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 src/std/static/lmulu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index e9be7f851..8175d390d 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -58,6 +58,10 @@ _tanh := 022138h __imuls := 0000154h public __imulu __imulu := 0000158h + public __lmuls +__lmuls := 00001C8h + public __lmulu +__lmulu := 00001CCh public __smuls __smuls := 0000224h public __smulu diff --git a/src/std/shared/lmulu_fast.src b/src/std/shared/lmulu_fast.src index d426c5c70..4d827a4da 100644 --- a/src/std/shared/lmulu_fast.src +++ b/src/std/shared/lmulu_fast.src @@ -18,7 +18,7 @@ Mul_EUHL_AUBC_EUHL_Fast: ;--------------+---------------+---------------+------- ld d, a ; d =A ld a, e ; a=E*C ld e, l ; de=AL - ld ixl, e ; ixl=L + ld ixl, e ; ixl=L mlt de ; ude=L*A add a, e ; a=E*C+L*A dec sp @@ -72,8 +72,8 @@ Mul_EUHL_AUBC_EUHL_Fast: ;--------------+---------------+---------------+------- ld b, ixl ; bc =LC mlt bc ; ubc=L*C add hl, bc ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - ld e, a ; e=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 pop ix ; uix=UIX -- + ld e, a ; e=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 ret nc ; 
nc => e=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)>>24 inc e ; e=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)>>24 ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 070b28bdb..2d5a65a29 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lmuls, __lmulu, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, 
__irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index 76c7800bd..764304e53 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -12,8 +12,8 @@ __imulu: ; CC: 49*r(PC)+15*r(SPL)+12*w(SPL)+25 ; ; a ude uhl u(sp) Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+---------------------------------------- - push de ; u(sp)=UDE - push af ; u(sp)=AF + push af ; u(sp)=AF + push de ; u(sp)=UDE ld d, b ld e, h ; de=BH mlt de ; ude=H*B @@ -40,9 +40,6 @@ Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+-------------- ld d, b ; de=BL mlt de ; ude=L*B add hl, de ; uhl=L*B+(H*C+(HLU*C+H*B+L*BCU<<8)&0xFFFF) - ld d, c - ld e, a ; de=CL - mlt de ; ude=L*C add hl, hl add hl, hl add hl, hl @@ -51,9 +48,12 @@ Mul_UHL_UBC_UHL: ;--------------+---------------+---------------+-------------- add hl, hl add hl, hl add hl, hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + ld d, a ; d =L + ld e, c ; de=LC + mlt de ; ude=L*C add hl, de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) - pop af ; a=A -- - pop de ; ude=UDE -- + pop de ; ude=UDE -- + pop af ; a=A -- ret ; 
(UHL*UBC)&0xFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(HLU*B+H*BCU<<24)+(HLU*BCU<<32)&0xFFFFFF diff --git a/src/std/static/lmulu.src b/src/std/static/lmulu.src new file mode 100644 index 000000000..fb199fd60 --- /dev/null +++ b/src/std/static/lmulu.src @@ -0,0 +1,86 @@ + assume adl=1 + + public __lmuls +__lmuls: + public __lmulu +__lmulu: + +; Multiplies EUHL by AUBC and returns the 32-bit product euhl. +; I: AUBC=multiplier, EUHL=multiplicand, ADL=1 +; O: euhl=EUHL*AUBC +; FO: -- +; CC: 93*r(PC)+15*w(SPL)+15*r(SPL)+41 +; ; a ude uhl uix u(sp) +Mul_EUHL_AUBC_EUHL: ;--------------+---------------+---------------+---------------+---------------------------------------- + push af ; u(sp)=AF + push de ; u(sp)=UDE + push ix ; u(sp)=UIX + ld d, c ; de=CE + mlt de ; ude=E*C + ld d, a ; d =A + ld a, e ; a=E*C + ld e, l ; de=AL + ld ixl, e ; ixl=L + mlt de ; ude=L*A + add a, e ; a=E*C+L*A + dec sp + push bc + inc sp + inc sp + push hl + inc sp + inc sp ; (sp)=[BCU][HLU] + ld d, h ; d =H + ld ixh, d ; ix=HL + pop hl ; hl=[BCU][HLU] -- + ld e, h ; de=H[BCU] + mlt de ; ude=H*BCU + add a, e ; a=E*C+H*BCU+L*A + ld d, b ; d =B + ld e, l ; de=B[HLU] + mlt de ; ude=HLU*B + add a, e ; a=E*C+HLU*B+H*BCU+L*A + ld d, h ; d =BCU + ld e, ixl ; de=[BCU]L + ld h, c ; hl=C[HLU] + mlt de ; ude=L*BCU + mlt hl ; uhl=HLU*C + add hl, de ; uhl=HLU*C+L*BCU + ld d, b ; d =B + ld e, ixh ; de=BH + mlt de ; ude=H*B + add hl, de ; uhl=HLU*C+H*B+L*BCU + add a, h ; a=(HLU*C+H*B+L*BCU>>8)+E*C+HLU*B+H*BCU+L*A + ld h, l ; h =HLU*C+H*B+L*BCU + ld l, 0 ; hl=HLU*C+H*B+L*BCU<<8 + ld d, b ; d =B + ld e, ixl ; de=BL + mlt de ; ude=L*B + add.s hl, de ; uhl=L*B+(HLU*C+H*B+L*BCU<<8)&0xFFFF + adc a, 0 ; a=L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + ld d, ixh ; d =H + ld e, c ; de=HC + mlt de ; ude=H*C + add hl, de ; uhl=H*C+(L*B+(HLU*C+H*B+L*BCU<<8)&0xFFFF) + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl ; uhl=(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) 
+ adc a, l ; a=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + ld d, ixl ; d =L + ld e, c ; de=LC + mlt de ; ude=L*C + add hl, de ; uhl=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16) + adc a, 0 ; a=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)>>24 + pop ix ; uix=UIX -- + pop de ; ude=UDE -- + ld e, a ; e=H*C+L*B+(HLU*C+H*B+L*BCU<<8)+(E*C+HLU*B+H*BCU+L*A<<16)>>16 + pop af ; a=A -- + ret + +; (EUHL*AUBC)&0xFFFFFFFF=L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)+(E*B+HLU*BCU+H*A<<32)+(E*BCU+HLU*A<<40)+(E*A<<48)&0xFFFFFFFF +; =L*C+(H*C+L*B<<8)+(HLU*C+H*B+L*BCU<<16)+(E*C+HLU*B+H*BCU+L*A<<24)&0xFFFFFFFF From 36ebb4b864bcb801e16d8a9e4cf203b258d55c2a Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 22:09:53 -0400 Subject: [PATCH 015/142] Provide optimized versions of __sand, __sor, and __sxor --- src/std/linked/linked.src | 18 ++++++++++++------ src/std/shared/sand_fast.src | 9 +++++++++ src/std/shared/shared.src | 5 +---- src/std/shared/sor_fast.src | 9 +++++++++ src/std/shared/sxor_fast.src | 9 +++++++++ src/std/static/sand.src | 11 +++++++++++ src/std/static/sor.src | 11 +++++++++++ src/std/static/sxor.src | 11 +++++++++++ 8 files changed, 73 insertions(+), 10 deletions(-) create mode 100644 src/std/shared/sand_fast.src create mode 100644 src/std/shared/sor_fast.src create mode 100644 src/std/shared/sxor_fast.src create mode 100644 src/std/static/sand.src create mode 100644 src/std/static/sor.src create mode 100644 src/std/static/sxor.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 8175d390d..c2125fa88 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -55,14 +55,20 @@ _tanh := 022138h ; integer math public __imuls -__imuls := 0000154h +__imuls := 000154h public __imulu -__imulu := 0000158h +__imulu := 000158h public __lmuls -__lmuls := 00001C8h +__lmuls := 0001C8h public __lmulu -__lmulu := 00001CCh +__lmulu := 0001CCh + public __sand +__sand := 000200h 
public __smuls -__smuls := 0000224h +__smuls := 000224h public __smulu -__smulu := 0000228h +__smulu := 000228h + public __sor +__sor := 000234h + public __sxor +__sxor := 000268h diff --git a/src/std/shared/sand_fast.src b/src/std/shared/sand_fast.src new file mode 100644 index 000000000..f5a142f07 --- /dev/null +++ b/src/std/shared/sand_fast.src @@ -0,0 +1,9 @@ + public __sand_fast +__sand_fast: + ld a, l + and a, c + ld l, a + ld a, h + and a, b + ld h, a + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 2d5a65a29..b35650726 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __sand, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __sor, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __sxor, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, 
__fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -80,7 +80,6 @@ __lstiy := 0001F4h __lsub := 0001F8h __ltof := 000284h __lxor := 0001FCh -__sand := 000200h __scmpzero := 000204h __sdivs := 000208h __sdivu := 00020Ch @@ -91,7 +90,6 @@ __sldix := 00021Ch __sldiy := 000220h __sneg := 00022Ch __snot := 000230h -__sor := 000234h __srems := 000238h __sremu := 00023Ch __sshl := 000240h @@ -104,7 +102,6 @@ __sstix := 000258h __sstiy := 00025Ch __stoi := 000260h __stoiu := 000264h -__sxor := 000268h __ultof := 000280h _longjmp := 000098h _memchr := 00009Ch diff --git a/src/std/shared/sor_fast.src b/src/std/shared/sor_fast.src new file mode 100644 index 000000000..282efe5e3 --- /dev/null +++ b/src/std/shared/sor_fast.src @@ -0,0 +1,9 @@ + public __sor_fast +__sor_fast: + ld a, l + or a, c + ld l, a + ld a, h + or a, b + ld h, a + ret diff --git a/src/std/shared/sxor_fast.src b/src/std/shared/sxor_fast.src new file mode 
100644 index 000000000..c2aaee782 --- /dev/null +++ b/src/std/shared/sxor_fast.src @@ -0,0 +1,9 @@ + public __sxor_fast +__sxor_fast: + ld a, l + xor a, c + ld l, a + ld a, h + xor a, b + ld h, a + ret diff --git a/src/std/static/sand.src b/src/std/static/sand.src new file mode 100644 index 000000000..eb10df189 --- /dev/null +++ b/src/std/static/sand.src @@ -0,0 +1,11 @@ + public __sand +__sand: + push af + ld a, l + and a, c + ld l, a + ld a, h + and a, b + ld h, a + pop af + ret diff --git a/src/std/static/sor.src b/src/std/static/sor.src new file mode 100644 index 000000000..5b6d3c3a3 --- /dev/null +++ b/src/std/static/sor.src @@ -0,0 +1,11 @@ + public __sor +__sor: + push af + ld a, l + or a, c + ld l, a + ld a, h + or a, b + ld h, a + pop af + ret diff --git a/src/std/static/sxor.src b/src/std/static/sxor.src new file mode 100644 index 000000000..b59537cc5 --- /dev/null +++ b/src/std/static/sxor.src @@ -0,0 +1,11 @@ + public __sxor +__sxor: + push af + ld a, l + xor a, c + ld l, a + ld a, h + xor a, b + ld h, a + pop af + ret From 62f65076d7f5538c848a3074c99f33a05b164869 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 2 Jun 2021 23:40:09 -0400 Subject: [PATCH 016/142] This order may eventually better allow for hijacking --- src/std/shared/sand_fast.src | 6 +++--- src/std/shared/sor_fast.src | 6 +++--- src/std/shared/sxor_fast.src | 6 +++--- src/std/static/sand.src | 6 +++--- src/std/static/sor.src | 6 +++--- src/std/static/sxor.src | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/std/shared/sand_fast.src b/src/std/shared/sand_fast.src index f5a142f07..bf4cf05a1 100644 --- a/src/std/shared/sand_fast.src +++ b/src/std/shared/sand_fast.src @@ -1,9 +1,9 @@ public __sand_fast __sand_fast: - ld a, l - and a, c - ld l, a ld a, h and a, b ld h, a + ld a, l + and a, c + ld l, a ret diff --git a/src/std/shared/sor_fast.src b/src/std/shared/sor_fast.src index 282efe5e3..d10f20acf 100644 --- a/src/std/shared/sor_fast.src +++ 
b/src/std/shared/sor_fast.src @@ -1,9 +1,9 @@ public __sor_fast __sor_fast: - ld a, l - or a, c - ld l, a ld a, h or a, b ld h, a + ld a, l + or a, c + ld l, a ret diff --git a/src/std/shared/sxor_fast.src b/src/std/shared/sxor_fast.src index c2aaee782..09551cdec 100644 --- a/src/std/shared/sxor_fast.src +++ b/src/std/shared/sxor_fast.src @@ -1,9 +1,9 @@ public __sxor_fast __sxor_fast: - ld a, l - xor a, c - ld l, a ld a, h xor a, b ld h, a + ld a, l + xor a, c + ld l, a ret diff --git a/src/std/static/sand.src b/src/std/static/sand.src index eb10df189..89d91fa04 100644 --- a/src/std/static/sand.src +++ b/src/std/static/sand.src @@ -1,11 +1,11 @@ public __sand __sand: push af - ld a, l - and a, c - ld l, a ld a, h and a, b ld h, a + ld a, l + and a, c + ld l, a pop af ret diff --git a/src/std/static/sor.src b/src/std/static/sor.src index 5b6d3c3a3..4c8087e0b 100644 --- a/src/std/static/sor.src +++ b/src/std/static/sor.src @@ -1,11 +1,11 @@ public __sor __sor: push af - ld a, l - or a, c - ld l, a ld a, h or a, b ld h, a + ld a, l + or a, c + ld l, a pop af ret diff --git a/src/std/static/sxor.src b/src/std/static/sxor.src index b59537cc5..cd1acb351 100644 --- a/src/std/static/sxor.src +++ b/src/std/static/sxor.src @@ -1,11 +1,11 @@ public __sxor __sxor: push af - ld a, l - xor a, c - ld l, a ld a, h xor a, b ld h, a + ld a, l + xor a, c + ld l, a pop af ret From becfdac3605a187eb59972881609a3e84d4e459e Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 00:05:35 -0400 Subject: [PATCH 017/142] Provide optimized versions of __iand, __ior, and __ixor --- src/std/linked/linked.src | 28 +++++++++++++++++++--------- src/std/shared/iand_fast.src | 22 ++++++++++++++++++++++ src/std/shared/ior_fast.src | 22 ++++++++++++++++++++++ src/std/shared/ixor_fast.src | 22 ++++++++++++++++++++++ src/std/shared/shared.src | 5 +---- src/std/static/iand.src | 24 ++++++++++++++++++++++++ src/std/static/ior.src | 24 ++++++++++++++++++++++++ src/std/static/ixor.src | 24 
++++++++++++++++++++++++ 8 files changed, 158 insertions(+), 13 deletions(-) create mode 100644 src/std/shared/iand_fast.src create mode 100644 src/std/shared/ior_fast.src create mode 100644 src/std/shared/ixor_fast.src create mode 100644 src/std/static/iand.src create mode 100644 src/std/static/ior.src create mode 100644 src/std/static/ixor.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index c2125fa88..7ba90d106 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -53,15 +53,7 @@ _strtod := 0220E0h _tan := 022120h _tanh := 022138h -; integer math - public __imuls -__imuls := 000154h - public __imulu -__imulu := 000158h - public __lmuls -__lmuls := 0001C8h - public __lmulu -__lmulu := 0001CCh +; short (16-bit) math public __sand __sand := 000200h public __smuls @@ -72,3 +64,21 @@ __smulu := 000228h __sor := 000234h public __sxor __sxor := 000268h + +; int (24-bit) math + public __iand +__iand := 000134h + public __imuls +__imuls := 000154h + public __imulu +__imulu := 000158h + public __ior +__ior := 000168h + public __ixor +__ixor := 000198h + +; long (32-bit) math + public __lmuls +__lmuls := 0001C8h + public __lmulu +__lmulu := 0001CCh diff --git a/src/std/shared/iand_fast.src b/src/std/shared/iand_fast.src new file mode 100644 index 000000000..0cae57084 --- /dev/null +++ b/src/std/shared/iand_fast.src @@ -0,0 +1,22 @@ + public __iand_fast +__iand_fast: + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + and a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + and a, b + ld h, a + ld a, l + and a, c + ld l, a + ret diff --git a/src/std/shared/ior_fast.src b/src/std/shared/ior_fast.src new file mode 100644 index 000000000..cd115bdb8 --- /dev/null +++ b/src/std/shared/ior_fast.src @@ -0,0 +1,22 @@ + public __ior_fast +__ior_fast: + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + or a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + or 
a, b + ld h, a + ld a, l + or a, c + ld l, a + ret diff --git a/src/std/shared/ixor_fast.src b/src/std/shared/ixor_fast.src new file mode 100644 index 000000000..b906049de --- /dev/null +++ b/src/std/shared/ixor_fast.src @@ -0,0 +1,22 @@ + public __ixor_fast +__ixor_fast: + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + xor a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + xor a, b + ld h, a + ld a, l + xor a, c + ld l, a + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index b35650726..658fc8008 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __iand, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __ior, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ixor, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, 
__fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -30,7 +30,6 @@ __fruitof := 0002BCh __frustof := 0002C8h __fsub := 000290h __ftol := 00027Ch -__iand := 000134h __icmpzero := 000138h __idivs := 00013Ch __idivu := 000140h @@ -41,7 +40,6 @@ __imul_b := 000150h __indcall := 00015Ch __ineg := 000160h __inot := 000164h -__ior := 000168h __irems := 00016Ch __iremu := 000170h __ishl := 000174h @@ -53,7 +51,6 @@ __ishru_b := 000188h __istix := 00018Ch __istiy := 000190h __itol := 000194h -__ixor := 000198h __ladd := 00019Ch __ladd_b := 0001A0h __land := 0001A4h diff --git a/src/std/static/iand.src b/src/std/static/iand.src new file mode 100644 index 000000000..863eee6cc --- /dev/null +++ b/src/std/static/iand.src @@ -0,0 +1,24 @@ + public __iand +__iand: + push af + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + and a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + and a, b + ld h, a + ld a, l + and a, 
c + ld l, a + pop af + ret diff --git a/src/std/static/ior.src b/src/std/static/ior.src new file mode 100644 index 000000000..8f94e8b3c --- /dev/null +++ b/src/std/static/ior.src @@ -0,0 +1,24 @@ + public __ior +__ior: + push af + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + or a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + or a, b + ld h, a + ld a, l + or a, c + ld l, a + pop af + ret diff --git a/src/std/static/ixor.src b/src/std/static/ixor.src new file mode 100644 index 000000000..58c76548f --- /dev/null +++ b/src/std/static/ixor.src @@ -0,0 +1,24 @@ + public __ixor +__ixor: + push af + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + xor a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + xor a, b + ld h, a + ld a, l + xor a, c + ld l, a + pop af + ret From 73124069e8ed494ee08bf47cc701ceea7ae6dbd4 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 00:09:24 -0400 Subject: [PATCH 018/142] Provide optimized versions of __land, __lor, and __lxor --- src/std/linked/linked.src | 6 ++++++ src/std/shared/land_fast.src | 24 ++++++++++++++++++++++++ src/std/shared/lor_fast.src | 24 ++++++++++++++++++++++++ src/std/shared/lxor_fast.src | 24 ++++++++++++++++++++++++ src/std/shared/shared.src | 5 +---- src/std/static/land.src | 26 ++++++++++++++++++++++++++ src/std/static/lor.src | 26 ++++++++++++++++++++++++++ src/std/static/lxor.src | 26 ++++++++++++++++++++++++++ 8 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 src/std/shared/land_fast.src create mode 100644 src/std/shared/lor_fast.src create mode 100644 src/std/shared/lxor_fast.src create mode 100644 src/std/static/land.src create mode 100644 src/std/static/lor.src create mode 100644 src/std/static/lxor.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 7ba90d106..ea3190c36 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -78,7 +78,13 @@ 
__ior := 000168h __ixor := 000198h ; long (32-bit) math + public __land +__land := 0001A4h public __lmuls __lmuls := 0001C8h public __lmulu __lmulu := 0001CCh + public __lor +__lor := 0001D8h + public __lxor +__lxor := 0001FCh diff --git a/src/std/shared/land_fast.src b/src/std/shared/land_fast.src new file mode 100644 index 000000000..4e92c6e8e --- /dev/null +++ b/src/std/shared/land_fast.src @@ -0,0 +1,24 @@ + public __land_fast +__land_fast: + and a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + and a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + and a, b + ld h, a + ld a, l + and a, c + ld l, a + ret diff --git a/src/std/shared/lor_fast.src b/src/std/shared/lor_fast.src new file mode 100644 index 000000000..a824bfe32 --- /dev/null +++ b/src/std/shared/lor_fast.src @@ -0,0 +1,24 @@ + public __lor_fast +__lor_fast: + or a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + or a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + or a, b + ld h, a + ld a, l + or a, c + ld l, a + ret diff --git a/src/std/shared/lxor_fast.src b/src/std/shared/lxor_fast.src new file mode 100644 index 000000000..f47c219fe --- /dev/null +++ b/src/std/shared/lxor_fast.src @@ -0,0 +1,24 @@ + public __lxor_fast +__lxor_fast: + xor a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + xor a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + xor a, b + ld h, a + ld a, l + xor a, c + ld l, a + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 658fc8008..5095bef7f 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, 
__frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __land, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lor, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __lxor, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, 
_strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -53,7 +53,6 @@ __istiy := 000190h __itol := 000194h __ladd := 00019Ch __ladd_b := 0001A0h -__land := 0001A4h __lcmps := 0001A8h __lcmpu := 0001ACh __lcmpzero := 0001B0h @@ -66,7 +65,6 @@ __lmuls := 0001C8h __lmulu := 0001CCh __lneg := 0001D0h __lnot := 0001D4h -__lor := 0001D8h __lrems := 0001DCh __lremu := 0001E0h __lshl := 0001E4h @@ -76,7 +74,6 @@ __lstix := 0001F0h __lstiy := 0001F4h __lsub := 0001F8h __ltof := 000284h -__lxor := 0001FCh __scmpzero := 000204h __sdivs := 000208h __sdivu := 00020Ch diff --git a/src/std/static/land.src b/src/std/static/land.src new file mode 100644 index 000000000..ccd1ef8d4 --- /dev/null +++ b/src/std/static/land.src @@ -0,0 +1,26 @@ + public __land +__land: + push af + and a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + and a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + and a, b + ld h, a + ld a, l + and a, c + ld l, a + pop af + ret diff --git a/src/std/static/lor.src b/src/std/static/lor.src new file mode 100644 index 000000000..abbcf929e --- /dev/null +++ b/src/std/static/lor.src @@ -0,0 +1,26 @@ + public __lor +__lor: + push af + or a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + or a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + or a, b + ld h, a + ld a, l + or a, c + ld l, a + pop af + ret diff --git a/src/std/static/lxor.src b/src/std/static/lxor.src new file mode 100644 index 000000000..6b0cfbb8d --- /dev/null +++ b/src/std/static/lxor.src @@ -0,0 +1,26 @@ + public __lxor +__lxor: + push af + xor a, e + ld e, a + push bc + inc sp + inc sp + push hl + ld hl, 2 + add hl, sp + ld a, (hl) + inc hl + xor a, (hl) + dec hl + ld (hl), a + pop hl + inc sp + ld a, h + xor a, b + ld h, a + ld a, l + xor a, c + ld l, a + pop af + ret From 
f9e57aa25d629bbb375317feb3d978ad92c61e09 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 03:21:34 -0400 Subject: [PATCH 019/142] Provide optimized versions of __scmpzero, __icmpzero, __lcmpzero, and __llcmpzero --- src/std/linked/linked.src | 6 ++++++ src/std/shared/icmpzero_fast.src | 7 +++++++ src/std/shared/lcmpzero_fast.src | 10 ++++++++++ src/std/shared/llcmpzero.src | 27 +++++++++++++++++++++++++++ src/std/shared/llcmpzero_fast.src | 15 +++++++++++++++ src/std/shared/scmpzero_fast.src | 7 +++++++ src/std/shared/shared.src | 5 +---- src/std/static/icmpzero.src | 8 ++++++++ src/std/static/lcmpzero.src | 17 +++++++++++++++++ src/std/static/scmpzero.src | 12 ++++++++++++ 10 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 src/std/shared/icmpzero_fast.src create mode 100644 src/std/shared/lcmpzero_fast.src create mode 100644 src/std/shared/llcmpzero.src create mode 100644 src/std/shared/llcmpzero_fast.src create mode 100644 src/std/shared/scmpzero_fast.src create mode 100644 src/std/static/icmpzero.src create mode 100644 src/std/static/lcmpzero.src create mode 100644 src/std/static/scmpzero.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index ea3190c36..d0bc0cf2f 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -56,6 +56,8 @@ _tanh := 022138h ; short (16-bit) math public __sand __sand := 000200h + public __scmpzero +__scmpzero := 000204h public __smuls __smuls := 000224h public __smulu @@ -68,6 +70,8 @@ __sxor := 000268h ; int (24-bit) math public __iand __iand := 000134h + public __icmpzero +__icmpzero := 000138h public __imuls __imuls := 000154h public __imulu @@ -80,6 +84,8 @@ __ixor := 000198h ; long (32-bit) math public __land __land := 0001A4h + public __lcmpzero +__lcmpzero := 0001B0h public __lmuls __lmuls := 0001C8h public __lmulu diff --git a/src/std/shared/icmpzero_fast.src b/src/std/shared/icmpzero_fast.src new file mode 100644 index 000000000..a40f8ee80 --- 
/dev/null +++ b/src/std/shared/icmpzero_fast.src @@ -0,0 +1,7 @@ + public __icmpzero_fast +__icmpzero_fast: + xor a, a + ld c, a + mlt bc + sbc hl, bc + ret diff --git a/src/std/shared/lcmpzero_fast.src b/src/std/shared/lcmpzero_fast.src new file mode 100644 index 000000000..0efee8eb2 --- /dev/null +++ b/src/std/shared/lcmpzero_fast.src @@ -0,0 +1,10 @@ + public __lcmpzero_fast +__lcmpzero_fast: + xor a, a + add a, e + ret nz + mlt de + sbc hl, de + ret p + inc e + ret diff --git a/src/std/shared/llcmpzero.src b/src/std/shared/llcmpzero.src new file mode 100644 index 000000000..d70c9e707 --- /dev/null +++ b/src/std/shared/llcmpzero.src @@ -0,0 +1,27 @@ + public __llcmpzero +__llcmpzero: + inc b + djnz .b_nz + dec c + inc c + jr nz, .c_nz + mlt bc + adc hl, bc + jr nz, .udeuhl_nz + adc hl, de + ret z +.udeuhl_nz: + inc c + dec bc + ret +.b_nz: + dec b + ret po + dec b + inc b + ret +.c_nz: + ret p + dec c + inc bc + ret diff --git a/src/std/shared/llcmpzero_fast.src b/src/std/shared/llcmpzero_fast.src new file mode 100644 index 000000000..165b3119a --- /dev/null +++ b/src/std/shared/llcmpzero_fast.src @@ -0,0 +1,15 @@ + public __llcmpzero_fast +__llcmpzero_fast: + xor a, a + add a, b + ret nz + cp a, c + jr nz, .nz + mlt bc + adc hl, bc + jr nz, .nz + adc hl, de + ret z +.nz: + inc a + ret diff --git a/src/std/shared/scmpzero_fast.src b/src/std/shared/scmpzero_fast.src new file mode 100644 index 000000000..8a46474b6 --- /dev/null +++ b/src/std/shared/scmpzero_fast.src @@ -0,0 +1,7 @@ + public __scmpzero_fast +__scmpzero_fast: + xor a, a + ld c, a + ld b, a + sbc.s hl, bc + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 5095bef7f..715256cfb 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, 
__frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __icmpzero, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __lcmps, __lcmpu, __lcmpzero, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __scmpzero, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, 
_strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -30,7 +30,6 @@ __fruitof := 0002BCh __frustof := 0002C8h __fsub := 000290h __ftol := 00027Ch -__icmpzero := 000138h __idivs := 00013Ch __idivu := 000140h __idvrmu := 000144h @@ -55,7 +54,6 @@ __ladd := 00019Ch __ladd_b := 0001A0h __lcmps := 0001A8h __lcmpu := 0001ACh -__lcmpzero := 0001B0h __ldivs := 0001B4h __ldivu := 0001B8h __ldvrmu := 0001BCh @@ -74,7 +72,6 @@ __lstix := 0001F0h __lstiy := 0001F4h __lsub := 0001F8h __ltof := 000284h -__scmpzero := 000204h __sdivs := 000208h __sdivu := 00020Ch __seqcase := 000210h diff --git a/src/std/static/icmpzero.src b/src/std/static/icmpzero.src new file mode 100644 index 000000000..eba3a276e --- /dev/null +++ b/src/std/static/icmpzero.src @@ -0,0 +1,8 @@ + public __icmpzero +__icmpzero: + push bc + ld bc, 0 + or a, a + sbc hl, bc + pop bc + ret diff --git a/src/std/static/lcmpzero.src b/src/std/static/lcmpzero.src new file mode 100644 index 000000000..31b4ad26b --- /dev/null +++ b/src/std/static/lcmpzero.src @@ -0,0 +1,17 @@ + public __lcmpzero +__lcmpzero: + inc e + dec e + jr nz, .e_nz + add hl, bc + or a, a + sbc hl, bc + ret z + inc e + dec de + ret +.e_nz: + ret po + dec e + inc e + ret diff --git a/src/std/static/scmpzero.src b/src/std/static/scmpzero.src new file mode 100644 index 000000000..2adf837d4 --- /dev/null +++ b/src/std/static/scmpzero.src @@ -0,0 +1,12 @@ + public __scmpzero +__scmpzero: + add hl, bc + or a, a + sbc.s hl, bc + ret po + inc h + dec h + ret po + dec h + inc h + ret From f6f95e26d72d965762a5ae874f6a81c38fc1633d Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 03:36:56 -0400 Subject: [PATCH 020/142] Provide optimized versions of __ladd and __lladd --- src/std/linked/linked.src | 2 ++ src/std/shared/ladd_fast.src | 6 ++++++ src/std/shared/lladd.src | 18 ++++++++++++++++++ src/std/shared/lladd_fast.src | 16 ++++++++++++++++ src/std/shared/shared.src | 3 +-- 
src/std/static/ladd.src | 8 ++++++++ 6 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 src/std/shared/ladd_fast.src create mode 100644 src/std/shared/lladd.src create mode 100644 src/std/shared/lladd_fast.src create mode 100644 src/std/static/ladd.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index d0bc0cf2f..ab7ee8261 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -82,6 +82,8 @@ __ior := 000168h __ixor := 000198h ; long (32-bit) math + public __ladd +__ladd := 00019Ch public __land __land := 0001A4h public __lcmpzero diff --git a/src/std/shared/ladd_fast.src b/src/std/shared/ladd_fast.src new file mode 100644 index 000000000..36fa8fcb3 --- /dev/null +++ b/src/std/shared/ladd_fast.src @@ -0,0 +1,6 @@ + public __ladd_fast +__ladd_fast: + add hl, bc + add a, e + ld e, a + ret diff --git a/src/std/shared/lladd.src b/src/std/shared/lladd.src new file mode 100644 index 000000000..f81a0a92e --- /dev/null +++ b/src/std/shared/lladd.src @@ -0,0 +1,18 @@ + public __lladd +__lladd: + push iy + ld iy, 0 + add iy, sp + push bc + ld bc, (iy) + add hl, bc + ex de, hl + ld bc, (iy + 3) + add hl, bc + ex de, hl + pop bc + ld iy, (iy + 6) + add iy, bc + lea bc, iy + pop iy + ret diff --git a/src/std/shared/lladd_fast.src b/src/std/shared/lladd_fast.src new file mode 100644 index 000000000..7e824a942 --- /dev/null +++ b/src/std/shared/lladd_fast.src @@ -0,0 +1,16 @@ + public __lladd_fast +__lladd_fast: + ld iy, 0 + add iy, sp + push bc + ld bc, (iy) + add hl, bc + ex de, hl + ld bc, (iy + 3) + add hl, bc + ex de, hl + pop bc + ld iy, (iy + 6) + add iy, bc + lea bc, iy + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 715256cfb..3a2ba5da8 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, 
__frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, 
_strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -50,7 +50,6 @@ __ishru_b := 000188h __istix := 00018Ch __istiy := 000190h __itol := 000194h -__ladd := 00019Ch __ladd_b := 0001A0h __lcmps := 0001A8h __lcmpu := 0001ACh diff --git a/src/std/static/ladd.src b/src/std/static/ladd.src new file mode 100644 index 000000000..6552d3818 --- /dev/null +++ b/src/std/static/ladd.src @@ -0,0 +1,8 @@ + public __ladd +__ladd: + push af + add hl, bc + add a, e + ld e, a + pop af + ret From 2da942ffb1a37333af71cb86c1a61bd8714de5d1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 15:38:32 -0400 Subject: [PATCH 021/142] Don't forget to propagate carry --- src/std/shared/ladd_fast.src | 2 +- src/std/shared/lladd.src | 5 ++++- src/std/shared/lladd_fast.src | 5 ++++- src/std/static/ladd.src | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/std/shared/ladd_fast.src b/src/std/shared/ladd_fast.src index 36fa8fcb3..4974ef002 100644 --- a/src/std/shared/ladd_fast.src +++ b/src/std/shared/ladd_fast.src @@ -1,6 +1,6 @@ public __ladd_fast __ladd_fast: add hl, bc - add a, e + adc a, e ld e, a ret diff --git a/src/std/shared/lladd.src b/src/std/shared/lladd.src index f81a0a92e..ca91760f2 100644 --- a/src/std/shared/lladd.src +++ b/src/std/shared/lladd.src @@ -8,9 +8,12 @@ __lladd: add hl, bc ex de, hl ld bc, (iy + 3) - add hl, bc + adc hl, bc ex de, hl pop bc + jr nc, .nc48 + inc bc +.nc48: ld iy, (iy + 6) add iy, bc lea bc, iy diff --git a/src/std/shared/lladd_fast.src b/src/std/shared/lladd_fast.src index 7e824a942..73f378d8c 100644 --- a/src/std/shared/lladd_fast.src +++ b/src/std/shared/lladd_fast.src @@ -7,9 +7,12 @@ __lladd_fast: add hl, bc ex de, hl ld bc, (iy + 3) - add hl, bc + adc hl, bc ex de, hl pop bc + jr nc, .nc48 + inc bc +.nc48: ld iy, (iy + 6) add iy, bc lea bc, iy diff --git a/src/std/static/ladd.src b/src/std/static/ladd.src index 6552d3818..79cfaae73 100644 --- 
a/src/std/static/ladd.src +++ b/src/std/static/ladd.src @@ -2,7 +2,7 @@ __ladd: push af add hl, bc - add a, e + adc a, e ld e, a pop af ret From d1337cd09d52d56bbf40d7555bcbb9ac858c0116 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 15:59:37 -0400 Subject: [PATCH 022/142] Provide optimized versions of __lsub and __llsub --- src/std/linked/linked.src | 2 ++ src/std/shared/llsub.src | 20 ++++++++++++++++++++ src/std/shared/llsub_fast.src | 19 +++++++++++++++++++ src/std/shared/lsub_fast.src | 9 +++++++++ src/std/shared/shared.src | 3 +-- src/std/static/lsub.src | 11 +++++++++++ 6 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 src/std/shared/llsub.src create mode 100644 src/std/shared/llsub_fast.src create mode 100644 src/std/shared/lsub_fast.src create mode 100644 src/std/static/lsub.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index ab7ee8261..24f8eb6df 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -94,5 +94,7 @@ __lmuls := 0001C8h __lmulu := 0001CCh public __lor __lor := 0001D8h + public __lsub +__lsub := 0001F8h public __lxor __lxor := 0001FCh diff --git a/src/std/shared/llsub.src b/src/std/shared/llsub.src new file mode 100644 index 000000000..dd34b9707 --- /dev/null +++ b/src/std/shared/llsub.src @@ -0,0 +1,20 @@ + public __llsub +__llsub: + push iy + ld iy, 0 + add iy, sp + push bc + ld bc, (iy) + sbc hl, bc + ex de, hl + ld bc, (iy + 3) + sbc hl, bc + ex de, hl + ex (sp),hl + ld bc, (iy + 6) + adc hl, bc + ld c, l + ld b, h + pop hl + pop iy + ret diff --git a/src/std/shared/llsub_fast.src b/src/std/shared/llsub_fast.src new file mode 100644 index 000000000..c0cf421df --- /dev/null +++ b/src/std/shared/llsub_fast.src @@ -0,0 +1,19 @@ + public __llsub_fast +__llsub_fast: + ld iy, 0 + add iy, sp + push bc + ld bc, (iy) + sbc hl, bc + ex de, hl + ld bc, (iy + 3) + sbc hl, bc + ex de, hl + pop bc + ld a, c + sbc a, (iy + 6) + ld c, a + ld a, b + sbc a, (iy + 7) + ld b, 
a + ret diff --git a/src/std/shared/lsub_fast.src b/src/std/shared/lsub_fast.src new file mode 100644 index 000000000..12e5824a8 --- /dev/null +++ b/src/std/shared/lsub_fast.src @@ -0,0 +1,9 @@ + public __lsub_fast +__lsub_fast: + or a, a + sbc hl, bc + cpl + ccf + adc a, e + ld e, a + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 3a2ba5da8..68e326bfb 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __lsub, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, 
__ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -69,7 +69,6 @@ __lshrs := 0001E8h __lshru := 0001ECh __lstix := 0001F0h __lstiy := 0001F4h -__lsub := 0001F8h __ltof := 000284h __sdivs := 000208h __sdivu := 00020Ch diff --git a/src/std/static/lsub.src b/src/std/static/lsub.src new file mode 100644 index 000000000..6f8f2abfa --- /dev/null +++ b/src/std/static/lsub.src @@ -0,0 +1,11 @@ + public __lsub +__lsub: + push af + or a, a + sbc hl, bc + cpl + ccf + adc a, e + ld e, a + pop af + ret From 8a37d3f87fc1b56b811f68dee3933b8284e49bb2 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 16:05:53 -0400 Subject: [PATCH 023/142] Optimize __lladd_fast --- src/std/shared/lladd_fast.src | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/std/shared/lladd_fast.src b/src/std/shared/lladd_fast.src index 73f378d8c..b987043af 100644 --- a/src/std/shared/lladd_fast.src +++ b/src/std/shared/lladd_fast.src @@ -10,10 +10,10 @@ __lladd_fast: adc hl, bc ex de, hl pop bc - jr nc, .nc48 - inc bc -.nc48: - ld iy, (iy + 6) - add iy, bc - lea bc, iy + ld a, c + adc a, (iy + 6) + ld c, a + ld a, b + adc a, (iy + 7) + ld b, a ret From 673ba0066ef36c7511b30c8add8f6f5c6e84b2fb Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 16:06:27 -0400 Subject: 
[PATCH 024/142] Fix stack frame bugs --- src/std/shared/lladd.src | 6 +++--- src/std/shared/lladd_fast.src | 8 ++++---- src/std/shared/llsub.src | 6 +++--- src/std/shared/llsub_fast.src | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/std/shared/lladd.src b/src/std/shared/lladd.src index ca91760f2..11891a6d9 100644 --- a/src/std/shared/lladd.src +++ b/src/std/shared/lladd.src @@ -4,17 +4,17 @@ __lladd: ld iy, 0 add iy, sp push bc - ld bc, (iy) + ld bc, (iy + 6) add hl, bc ex de, hl - ld bc, (iy + 3) + ld bc, (iy + 9) adc hl, bc ex de, hl pop bc jr nc, .nc48 inc bc .nc48: - ld iy, (iy + 6) + ld iy, (iy + 12) add iy, bc lea bc, iy pop iy diff --git a/src/std/shared/lladd_fast.src b/src/std/shared/lladd_fast.src index b987043af..0b15e0b82 100644 --- a/src/std/shared/lladd_fast.src +++ b/src/std/shared/lladd_fast.src @@ -3,17 +3,17 @@ __lladd_fast: ld iy, 0 add iy, sp push bc - ld bc, (iy) + ld bc, (iy + 3) add hl, bc ex de, hl - ld bc, (iy + 3) + ld bc, (iy + 6) adc hl, bc ex de, hl pop bc ld a, c - adc a, (iy + 6) + adc a, (iy + 9) ld c, a ld a, b - adc a, (iy + 7) + adc a, (iy + 10) ld b, a ret diff --git a/src/std/shared/llsub.src b/src/std/shared/llsub.src index dd34b9707..ddb79d928 100644 --- a/src/std/shared/llsub.src +++ b/src/std/shared/llsub.src @@ -4,14 +4,14 @@ __llsub: ld iy, 0 add iy, sp push bc - ld bc, (iy) + ld bc, (iy + 6) sbc hl, bc ex de, hl - ld bc, (iy + 3) + ld bc, (iy + 9) sbc hl, bc ex de, hl ex (sp),hl - ld bc, (iy + 6) + ld bc, (iy + 12) adc hl, bc ld c, l ld b, h diff --git a/src/std/shared/llsub_fast.src b/src/std/shared/llsub_fast.src index c0cf421df..1d83cd036 100644 --- a/src/std/shared/llsub_fast.src +++ b/src/std/shared/llsub_fast.src @@ -3,17 +3,17 @@ __llsub_fast: ld iy, 0 add iy, sp push bc - ld bc, (iy) + ld bc, (iy + 3) sbc hl, bc ex de, hl - ld bc, (iy + 3) + ld bc, (iy + 6) sbc hl, bc ex de, hl pop bc ld a, c - sbc a, (iy + 6) + sbc a, (iy + 9) ld c, a ld a, b - sbc a, (iy + 7) + sbc a, (iy + 10) 
ld b, a ret From dfa34fd7305e04007e751785f1765483c47a6817 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 3 Jun 2021 17:02:09 -0400 Subject: [PATCH 025/142] Provide optimized versions of __lland, __llor, and __llxor --- src/std/shared/lland.src | 43 +++++++++++++++++++++++++++++++++++ src/std/shared/lland_fast.src | 41 +++++++++++++++++++++++++++++++++ src/std/shared/llor.src | 43 +++++++++++++++++++++++++++++++++++ src/std/shared/llor_fast.src | 41 +++++++++++++++++++++++++++++++++ src/std/shared/llxor.src | 43 +++++++++++++++++++++++++++++++++++ src/std/shared/llxor_fast.src | 41 +++++++++++++++++++++++++++++++++ 6 files changed, 252 insertions(+) create mode 100644 src/std/shared/lland.src create mode 100644 src/std/shared/lland_fast.src create mode 100644 src/std/shared/llor.src create mode 100644 src/std/shared/llor_fast.src create mode 100644 src/std/shared/llxor.src create mode 100644 src/std/shared/llxor_fast.src diff --git a/src/std/shared/lland.src b/src/std/shared/lland.src new file mode 100644 index 000000000..ce6ff5a75 --- /dev/null +++ b/src/std/shared/lland.src @@ -0,0 +1,43 @@ + public __lland +__lland: +; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 + push iy + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 13 + ld a, b + and a, (hl) + ld b, a + dec hl + ld a, c + and a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + and a, (hl) + ld (de), a + pop de + dec hl + ld a, d + and a, (hl) + ld d, a + dec hl + ld a, e + and a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + and a, (hl) + ld (iy - 1), a + pop hl + ld a, h + and a, (iy + 7) + ld h, a + ld a, l + and a, (iy + 6) + ld l, a + pop iy + ret diff --git a/src/std/shared/lland_fast.src b/src/std/shared/lland_fast.src new file mode 100644 index 000000000..bad06317f --- /dev/null +++ b/src/std/shared/lland_fast.src @@ -0,0 +1,41 @@ + public __lland_fast +__lland_fast: +; CC: 56*r(PC)+19*r(SPL)+8*w(SPL)+1 + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 10 + ld a, b + and a, 
(hl) + ld b, a + dec hl + ld a, c + and a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + and a, (hl) + ld (de), a + pop de + dec hl + ld a, d + and a, (hl) + ld d, a + dec hl + ld a, e + and a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + and a, (hl) + ld (iy - 1), a + pop hl + ld a, h + and a, (iy + 4) + ld h, a + ld a, l + and a, (iy + 3) + ld l, a + ret diff --git a/src/std/shared/llor.src b/src/std/shared/llor.src new file mode 100644 index 000000000..369c5d1ca --- /dev/null +++ b/src/std/shared/llor.src @@ -0,0 +1,43 @@ + public __llor +__llor: +; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 + push iy + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 13 + ld a, b + or a, (hl) + ld b, a + dec hl + ld a, c + or a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + or a, (hl) + ld (de), a + pop de + dec hl + ld a, d + or a, (hl) + ld d, a + dec hl + ld a, e + or a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + or a, (hl) + ld (iy - 1), a + pop hl + ld a, h + or a, (iy + 7) + ld h, a + ld a, l + or a, (iy + 6) + ld l, a + pop iy + ret diff --git a/src/std/shared/llor_fast.src b/src/std/shared/llor_fast.src new file mode 100644 index 000000000..0b15d7d30 --- /dev/null +++ b/src/std/shared/llor_fast.src @@ -0,0 +1,41 @@ + public __llor_fast +__llor_fast: +; CC: 56*r(PC)+19*r(SPL)+8*w(SPL)+1 + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 10 + ld a, b + or a, (hl) + ld b, a + dec hl + ld a, c + or a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + or a, (hl) + ld (de), a + pop de + dec hl + ld a, d + or a, (hl) + ld d, a + dec hl + ld a, e + or a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + or a, (hl) + ld (iy - 1), a + pop hl + ld a, h + or a, (iy + 4) + ld h, a + ld a, l + or a, (iy + 3) + ld l, a + ret diff --git a/src/std/shared/llxor.src b/src/std/shared/llxor.src new file mode 100644 index 000000000..7d1f48432 --- /dev/null +++ b/src/std/shared/llxor.src @@ -0,0 +1,43 @@ + public __llxor +__llxor: +; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 
+ push iy + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 13 + ld a, b + xor a, (hl) + ld b, a + dec hl + ld a, c + xor a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + xor a, (hl) + ld (de), a + pop de + dec hl + ld a, d + xor a, (hl) + ld d, a + dec hl + ld a, e + xor a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + xor a, (hl) + ld (iy - 1), a + pop hl + ld a, h + xor a, (iy + 7) + ld h, a + ld a, l + xor a, (iy + 6) + ld l, a + pop iy + ret diff --git a/src/std/shared/llxor_fast.src b/src/std/shared/llxor_fast.src new file mode 100644 index 000000000..cdae789b2 --- /dev/null +++ b/src/std/shared/llxor_fast.src @@ -0,0 +1,41 @@ + public __llxor_fast +__llxor_fast: +; CC: 56*r(PC)+19*r(SPL)+8*w(SPL)+1 + ld iy, 0 + add iy, sp + push hl + push de + lea hl, iy + 10 + ld a, b + xor a, (hl) + ld b, a + dec hl + ld a, c + xor a, (hl) + ld c, a + dec hl + lea de, iy - 4 + ld a, (de) + xor a, (hl) + ld (de), a + pop de + dec hl + ld a, d + xor a, (hl) + ld d, a + dec hl + ld a, e + xor a, (hl) + ld e, a + dec hl + ld a, (iy - 1) + xor a, (hl) + ld (iy - 1), a + pop hl + ld a, h + xor a, (iy + 4) + ld h, a + ld a, l + xor a, (iy + 3) + ld l, a + ret From c5ca508049f7b0d9020a6d731665bd69a66c900d Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 13:03:17 -0400 Subject: [PATCH 026/142] Implement enough junk for my long long test program to link --- src/std/linked/linked.src | 4 + src/std/shared/lcmps_fast.src | 12 +++ src/std/shared/lcmpu_fast.src | 8 ++ src/std/shared/llcmpu.src | 53 +++++++++++ src/std/shared/llcmpu_fast.src | 53 +++++++++++ src/std/shared/lldivu_b_fast.src | 27 ++++++ src/std/shared/llmulu_b_fast.src | 63 +++++++++++++ src/std/shared/llshrs.src | 31 +++++++ src/std/shared/llshrs_fast.src | 150 +++++++++++++++++++++++++++++++ src/std/shared/printf.c | 4 +- src/std/shared/shared.src | 4 +- src/std/static/lcmps.src | 35 ++++++++ src/std/static/lcmpu.src | 9 ++ 13 files changed, 448 insertions(+), 5 deletions(-) create 
mode 100644 src/std/shared/lcmps_fast.src create mode 100644 src/std/shared/lcmpu_fast.src create mode 100644 src/std/shared/llcmpu.src create mode 100644 src/std/shared/llcmpu_fast.src create mode 100644 src/std/shared/lldivu_b_fast.src create mode 100644 src/std/shared/llmulu_b_fast.src create mode 100644 src/std/shared/llshrs.src create mode 100644 src/std/shared/llshrs_fast.src create mode 100644 src/std/static/lcmps.src create mode 100644 src/std/static/lcmpu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 24f8eb6df..c42267b98 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -86,6 +86,10 @@ __ixor := 000198h __ladd := 00019Ch public __land __land := 0001A4h + public __lcmps +__lcmps := 0001A8h + public __lcmpu +__lcmpu := 0001ACh public __lcmpzero __lcmpzero := 0001B0h public __lmuls diff --git a/src/std/shared/lcmps_fast.src b/src/std/shared/lcmps_fast.src new file mode 100644 index 000000000..3875dc17c --- /dev/null +++ b/src/std/shared/lcmps_fast.src @@ -0,0 +1,12 @@ + public __lcmps_fast +__lcmps_fast: + ld d, a + ld a, e + sub a, d + ret nz + sbc hl, bc + ret z + sbc a, a + ret nz + inc a + ret diff --git a/src/std/shared/lcmpu_fast.src b/src/std/shared/lcmpu_fast.src new file mode 100644 index 000000000..9158a1d37 --- /dev/null +++ b/src/std/shared/lcmpu_fast.src @@ -0,0 +1,8 @@ + public __lcmpu_fast +__lcmpu_fast: + sub a, e + ccf + ret nz + or a, a + sbc hl, bc + ret diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src new file mode 100644 index 000000000..1a4cb64ba --- /dev/null +++ b/src/std/shared/llcmpu.src @@ -0,0 +1,53 @@ + public __llcmpu +__llcmpu: +if 0 + ld iy, 0 + add iy, sp + ld a, b + sub a, (iy + 10) + ret nz + ld a, c + sub a, (iy + 9) + ret nz + ex de, hl + ld bc, (iy + 6) + sbc hl, bc + ret nz + ex de, hl + ld bc, (iy + 3) + sbc hl, bc + ret +else + ld a, c + ld iyh, b + pop bc + or a, a + sbc hl, bc + ex de, hl + pop bc + pop de + jr nz, .ne24 + sbc hl, bc + jr nz, 
.ne48 + sbc a, e + jr nz, .ne56 + ld a, iyh + sbc a, d + ld hl, 9 + add hl, sp + ld sp, hl + ret +.ne24: + sbc hl, bc +.ne48: + sbc a, e +.ne56: + ld a, iyh + sbc a, d + ld hl, 9 + add hl, sp + ld sp, hl + ret nz + inc a + ret +end if diff --git a/src/std/shared/llcmpu_fast.src b/src/std/shared/llcmpu_fast.src new file mode 100644 index 000000000..1e6828f40 --- /dev/null +++ b/src/std/shared/llcmpu_fast.src @@ -0,0 +1,53 @@ + public __llcmpu_fast +__llcmpu_fast: +if 1 + ld iy, 0 + add iy, sp + ld a, b + sub a, (iy + 10) + ret nz + ld a, c + sub a, (iy + 9) + ret nz + ex de, hl + ld bc, (iy + 6) + sbc hl, bc + ret nz + ex de, hl + ld bc, (iy + 3) + sbc hl, bc + ret +else + ld a, c + ld iyh, b + pop bc + or a, a + sbc hl, bc + ex de, hl + pop bc + pop de + jr nz, .ne24 + sbc hl, bc + jr nz, .ne48 + sbc a, e + jr nz, .ne56 + ld a, iyh + sbc a, d + ld hl, 9 + add hl, sp + ld sp, hl + ret +.ne24: + sbc hl, bc +.ne48: + sbc a, e +.ne56: + ld a, iyh + sbc a, d + ld hl, 9 + add hl, sp + ld sp, hl + ret nz + inc a + ret +end if diff --git a/src/std/shared/lldivu_b_fast.src b/src/std/shared/lldivu_b_fast.src new file mode 100644 index 000000000..9c582f9e7 --- /dev/null +++ b/src/std/shared/lldivu_b_fast.src @@ -0,0 +1,27 @@ + public __lldivu +__lldivu: + public __lldivu_b_fast +__lldivu_b_fast: +; Not well-optimized + ld iy, 0 + add iy, sp + ld iy, (iy + 3) + ld iyh, 64 +.loop: + adc hl, hl + ex de, hl + adc hl, hl + ex de, hl + rl c + rl b + rla + jr c, .1 + cp a, iyl + jr nc, .0 +.1: + sub a, iyl + inc l +.0: + dec iyh + jr nz, .loop + ret diff --git a/src/std/shared/llmulu_b_fast.src b/src/std/shared/llmulu_b_fast.src new file mode 100644 index 000000000..75855ff97 --- /dev/null +++ b/src/std/shared/llmulu_b_fast.src @@ -0,0 +1,63 @@ + public __llmulu +__llmulu: + public __llmulu_b_fast +__llmulu_b_fast: + ld iy, 0 + add iy, sp + push de + push hl + ld a, (iy + 3) + ld h, a + mlt hl + ld (iy - 6), l + ld d, a + ld e, (iy - 5) + mlt de + ld l, h + ld h, 0 + add hl, de + ld 
(iy - 5), l + ld d, a + ld e, (iy - 4) + mlt de + ld l, h + ld h, 0 + add hl, de + ld (iy - 4), l + ld d, a + ld e, (iy - 3) + mlt de + ld l, h + ld h, 0 + add hl, de + ld (iy - 3), l + ld d, a + ld e, (iy - 2) + mlt de + ld l, h + ld h, 0 + add hl, de + ld (iy - 2), l + ld d, a + ld e, (iy - 1) + mlt de + ld l, h + ld h, 0 + add hl, de + ld (iy - 1), l + ld d, a + ld e, c + mlt de + ld l, h + ld h, 0 + add hl, de + ld c, l + ld d, a + ld e, b + mlt de + ld a, h + add a, e + ld b, a + pop hl + pop de + ret diff --git a/src/std/shared/llshrs.src b/src/std/shared/llshrs.src new file mode 100644 index 000000000..42b2fee83 --- /dev/null +++ b/src/std/shared/llshrs.src @@ -0,0 +1,31 @@ + public __llshrs +__llshrs: +; Not well-optimized, but it probably works + push iy + ld iy, 0 + add iy, sp + push af + ld a, c + ld c, b + ld b, (iy + 6) + inc b + dec b + jr z, .finish + push de + push hl +.loop: + sra c + rra + rr (iy - 1) + rr d + rr e + rr (iy - 4) + rr h + rr l + djnz .loop + ld b, c + ld c, a +.finish: + pop af + pop iy + ret diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src new file mode 100644 index 000000000..d764af43a --- /dev/null +++ b/src/std/shared/llshrs_fast.src @@ -0,0 +1,150 @@ + public __llshrs_fast +__llshrs_fast: +; Not well-optimized, but it probably works + ld iy, 0 + add iy, sp + ld a, c + ld c, b + ld b, (iy + 3) + inc b + dec b + ret z + push de + push hl +.loop: + sra c + rra + rr (iy - 1) + rr d + rr e + rr (iy - 4) + rr h + rr l + djnz .loop + ld b, c + ld c, a + ret + + +if 0 + push bc + ld iy, 0 + add iy, sp + ld a, (iy + 6) + ld c, a + dec a + and a, 00111000b + ld (.byteshift_smc) + xor a, a + sub a, c + jr $+1 +.byteshift_smc := $-1 +.shr1_8: + ld c, b + ld iyh, a + push de + push hl + inc sp + ld a, l + pop hl + pop de + inc sp + inc sp +.shr1_8_loop: + add a, a + adc hl, hl + ex de, hl + adc hl, hl + ex de, hl + rl c + rl b + dec iyh + jr nz, .shr1_8_loop + +.shr9_16: + push de + push hl + inc sp + inc sp + 
ld a, h + pop hl + pop de + inc sp + +.shr17_24: + dec sp + push hl + inc sp + pop af + ex de, hl + ld e, c + ld d, b + +.shr25_32: + push de + inc sp + ld a, e + pop hl + ld e, b +.shr33_40: +.shr41_48: + rl b + pop bc + sbc hl, hl + ex de, hl + sbc hl, hl + ld l, c + ld h, b + and a, 00000111b + jr z, .shr48 + +.shr49_56_loop: + add a, a + adc hl, hl + djnz .shr49_56_loop +.shr48: + +.shr49_56: + rl b + pop bc + sbc hl, hl + ex de, hl + sbc hl, hl + ld l, b + and a, 00000111b + jr z, .shr56 + ld b, a + ld a, c +.shr49_56_loop: + add a, a + adc hl, hl + djnz .shr49_56_loop +.shr56: + ld c, d + ld b, c + ret + +.shr57_63_0: + and a, 00000111b + jr z, .shr0 +.shr57_63: + rl b + pop bc + sbc hl, hl + ex de, hl + sbc hl, hl + ld l, b + ld b, a +.shr57_63_loop: + add hl, hl + djnz .shr57_63_loop + ld l, h + ld h, e + ld c, d + ld b, c + ret + +.shr0: + pop bc + ret +end if \ No newline at end of file diff --git a/src/std/shared/printf.c b/src/std/shared/printf.c index a62113d8a..9ee319ee3 100644 --- a/src/std/shared/printf.c +++ b/src/std/shared/printf.c @@ -38,8 +38,8 @@ #include // Exclude these from printf support because no one uses them -#define PRINTF_DISABLE_SUPPORT_PTRDIFF_T -#define PRINTF_DISABLE_SUPPORT_LONG_LONG +// #define PRINTF_DISABLE_SUPPORT_PTRDIFF_T +// #define PRINTF_DISABLE_SUPPORT_LONG_LONG // 'ntoa' conversion buffer size, this must be big enough to hold one converted // numeric number including padded zeros (dynamically created on stack) diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 68e326bfb..107837de5 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, 
__ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __lcmps, __lcmpu, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -51,8 +51,6 @@ __istix := 00018Ch __istiy := 000190h __itol := 000194h __ladd_b := 0001A0h 
-__lcmps := 0001A8h -__lcmpu := 0001ACh __ldivs := 0001B4h __ldivu := 0001B8h __ldvrmu := 0001BCh diff --git a/src/std/static/lcmps.src b/src/std/static/lcmps.src new file mode 100644 index 000000000..3983a59a5 --- /dev/null +++ b/src/std/static/lcmps.src @@ -0,0 +1,35 @@ + public __lcmps +__lcmps: +if 1 + push de + ld d, a + ld a, e + sub a, d + jr nz, .finish + sbc hl, bc + add hl, bc + jr z, .finish + sbc a, a + scf + adc a, a +.finish: + ld a, d + pop de + ret +else + push hl + or a, a + sbc hl, bc + ld l, a + ld a, e + jr z, .eq24 + sbc a, l + jr nz, .finish + inc a +.eq24: + sbc a, l +.finish: + ld a, l + pop hl + ret +end if diff --git a/src/std/static/lcmpu.src b/src/std/static/lcmpu.src new file mode 100644 index 000000000..5abcc3a44 --- /dev/null +++ b/src/std/static/lcmpu.src @@ -0,0 +1,9 @@ + public __lcmpu +__lcmpu: + cp a, e + ccf + ret nz + or a, a + sbc hl, bc + add hl, bc + ret From ba8d751343f3da876efb4a7ed059964142164949 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 13:04:17 -0400 Subject: [PATCH 027/142] Fix __llsub --- src/std/shared/llsub.src | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/shared/llsub.src b/src/std/shared/llsub.src index ddb79d928..7005ae965 100644 --- a/src/std/shared/llsub.src +++ b/src/std/shared/llsub.src @@ -10,9 +10,9 @@ __llsub: ld bc, (iy + 9) sbc hl, bc ex de, hl - ex (sp),hl + ex (sp), hl ld bc, (iy + 12) - adc hl, bc + sbc hl, bc ld c, l ld b, h pop hl From e7d9047fda693ae3c8f5de2f3995024022ac90ce Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 16:04:15 -0400 Subject: [PATCH 028/142] Fix __lldivu_b and __llmulu_b --- .../shared/{lldivu_b_fast.src => lldivu_b.src} | 16 ++++++++++------ .../shared/{llmulu_b_fast.src => llmulu_b.src} | 10 +++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) rename src/std/shared/{lldivu_b_fast.src => lldivu_b.src} (61%) rename src/std/shared/{llmulu_b_fast.src => llmulu_b.src} (88%) diff --git 
a/src/std/shared/lldivu_b_fast.src b/src/std/shared/lldivu_b.src similarity index 61% rename from src/std/shared/lldivu_b_fast.src rename to src/std/shared/lldivu_b.src index 9c582f9e7..33e001568 100644 --- a/src/std/shared/lldivu_b_fast.src +++ b/src/std/shared/lldivu_b.src @@ -1,14 +1,16 @@ public __lldivu __lldivu: - public __lldivu_b_fast -__lldivu_b_fast: -; Not well-optimized + public __lldivu_b +__lldivu_b: +; Very unoptimized + push af + push iy ld iy, 0 add iy, sp - ld iy, (iy + 3) + ld iy, (iy + 9) ld iyh, 64 .loop: - adc hl, hl + add hl, hl ex de, hl adc hl, hl ex de, hl @@ -17,11 +19,13 @@ __lldivu_b_fast: rla jr c, .1 cp a, iyl - jr nc, .0 + jr c, .0 .1: sub a, iyl inc l .0: dec iyh jr nz, .loop + pop iy + pop af ret diff --git a/src/std/shared/llmulu_b_fast.src b/src/std/shared/llmulu_b.src similarity index 88% rename from src/std/shared/llmulu_b_fast.src rename to src/std/shared/llmulu_b.src index 75855ff97..86ad43aa6 100644 --- a/src/std/shared/llmulu_b_fast.src +++ b/src/std/shared/llmulu_b.src @@ -1,12 +1,14 @@ public __llmulu __llmulu: - public __llmulu_b_fast -__llmulu_b_fast: + public __llmulu_b +__llmulu_b: + push af + push iy ld iy, 0 add iy, sp push de push hl - ld a, (iy + 3) + ld a, (iy + 9) ld h, a mlt hl ld (iy - 6), l @@ -60,4 +62,6 @@ __llmulu_b_fast: ld b, a pop hl pop de + pop iy + pop af ret From 9f0220c5af45b3da7683324c6c27bc75152898fc Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 16:42:14 -0400 Subject: [PATCH 029/142] Fix __llcmpu and __llcmpzero --- src/std/shared/llcmpu.src | 62 ++++++++++------------------------ src/std/shared/llcmpu_fast.src | 35 ------------------- src/std/shared/llcmpzero.src | 3 +- 3 files changed, 18 insertions(+), 82 deletions(-) diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src index 1a4cb64ba..7e4373bf8 100644 --- a/src/std/shared/llcmpu.src +++ b/src/std/shared/llcmpu.src @@ -1,53 +1,25 @@ public __llcmpu __llcmpu: -if 0 + push iy ld iy, 0 add iy, sp - ld a, b - 
sub a, (iy + 10) - ret nz - ld a, c - sub a, (iy + 9) - ret nz - ex de, hl + push hl + ld hl, (iy + 12) + sbc.s hl, bc + jr nz, .bc_ne + ld hl, (iy + 9) + sbc hl, de + jr nz, .ude_ne + pop hl + push bc ld bc, (iy + 6) sbc hl, bc - ret nz - ex de, hl - ld bc, (iy + 3) - sbc hl, bc - ret -else - ld a, c - ld iyh, b - pop bc - or a, a - sbc hl, bc - ex de, hl pop bc - pop de - jr nz, .ne24 - sbc hl, bc - jr nz, .ne48 - sbc a, e - jr nz, .ne56 - ld a, iyh - sbc a, d - ld hl, 9 - add hl, sp - ld sp, hl - ret -.ne24: - sbc hl, bc -.ne48: - sbc a, e -.ne56: - ld a, iyh - sbc a, d - ld hl, 9 - add hl, sp - ld sp, hl - ret nz - inc a + pop iy ret -end if +.bc_ne: +.ude_ne: + ccf + pop hl + pop iy + ret \ No newline at end of file diff --git a/src/std/shared/llcmpu_fast.src b/src/std/shared/llcmpu_fast.src index 1e6828f40..5d8f97c44 100644 --- a/src/std/shared/llcmpu_fast.src +++ b/src/std/shared/llcmpu_fast.src @@ -1,6 +1,5 @@ public __llcmpu_fast __llcmpu_fast: -if 1 ld iy, 0 add iy, sp ld a, b @@ -17,37 +16,3 @@ if 1 ld bc, (iy + 3) sbc hl, bc ret -else - ld a, c - ld iyh, b - pop bc - or a, a - sbc hl, bc - ex de, hl - pop bc - pop de - jr nz, .ne24 - sbc hl, bc - jr nz, .ne48 - sbc a, e - jr nz, .ne56 - ld a, iyh - sbc a, d - ld hl, 9 - add hl, sp - ld sp, hl - ret -.ne24: - sbc hl, bc -.ne48: - sbc a, e -.ne56: - ld a, iyh - sbc a, d - ld hl, 9 - add hl, sp - ld sp, hl - ret nz - inc a - ret -end if diff --git a/src/std/shared/llcmpzero.src b/src/std/shared/llcmpzero.src index d70c9e707..77a81340d 100644 --- a/src/std/shared/llcmpzero.src +++ b/src/std/shared/llcmpzero.src @@ -15,13 +15,12 @@ __llcmpzero: dec bc ret .b_nz: - dec b ret po dec b inc b ret .c_nz: ret p + inc c dec c - inc bc ret From 1217ded0d2186b8106134b425f3fc123beb287d3 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 17:27:15 -0400 Subject: [PATCH 030/142] Optimize __bpopcnt, __spopcnt, __ipopcnt, and __lpopcnt --- src/std/shared/bpopcnt.src | 1 - src/std/shared/ipopcnt.src | 12 
+++++++----- src/std/shared/lpopcnt.src | 22 ++++++++++++---------- src/std/shared/spopcnt.src | 2 +- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/std/shared/bpopcnt.src b/src/std/shared/bpopcnt.src index 05e98881e..b2020fe65 100644 --- a/src/std/shared/bpopcnt.src +++ b/src/std/shared/bpopcnt.src @@ -24,4 +24,3 @@ __bpopcnt: and a,$f ; a=A+B+C+D+E+F+G+H pop bc ret - diff --git a/src/std/shared/ipopcnt.src b/src/std/shared/ipopcnt.src index f97573931..c0c551afe 100644 --- a/src/std/shared/ipopcnt.src +++ b/src/std/shared/ipopcnt.src @@ -4,17 +4,19 @@ __ipopcnt: push hl ld a,l - call __bpopcnt + call __bpopcnt ld l,a ld a,h call __bpopcnt - ld h,a - call $21D5C + add a,l + ld l,a + dec sp + push hl + inc sp + pop af call __bpopcnt add a,l - add a,h pop hl ret extern __bpopcnt - diff --git a/src/std/shared/lpopcnt.src b/src/std/shared/lpopcnt.src index 92a2dd11c..b11334943 100644 --- a/src/std/shared/lpopcnt.src +++ b/src/std/shared/lpopcnt.src @@ -2,23 +2,25 @@ public __lpopcnt __lpopcnt: - push hl,de + push hl ld a,l - call __bpopcnt + call __bpopcnt ld l,a ld a,h call __bpopcnt - ld h,a - ld a,e + add a,l + ld l,a + dec sp + push hl + inc sp + pop af call __bpopcnt - ld e,a - call $21D5C + add a,l + ld l,a + ld a,e call __bpopcnt - add a,e add a,l - add a,h - pop de,hl + pop hl ret extern __bpopcnt - diff --git a/src/std/shared/spopcnt.src b/src/std/shared/spopcnt.src index a66cd0c1b..0fe0b3fce 100644 --- a/src/std/shared/spopcnt.src +++ b/src/std/shared/spopcnt.src @@ -4,7 +4,7 @@ __spopcnt: push hl ld a,l - call __bpopcnt + call __bpopcnt ld l,a ld a,h call __bpopcnt From e2389186355ca3c5578bf601b8be5053655e9a22 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 17:27:34 -0400 Subject: [PATCH 031/142] Implement __llpopcnt --- src/std/shared/llpopcnt.src | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/std/shared/llpopcnt.src diff --git a/src/std/shared/llpopcnt.src 
b/src/std/shared/llpopcnt.src new file mode 100644 index 000000000..49e99fe37 --- /dev/null +++ b/src/std/shared/llpopcnt.src @@ -0,0 +1,45 @@ + assume adl=1 + + public __llpopcnt +__llpopcnt: + push hl + ld a, l + call __bpopcnt + ld l, a + ld a, h + call __bpopcnt + add a, l + ld l, a + dec sp + push hl + inc sp + pop af + call __bpopcnt + add a, l + ld l, a + ld a, e + call __bpopcnt + add a, l + ld l, a + ld a, d + call __bpopcnt + add a, l + ld l, a + dec sp + push de + inc sp + pop af + call __bpopcnt + add a, l + ld l, a + ld a, c + call __bpopcnt + add a, l + ld l, a + ld a, b + call __bpopcnt + add a, l + pop hl + ret + + extern __bpopcnt From 9deabc121cd53f0c35a35fffba70459d76e28938 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 18:27:33 -0400 Subject: [PATCH 032/142] Reimagine more optimized versions of __spopcnt, __ipopcnt, __lpopcnt, and __llpopcnt --- src/std/shared/ipopcnt.src | 38 +++++++++++++++----------- src/std/shared/llpopcnt.src | 47 +++++++------------------------- src/std/shared/lpopcnt.src | 43 ++++++++++++++++------------- src/std/shared/spopcnt_fast.src | 48 +++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 71 deletions(-) create mode 100644 src/std/shared/spopcnt_fast.src diff --git a/src/std/shared/ipopcnt.src b/src/std/shared/ipopcnt.src index c0c551afe..be9ec3b9b 100644 --- a/src/std/shared/ipopcnt.src +++ b/src/std/shared/ipopcnt.src @@ -2,21 +2,29 @@ public __ipopcnt __ipopcnt: + push bc push hl - ld a,l - call __bpopcnt - ld l,a - ld a,h - call __bpopcnt - add a,l - ld l,a - dec sp - push hl - inc sp - pop af - call __bpopcnt - add a,l + xor a, a + ld c, a + ld b, 3 +.loop: + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + djnz .loop pop hl + pop bc ret - - extern __bpopcnt diff --git a/src/std/shared/llpopcnt.src b/src/std/shared/llpopcnt.src index 
49e99fe37..363db5754 100644 --- a/src/std/shared/llpopcnt.src +++ b/src/std/shared/llpopcnt.src @@ -2,44 +2,17 @@ public __llpopcnt __llpopcnt: - push hl - ld a, l - call __bpopcnt - ld l, a - ld a, h - call __bpopcnt - add a, l - ld l, a - dec sp - push hl - inc sp - pop af - call __bpopcnt - add a, l - ld l, a - ld a, e - call __bpopcnt - add a, l - ld l, a - ld a, d - call __bpopcnt - add a, l - ld l, a - dec sp + call __lpopcnt push de - inc sp - pop af - call __bpopcnt - add a, l - ld l, a - ld a, c - call __bpopcnt - add a, l - ld l, a - ld a, b - call __bpopcnt - add a, l + push hl + ex de, hl + ld l, b + ld e, c + ld d, a + call __lpopcnt + add a, d pop hl + pop de ret - extern __bpopcnt + extern __lpopcnt diff --git a/src/std/shared/lpopcnt.src b/src/std/shared/lpopcnt.src index b11334943..225f09725 100644 --- a/src/std/shared/lpopcnt.src +++ b/src/std/shared/lpopcnt.src @@ -2,25 +2,30 @@ public __lpopcnt __lpopcnt: + push bc push hl - ld a,l - call __bpopcnt - ld l,a - ld a,h - call __bpopcnt - add a,l - ld l,a - dec sp - push hl - inc sp - pop af - call __bpopcnt - add a,l - ld l,a - ld a,e - call __bpopcnt - add a,l + xor a, a + ld c, a + ld b, 4 +.loop: + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + add hl, hl + adc a, c + ld l, e + djnz .loop pop hl + pop bc ret - - extern __bpopcnt diff --git a/src/std/shared/spopcnt_fast.src b/src/std/shared/spopcnt_fast.src new file mode 100644 index 000000000..57d75e86e --- /dev/null +++ b/src/std/shared/spopcnt_fast.src @@ -0,0 +1,48 @@ + assume adl=1 + + public __spopcnt_fast +__spopcnt_fast: + push hl + xor a, a + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, 
hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + add hl, hl + adc a, l + pop hl + ret From 10545ba9b6bbccfbdb9b7d00fd2dc5a34ab16270 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Jun 2021 18:42:15 -0400 Subject: [PATCH 033/142] Hijack __lpopcnt from __ipopcnt --- src/std/shared/ipopcnt.src | 27 +++------------------------ src/std/shared/lpopcnt.src | 4 +++- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/src/std/shared/ipopcnt.src b/src/std/shared/ipopcnt.src index be9ec3b9b..17c123493 100644 --- a/src/std/shared/ipopcnt.src +++ b/src/std/shared/ipopcnt.src @@ -3,28 +3,7 @@ public __ipopcnt __ipopcnt: push bc - push hl - xor a, a - ld c, a ld b, 3 -.loop: - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - add hl, hl - adc a, c - djnz .loop - pop hl - pop bc - ret + jp __lpopcnt.hijack1 + + extern __lpopcnt.hijack1 diff --git a/src/std/shared/lpopcnt.src b/src/std/shared/lpopcnt.src index 225f09725..ace5cc86b 100644 --- a/src/std/shared/lpopcnt.src +++ b/src/std/shared/lpopcnt.src @@ -3,10 +3,12 @@ public __lpopcnt __lpopcnt: push bc + ld b, 4 + public __lpopcnt.hijack1 +.hijack1: push hl xor a, a ld c, a - ld b, 4 .loop: add hl, hl adc a, c From 3a22ff5d964a7e674d5cd61f25c2da081a35b6fb Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 12:34:52 -0400 Subject: [PATCH 034/142] Fix and optimize __sbitrev, __ibitrev, and __lbitrev --- src/std/shared/ibitrev.src | 22 +++++---- src/std/shared/ibitrev_fast.src | 61 +++++++++++++++++++++++++ src/std/shared/internal_bitrev_byte.src | 21 +++++++++ src/std/shared/lbitrev.src | 25 +++++----- src/std/shared/sbitrev.src | 17 ++++--- 5 files changed, 115 insertions(+), 31 deletions(-) create mode 100644 src/std/shared/ibitrev_fast.src 
create mode 100644 src/std/shared/internal_bitrev_byte.src diff --git a/src/std/shared/ibitrev.src b/src/std/shared/ibitrev.src index 59b5c7a05..5fe47e9fc 100644 --- a/src/std/shared/ibitrev.src +++ b/src/std/shared/ibitrev.src @@ -3,16 +3,18 @@ public __ibitrev __ibitrev: push af - ld a,l - call __bbitrev - ld l,a - ld a,h - call __bbitrev - ld h,a - call $21D5C - call __bbitrev - call $21D70 + ld a, h ; a=H + ld h, l + ld l, a ; uhl=[HLU]LH + call __internal_bitrev_byte ; a=HLU', uhl=LH0 + ld l, a ; uhl=LH[HLU'] + call __internal_bitrev_byte ; a=L', uhl=H[HLU']0 + ld l, h + ld h, a ; uhl=HL'[HLU'] + call __internal_bitrev_byte ; a=H', uhl=L'[HLU']0 + ld l, h + ld h, a ; uhl=L'H'[HLU'] pop af ret - extern __bbitrev + extern __internal_bitrev_byte diff --git a/src/std/shared/ibitrev_fast.src b/src/std/shared/ibitrev_fast.src new file mode 100644 index 000000000..0c64c4529 --- /dev/null +++ b/src/std/shared/ibitrev_fast.src @@ -0,0 +1,61 @@ + assume adl=1 + + public __ibitrev_fast +__ibitrev_fast: + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + ld c, a + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + ld b, a + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + push af + dec sp + pop hl + inc sp + ld l, c + ld h, b + ret diff --git a/src/std/shared/internal_bitrev_byte.src b/src/std/shared/internal_bitrev_byte.src new file mode 100644 index 000000000..c551aaa5b --- /dev/null +++ b/src/std/shared/internal_bitrev_byte.src @@ -0,0 +1,21 @@ + assume adl=1 + + public __internal_bitrev_byte +__internal_bitrev_byte: + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + add hl, hl + rra + 
ret diff --git a/src/std/shared/lbitrev.src b/src/std/shared/lbitrev.src index 7acab2fde..974504460 100644 --- a/src/std/shared/lbitrev.src +++ b/src/std/shared/lbitrev.src @@ -3,20 +3,17 @@ public __lbitrev __lbitrev: push af - ld a,l - call __bbitrev - ld l,a - ld a,h - call __bbitrev - ld h,a - call $21D5C - call __bbitrev - call $21D70 - ld a,e - call __bbitrev - ld e,a + call __internal_bitrev_byte ; a=HLU', UHL=HL0 + ld l, e ; uhl=HLE + ld e, a ; e=HLU' + call __internal_bitrev_byte ; a=H', uhl=LE0 + ld l, a ; uhl=LEH' + call __internal_bitrev_byte ; a=L', uhl=EH'0 + ld l, e ; uhl=EH'HLU' + ld e, a ; e=L' + call __internal_bitrev_byte ; a=E', uhl=H'HLU' + ld l, a ; uhl=H'HLU'E' pop af ret - extern __bbitrev - + extern __internal_bitrev_byte diff --git a/src/std/shared/sbitrev.src b/src/std/shared/sbitrev.src index 629907040..de2cc390c 100644 --- a/src/std/shared/sbitrev.src +++ b/src/std/shared/sbitrev.src @@ -3,13 +3,16 @@ public __sbitrev __sbitrev: push af - ld a,l - call __bbitrev - ld l,a - ld a,h - call __bbitrev - ld h,a + push hl + dec sp + pop hl + inc sp ; uhl=HL? + call __internal_bitrev_byte ; a=H', uhl=L?0 + ld l, a ; uhl=L?H' + call __internal_bitrev_byte ; a=L', hl=H'0 + ld l, h + ld h, a ; hl=L'H' pop af ret - extern __bbitrev + extern __internal_bitrev_byte From feeaf4eb90d89f1675d014004dfcf6223bd468bd Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 12:49:20 -0400 Subject: [PATCH 035/142] Implement __llbitrev --- src/std/shared/llbitrev.src | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/std/shared/llbitrev.src diff --git a/src/std/shared/llbitrev.src b/src/std/shared/llbitrev.src new file mode 100644 index 000000000..b4a7010a2 --- /dev/null +++ b/src/std/shared/llbitrev.src @@ -0,0 +1,41 @@ + assume adl=1 + + public __llbitrev +__llbitrev: + push af ; stack: 0AF + call __internal_bitrev_byte ; a=HLU', UHL=HL0 + ld l, b ; uhl=HLB + push af ; stack: 0AF0[HLU']? 
+ call __internal_bitrev_byte ; a=H', uhl=LB0 + ld l, c ; uhl=LBC + ld c, a ; c=H' + call __internal_bitrev_byte ; a=L', uhl=BC0 + ld b, a ; bc=L'H' + push de ; stack: 0AF0[HLU']?[DEU]DE + inc sp ; stack: 0AF0[HLU']?[DEU]D + pop af ; a=DEU + ; stack: 0AF0[HLU'] + ld l, a ; uhl=BC[DEU] + call __internal_bitrev_byte ; a=B', uhl=C[DEU]0 + ld l, d ; uhl=C[DEU]D + ld d, a ; d=B' + call __internal_bitrev_byte ; a=C', uhl=[DEU]D0 + ld l, e ; uhl=[DEU]DE + ld e, a ; e=C' + call __internal_bitrev_byte ; a=DEU', uhl=DE0 + ld l, a ; uhl=DE[DEU'] + call __internal_bitrev_byte ; a=D', uhl=E[DEU']0 + ld l, e ; uhl=E[DEU']C' + push af ; stack: 0AF0[HLU']0D'? + inc sp ; stack: 0AF0[HLU']0D' + call __internal_bitrev_byte ; a=E', uhl=[DEU']C'0 + ld l, d ; uhl=[DEU]'C'B' + pop de ; ude=[HLU']0D' + ; stack: 0AF0 + inc sp ; stack: 0AF + ld d, a ; ude=[HLU']E'D' + pop af ; af=AF + ; stack: + ret + + extern __internal_bitrev_byte From c33d063ab29fb102de9bb42f2b424b83e3f3766e Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 14:06:54 -0400 Subject: [PATCH 036/142] Optimize __snot, __inot, and __lnot --- src/std/linked/linked.src | 6 ++++++ src/std/shared/inot_fast.src | 12 ++++++++++++ src/std/shared/lnot_fast.src | 15 +++++++++++++++ src/std/shared/shared.src | 5 +---- src/std/shared/snot_fast.src | 4 ++++ src/std/static/inot.src | 24 ++++++++++++++++++++++++ src/std/static/lnot.src | 19 +++++++++++++++++++ src/std/static/snot.src | 4 ++++ 8 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 src/std/shared/inot_fast.src create mode 100644 src/std/shared/lnot_fast.src create mode 100644 src/std/shared/snot_fast.src create mode 100644 src/std/static/inot.src create mode 100644 src/std/static/lnot.src create mode 100644 src/std/static/snot.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index c42267b98..a85ffc553 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -62,6 +62,8 @@ __scmpzero := 000204h __smuls := 
000224h public __smulu __smulu := 000228h + public __snor +__snot := 000230h public __sor __sor := 000234h public __sxor @@ -76,6 +78,8 @@ __icmpzero := 000138h __imuls := 000154h public __imulu __imulu := 000158h + public __inot +__inot := 000164h public __ior __ior := 000168h public __ixor @@ -96,6 +100,8 @@ __lcmpzero := 0001B0h __lmuls := 0001C8h public __lmulu __lmulu := 0001CCh + public __lnot +__lnot := 0001D4h public __lor __lor := 0001D8h public __lsub diff --git a/src/std/shared/inot_fast.src b/src/std/shared/inot_fast.src new file mode 100644 index 000000000..b8b743557 --- /dev/null +++ b/src/std/shared/inot_fast.src @@ -0,0 +1,12 @@ + assume adl=1 + + public __inot_fast +__inot_fast: + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL + ret diff --git a/src/std/shared/lnot_fast.src b/src/std/shared/lnot_fast.src new file mode 100644 index 000000000..2e23cc56e --- /dev/null +++ b/src/std/shared/lnot_fast.src @@ -0,0 +1,15 @@ + assume adl=1 + + public __lnot_fast +__lnot_fast: + ld a, e ; a=E + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL + cpl ; a=~E + ld e, a ; e=~E + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 107837de5..aa49b5170 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __inot, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, 
__lldix, __lldiy, __lneg, __lnot, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __snot, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -38,7 +38,6 @@ __ildiy := 00014Ch __imul_b := 000150h __indcall := 00015Ch __ineg := 000160h -__inot := 000164h __irems := 00016Ch __iremu := 000170h __ishl := 000174h @@ -59,7 +58,6 @@ __lldiy := 0001C4h __lmuls := 0001C8h __lmulu := 0001CCh __lneg := 0001D0h -__lnot := 0001D4h __lrems := 0001DCh __lremu := 0001E0h __lshl := 0001E4h 
@@ -76,7 +74,6 @@ __setflag := 000218h __sldix := 00021Ch __sldiy := 000220h __sneg := 00022Ch -__snot := 000230h __srems := 000238h __sremu := 00023Ch __sshl := 000240h diff --git a/src/std/shared/snot_fast.src b/src/std/shared/snot_fast.src new file mode 100644 index 000000000..8856b658b --- /dev/null +++ b/src/std/shared/snot_fast.src @@ -0,0 +1,4 @@ + public __snot_fast +__snot_fast := __inot_fast + + extern __inot_fast diff --git a/src/std/static/inot.src b/src/std/static/inot.src new file mode 100644 index 000000000..6577644d4 --- /dev/null +++ b/src/std/static/inot.src @@ -0,0 +1,24 @@ + public __inot +__inot: +if 1 + push de +end if + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL +if 1 + pop de +else + ex de, hl ; ude=~UHL, uhl=UHL+UDE + add hl, de ; uhl=UHL+UDE+~UHL + ; =UHL+UDE+(-UHL-1) + ; =UHL+UDE-UHL-1 + ; =UDE-1 + inc hl ; uhl=UDE + ex de, hl ; ude=UDE, uhl=~UHL +end if + ret diff --git a/src/std/static/lnot.src b/src/std/static/lnot.src new file mode 100644 index 000000000..9cd9c0ef6 --- /dev/null +++ b/src/std/static/lnot.src @@ -0,0 +1,19 @@ + assume adl=1 + + public __lnot +__lnot: + push af + push de + ld a, e ; a=E + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL + cpl ; a=~E + pop de + ld e, a ; e=~E + pop af + ret diff --git a/src/std/static/snot.src b/src/std/static/snot.src new file mode 100644 index 000000000..7aa8935a3 --- /dev/null +++ b/src/std/static/snot.src @@ -0,0 +1,4 @@ + public __snot +__snot := __inot + + extern __inot From 1d148f8ac098ea2c7b1fca95fe7c299051963d5a Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 14:43:38 -0400 Subject: [PATCH 037/142] Implement __llnot --- src/std/shared/llnot.src | 29 +++++++++++++++++++++++++++++ src/std/shared/llnot_fast.src | 27 +++++++++++++++++++++++++++ 2 files changed, 56 
insertions(+) create mode 100644 src/std/shared/llnot.src create mode 100644 src/std/shared/llnot_fast.src diff --git a/src/std/shared/llnot.src b/src/std/shared/llnot.src new file mode 100644 index 000000000..d90200ae4 --- /dev/null +++ b/src/std/shared/llnot.src @@ -0,0 +1,29 @@ + assume adl=1 + + public __llnot +__llnot: + push hl ; *spl=UHL + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL + ex (sp), hl ; uhl=UHL, *spl=~UHL + scf + sbc hl, de ; uhl=UHL-(UHL+UDE)-1 + ; =UHL-UHL-UDE-1 + ; =-UDE-1 + ; =~UDE + ex de, hl ; ude=~UDE, uhl=UHL+UDE + sbc hl, hl ; uhl=-some_bool + ccf ; cf=!some_bool + sbc hl, bc ; uhl=-some_bool-UBC-!some_bool + ; =-UBC-(some_bool+!some_bool) + ; =-UBC-1 + ; =~UBC + ld c, l + ld b, h ; bc=~BC + pop hl ; uhl=~UHL + ret diff --git a/src/std/shared/llnot_fast.src b/src/std/shared/llnot_fast.src new file mode 100644 index 000000000..410411ce6 --- /dev/null +++ b/src/std/shared/llnot_fast.src @@ -0,0 +1,27 @@ + assume adl=1 + + public __llnot_fast +__llnot_fast: + push hl ; *spl=UHL + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + scf + sbc hl, de ; uhl=UDE-(UHL+UDE)-1 + ; =UDE-UHL-UDE-1 + ; =-UHL-1 + ; =~UHL + ex (sp), hl ; uhl=UHL, *spl=~UHL + scf + sbc hl, de ; uhl=UHL-(UHL+UDE)-1 + ; =UHL-UHL-UDE-1 + ; =-UDE-1 + ; =~UDE + ex de, hl ; ude=~UDE, uhl=UHL+UDE + pop hl ; uhl=~UHL + ld a, c ; a=C + cpl ; a=~C + ld c, a ; c=~C + ld a, b ; a=B + cpl ; a=~B + ld b, a ; bc=~BC + ret From a64bcf85d36075276ae90162dd676c1d58755be4 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 15:16:42 -0400 Subject: [PATCH 038/142] Tweak __inot and __lnot --- src/std/shared/lnot_fast.src | 12 ++++++------ src/std/static/inot.src | 4 ++-- src/std/static/lnot.src | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/std/shared/lnot_fast.src b/src/std/shared/lnot_fast.src index 2e23cc56e..4f91b9508 100644 
--- a/src/std/shared/lnot_fast.src +++ b/src/std/shared/lnot_fast.src @@ -3,13 +3,13 @@ public __lnot_fast __lnot_fast: ld a, e ; a=E - add hl, de ; uhl=UHL+UDE - ex de, hl ; ude=UHL+UDE, uhl=UDE + cpl ; a=~E + ld e, a ; e=~E + add hl, de ; uhl=UHL+x + ex de, hl ; ude=UHL+x, uhl=x scf - sbc hl, de ; uhl=UDE-(UHL+UDE)-1 - ; =UDE-UHL-UDE-1 + sbc hl, de ; uhl=x-(UHL+x)-1 + ; =x-UHL-x-1 ; =-UHL-1 ; =~UHL - cpl ; a=~E - ld e, a ; e=~E ret diff --git a/src/std/static/inot.src b/src/std/static/inot.src index 6577644d4..eb8b5dd4f 100644 --- a/src/std/static/inot.src +++ b/src/std/static/inot.src @@ -1,6 +1,6 @@ public __inot __inot: -if 1 +if 0 push de end if add hl, de ; uhl=UHL+UDE @@ -10,7 +10,7 @@ end if ; =UDE-UHL-UDE-1 ; =-UHL-1 ; =~UHL -if 1 +if 0 pop de else ex de, hl ; ude=~UHL, uhl=UHL+UDE diff --git a/src/std/static/lnot.src b/src/std/static/lnot.src index 9cd9c0ef6..91797f4bb 100644 --- a/src/std/static/lnot.src +++ b/src/std/static/lnot.src @@ -3,17 +3,17 @@ public __lnot __lnot: push af - push de ld a, e ; a=E - add hl, de ; uhl=UHL+UDE - ex de, hl ; ude=UHL+UDE, uhl=UDE + cpl ; a=~E + ld e, a ; e=~E + pop af + push de + add hl, de ; uhl=UHL+x + ex de, hl ; ude=UHL+x, uhl=x scf - sbc hl, de ; uhl=UDE-(UHL+UDE)-1 - ; =UDE-UHL-UDE-1 + sbc hl, de ; uhl=x-(UHL+x)-1 + ; =x-UHL-x-1 ; =-UHL-1 ; =~UHL - cpl ; a=~E pop de - ld e, a ; e=~E - pop af ret From 300a782da65df4b81834599ae11217c01314a43e Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 16:13:08 -0400 Subject: [PATCH 039/142] Optimize __sneg, __ineg, and __lneg --- src/std/linked/linked.src | 6 ++++++ src/std/shared/ineg_fast.src | 11 +++++++++++ src/std/shared/lneg_fast.src | 14 ++++++++++++++ src/std/shared/shared.src | 5 +---- src/std/shared/sneg_fast.src | 4 ++++ src/std/static/ineg.src | 20 ++++++++++++++++++++ src/std/static/lneg.src | 18 ++++++++++++++++++ src/std/static/sneg.src | 4 ++++ 8 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 
src/std/shared/ineg_fast.src create mode 100644 src/std/shared/lneg_fast.src create mode 100644 src/std/shared/sneg_fast.src create mode 100644 src/std/static/ineg.src create mode 100644 src/std/static/lneg.src create mode 100644 src/std/static/sneg.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index a85ffc553..1fd838f91 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -62,6 +62,8 @@ __scmpzero := 000204h __smuls := 000224h public __smulu __smulu := 000228h + public __sneg +__sneg := 00022Ch public __snor __snot := 000230h public __sor @@ -78,6 +80,8 @@ __icmpzero := 000138h __imuls := 000154h public __imulu __imulu := 000158h + public __ineg +__ineg := 000160h public __inot __inot := 000164h public __ior @@ -100,6 +104,8 @@ __lcmpzero := 0001B0h __lmuls := 0001C8h public __lmulu __lmulu := 0001CCh + public __lneg +__lneg := 0001D0h public __lnot __lnot := 0001D4h public __lor diff --git a/src/std/shared/ineg_fast.src b/src/std/shared/ineg_fast.src new file mode 100644 index 000000000..8caaa34b1 --- /dev/null +++ b/src/std/shared/ineg_fast.src @@ -0,0 +1,11 @@ + assume adl=1 + + public __ineg_fast +__ineg_fast: + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + or a, a + sbc hl, de ; uhl=UDE-(UHL+UDE) + ; =UDE-UHL-UDE + ; =-UHL + ret diff --git a/src/std/shared/lneg_fast.src b/src/std/shared/lneg_fast.src new file mode 100644 index 000000000..95608e031 --- /dev/null +++ b/src/std/shared/lneg_fast.src @@ -0,0 +1,14 @@ + assume adl=1 + + public __lneg_fast +__lneg_fast: + xor a, a ; a=0 + sub a, e ; a=-E + ld e, a ; e=-E + add hl, de ; uhl=UHL+x + ex de, hl ; ude=UHL+x, uhl=x + or a, a + sbc hl, de ; uhl=x-(UHL+x) + ; =x-UHL-x + ; =-UHL + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index aa49b5170..614fe0be7 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, 
__case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __ineg, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lneg, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __sneg, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, 
_toupper __bldiy := 0000FCh __bshl := 000100h __bshru := 000104h @@ -37,7 +37,6 @@ __ildix := 000148h __ildiy := 00014Ch __imul_b := 000150h __indcall := 00015Ch -__ineg := 000160h __irems := 00016Ch __iremu := 000170h __ishl := 000174h @@ -57,7 +56,6 @@ __lldix := 0001C0h __lldiy := 0001C4h __lmuls := 0001C8h __lmulu := 0001CCh -__lneg := 0001D0h __lrems := 0001DCh __lremu := 0001E0h __lshl := 0001E4h @@ -73,7 +71,6 @@ __seqcaseD := 000214h __setflag := 000218h __sldix := 00021Ch __sldiy := 000220h -__sneg := 00022Ch __srems := 000238h __sremu := 00023Ch __sshl := 000240h diff --git a/src/std/shared/sneg_fast.src b/src/std/shared/sneg_fast.src new file mode 100644 index 000000000..09f9d05d1 --- /dev/null +++ b/src/std/shared/sneg_fast.src @@ -0,0 +1,4 @@ + public __sneg_fast +__sneg_fast := __ineg_fast + + extern __ineg_fast diff --git a/src/std/static/ineg.src b/src/std/static/ineg.src new file mode 100644 index 000000000..2f78ba0be --- /dev/null +++ b/src/std/static/ineg.src @@ -0,0 +1,20 @@ + public __ineg +__ineg: +if 0 + push de +end if + add hl, de ; uhl=UHL+UDE + ex de, hl ; ude=UHL+UDE, uhl=UDE + or a, a + sbc hl, de ; uhl=UDE-(UHL+UDE) + ; =UDE-UHL-UDE + ; =-UHL +if 0 + pop de +else + ex de, hl ; ude=-UHL, uhl=UHL+UDE + add hl, de ; uhl=UHL+UDE+-UHL + ; =UDE + ex de, hl ; ude=UDE, uhl=-UHL +end if + ret diff --git a/src/std/static/lneg.src b/src/std/static/lneg.src new file mode 100644 index 000000000..946ce6b4b --- /dev/null +++ b/src/std/static/lneg.src @@ -0,0 +1,18 @@ + assume adl=1 + + public __lneg +__lneg: + push af + xor a, a ; a=0 + sub a, e ; a=-E + ld e, a ; e=-E + pop af + push de + add hl, de ; uhl=UHL+x + ex de, hl ; ude=UHL+x, uhl=x + or a, a + sbc hl, de ; uhl=x-(UHL+x) + ; =x-UHL-x + ; =-UHL + pop de + ret diff --git a/src/std/static/sneg.src b/src/std/static/sneg.src new file mode 100644 index 000000000..75dcd9d4e --- /dev/null +++ b/src/std/static/sneg.src @@ -0,0 +1,4 @@ + public __sneg +__sneg := __ineg + + extern __ineg From 
a3254dd8719554d42e8b5fecf949c7153d4a9e2d Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 16:53:03 -0400 Subject: [PATCH 040/142] Fix __lneg --- src/std/shared/lneg_fast.src | 14 ++++++-------- src/std/static/lneg.src | 17 +++++++---------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/std/shared/lneg_fast.src b/src/std/shared/lneg_fast.src index 95608e031..a9dc7c315 100644 --- a/src/std/shared/lneg_fast.src +++ b/src/std/shared/lneg_fast.src @@ -2,13 +2,11 @@ public __lneg_fast __lneg_fast: + ld c, e ; c=E + ex de, hl ; ude=UHL, l=E xor a, a ; a=0 - sub a, e ; a=-E - ld e, a ; e=-E - add hl, de ; uhl=UHL+x - ex de, hl ; ude=UHL+x, uhl=x - or a, a - sbc hl, de ; uhl=x-(UHL+x) - ; =x-UHL-x - ; =-UHL + sbc hl, hl ; uhl=0 + sbc hl, de ; uhl=-UHL + sbc a, c + ld e, a ; euhl=-EUHL ret diff --git a/src/std/static/lneg.src b/src/std/static/lneg.src index 946ce6b4b..5d903ca3f 100644 --- a/src/std/static/lneg.src +++ b/src/std/static/lneg.src @@ -3,16 +3,13 @@ public __lneg __lneg: push af + push de + ex de, hl ; ude=UHL, l=E xor a, a ; a=0 - sub a, e ; a=-E - ld e, a ; e=-E + sbc hl, hl ; uhl=0 + sbc hl, de ; uhl=-UHL + pop de ; e=E + sbc a, e + ld e, a ; euhl=-EUHL pop af - push de - add hl, de ; uhl=UHL+x - ex de, hl ; ude=UHL+x, uhl=x - or a, a - sbc hl, de ; uhl=x-(UHL+x) - ; =x-UHL-x - ; =-UHL - pop de ret From 3541c345ddcfab9825951ae374f4001e52843159 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 17:17:27 -0400 Subject: [PATCH 041/142] Implement __llneg --- src/std/shared/llneg.src | 22 ++++++++++++++++++++++ src/std/shared/llneg_fast.src | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/std/shared/llneg.src create mode 100644 src/std/shared/llneg_fast.src diff --git a/src/std/shared/llneg.src b/src/std/shared/llneg.src new file mode 100644 index 000000000..787977752 --- /dev/null +++ b/src/std/shared/llneg.src @@ -0,0 +1,22 @@ + assume adl=1 + + public __llneg 
+__llneg: + push de ; *spl=UDE + ex de, hl ; ude=UHL, uhl=UDE + or a, a + sbc hl, hl ; uhl=0 + sbc hl, de ; uhl=-UHL + ex (sp), hl ; uhl=UDE, *spl=-UHL + ex de, hl ; ude=UDE, uhl=UHL + sbc hl, hl ; uhl=-(UHL!=0) + or a, a + sbc hl, de ; uhl=-UDE-(UHL!=0) + ex de, hl ; ude=-UDE-(UHL!=0), uhl=UDE + sbc hl, hl ; uhl=-(UDEUHL!=0) + or a, a + sbc hl, bc ; uhl=-UBC-(UDEUHL!=0) + ld c, l + ld b, h ; bc=-BC-(UDEUHL!=0) + pop hl ; bcudeuhl=-BCUDEUHL + ret diff --git a/src/std/shared/llneg_fast.src b/src/std/shared/llneg_fast.src new file mode 100644 index 000000000..db08ac487 --- /dev/null +++ b/src/std/shared/llneg_fast.src @@ -0,0 +1,22 @@ + assume adl=1 + + public __llneg_fast +__llneg_fast: + push de ; *spl=UDE + ex de, hl ; ude=UHL, uhl=UDE + xor a, a ; a=0 + sbc hl, hl ; uhl=0 + sbc hl, de ; uhl=-UHL + ex (sp), hl ; uhl=UDE, *spl=-UHL + ex de, hl ; ude=UDE, uhl=UHL + sbc hl, hl ; uhl=-(UHL!=0) + xor a, a ; a=0 + sbc hl, de ; uhl=-UDE-(UHL!=0) + ex de, hl ; ude=-UDE-(UHL!=0), uhl=UDE + sbc a, c ; a=-C-(UDEUHL!=0) + ld c, a ; c=-C-(UDEUHL!=0) + ld a, 0 ; a=0 + sbc a, b ; a=-B-(CUDEUHL!=0) + ld b, a ; b=-B-(CUDEUHL!=0) + pop hl ; bcudeuhl=-BCUDEUHL + ret From 43616cb71c41f3894304f9e21da8835c7ea6d191 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 17:27:59 -0400 Subject: [PATCH 042/142] Implement suboptimal __llshl --- src/std/shared/llshl.src | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 src/std/shared/llshl.src diff --git a/src/std/shared/llshl.src b/src/std/shared/llshl.src new file mode 100644 index 000000000..a640a82b6 --- /dev/null +++ b/src/std/shared/llshl.src @@ -0,0 +1,23 @@ + public __llshl +__llshl: +; Not well-optimized + push af + push iy + ld iy, 0 + add iy, sp + ld a, (iy + 9) + or a, a + jr z, .epilogue +.loop: + add hl, hl + ex de, hl + adc hl, hl + ex de, hl + rl c + rl b + dec a + jr nz, .loop +.epilogue: + pop iy + pop af + ret From 892d3c292f17bceb07b8f9751e8d64d9d51efc8f Mon Sep 17 00:00:00 
2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 17:44:51 -0400 Subject: [PATCH 043/142] Slightly optimize __llshl --- src/std/shared/llshl.src | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/std/shared/llshl.src b/src/std/shared/llshl.src index a640a82b6..bcf08e98f 100644 --- a/src/std/shared/llshl.src +++ b/src/std/shared/llshl.src @@ -1,23 +1,25 @@ public __llshl __llshl: -; Not well-optimized +; Suboptimal push af push iy ld iy, 0 add iy, sp - ld a, (iy + 9) - or a, a - jr z, .epilogue + ld a, b + ld b, (iy + 9) + inc b + dec b + jr z, .finish .loop: add hl, hl ex de, hl adc hl, hl ex de, hl rl c - rl b - dec a - jr nz, .loop -.epilogue: + rla + djnz .loop +.finish: + ld b, a pop iy pop af ret From c9f518f1a607f2073fe5051a86c26829bbd5518f Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 17:49:42 -0400 Subject: [PATCH 044/142] Fix and slightly optimize __llshrs --- src/std/shared/llshrs.src | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/std/shared/llshrs.src b/src/std/shared/llshrs.src index 42b2fee83..17d14226f 100644 --- a/src/std/shared/llshrs.src +++ b/src/std/shared/llshrs.src @@ -1,13 +1,13 @@ public __llshrs __llshrs: -; Not well-optimized, but it probably works +; Suboptimal + push af push iy ld iy, 0 add iy, sp - push af ld a, c ld c, b - ld b, (iy + 6) + ld b, (iy + 9) inc b dec b jr z, .finish @@ -23,9 +23,10 @@ __llshrs: rr h rr l djnz .loop + ld sp, iy +.finish: ld b, c ld c, a -.finish: - pop af pop iy + pop af ret From ce4b81efa6df1ca229a2456fd21aca86a1f53aeb Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 5 Jun 2021 17:59:34 -0400 Subject: [PATCH 045/142] Implement suboptimal __llshru --- src/std/shared/llshru.src | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/std/shared/llshru.src diff --git a/src/std/shared/llshru.src b/src/std/shared/llshru.src new file mode 100644 index 000000000..a8369db78 
--- /dev/null +++ b/src/std/shared/llshru.src @@ -0,0 +1,32 @@ + public __llshru +__llshru: +; Suboptimal + push af + push iy + ld iy, 0 + add iy, sp + ld a, c + ld c, b + ld b, (iy + 9) + inc b + dec b + jr z, .finish + push de + push hl +.loop: + srl c + rra + rr (iy - 1) + rr d + rr e + rr (iy - 4) + rr h + rr l + djnz .loop + ld sp, iy +.finish: + ld b, c + ld c, a + pop iy + pop af + ret From d962415ef6c25cbc1f6385daeb5a4ed2ffd34045 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Jun 2021 14:59:27 -0400 Subject: [PATCH 046/142] Provide (slightly) optimized versions of __bshl, __sshl, __ishl, and __lshl --- src/std/linked/linked.src | 10 ++++++++++ src/std/shared/llshl.src | 2 +- src/std/shared/shared.src | 6 +----- src/std/static/bshl.src | 13 +++++++++++++ src/std/static/ishl.src | 15 +++++++++++++++ src/std/static/lshl.src | 18 ++++++++++++++++++ src/std/static/sshl.src | 4 ++++ 7 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 src/std/static/bshl.src create mode 100644 src/std/static/ishl.src create mode 100644 src/std/static/lshl.src create mode 100644 src/std/static/sshl.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 1fd838f91..d48fd320b 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -53,6 +53,10 @@ _strtod := 0220E0h _tan := 022120h _tanh := 022138h +; byte (8-bit) math + public __bshl +__bshl := 000100h + ; short (16-bit) math public __sand __sand := 000200h @@ -68,6 +72,8 @@ __sneg := 00022Ch __snot := 000230h public __sor __sor := 000234h + public __sshl +__sshl := 000240h public __sxor __sxor := 000268h @@ -86,6 +92,8 @@ __ineg := 000160h __inot := 000164h public __ior __ior := 000168h + public __ishl +__ishl := 000174h public __ixor __ixor := 000198h @@ -110,6 +118,8 @@ __lneg := 0001D0h __lnot := 0001D4h public __lor __lor := 0001D8h + public __lshl +__lshl := 0001E4h public __lsub __lsub := 0001F8h public __lxor diff --git a/src/std/shared/llshl.src 
b/src/std/shared/llshl.src index bcf08e98f..2940aea4b 100644 --- a/src/std/shared/llshl.src +++ b/src/std/shared/llshl.src @@ -1,6 +1,6 @@ public __llshl __llshl: -; Suboptimal +; Suboptimal for large shift amounts push af push iy ld iy, 0 diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 614fe0be7..2cc010e2e 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,6 +1,5 @@ - public __bldiy, __bshl, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lshl, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs, __ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lshrs, 
__lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh -__bshl := 000100h __bshru := 000104h __bstix := 00010Ch __bstiy := 000108h @@ -39,7 +38,6 @@ __imul_b := 000150h __indcall := 00015Ch __irems := 00016Ch __iremu := 000170h -__ishl := 000174h __ishl_b := 000178h __ishrs := 00017Ch __ishrs_b := 000180h @@ -58,7 +56,6 @@ __lmuls := 0001C8h __lmulu := 0001CCh __lrems := 0001DCh __lremu := 0001E0h -__lshl := 0001E4h __lshrs := 0001E8h __lshru := 0001ECh __lstix := 0001F0h @@ -73,7 +70,6 @@ __sldix := 00021Ch __sldiy := 000220h __srems := 000238h __sremu := 00023Ch -__sshl := 000240h __sshl_b := 000244h __sshrs := 000248h __sshrs_b := 00024Ch diff --git a/src/std/static/bshl.src b/src/std/static/bshl.src new file mode 100644 index 000000000..7bae8e816 --- /dev/null +++ b/src/std/static/bshl.src @@ -0,0 +1,13 @@ + public __bshl +__bshl: +; CC: if B!=0: B*(4*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if B==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc b + dec b + ret z + push bc +.loop: + add a, a + djnz .loop + pop bc + ret diff --git a/src/std/static/ishl.src b/src/std/static/ishl.src new file mode 100644 index 000000000..91822d8a1 --- /dev/null +++ b/src/std/static/ishl.src @@ -0,0 +1,15 @@ + public __ishl +__ishl: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(4*r(PC)+1)+7*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if C==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc c + dec c + ret z + push bc + ld b, c +.loop: + add hl, hl + djnz .loop + pop bc + ret diff --git a/src/std/static/lshl.src b/src/std/static/lshl.src new 
file mode 100644 index 000000000..c17767998 --- /dev/null +++ b/src/std/static/lshl.src @@ -0,0 +1,18 @@ + public __lshl +__lshl: +; Suboptimal for large shift amounts +; CC: if L!=0: L*(5*r(PC)+1)+9*r(PC)+(ADL?12*r(SPL)+9*w(SPL):8*r(SPS)+6*w(SPS))+1 +; if L==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc l + dec l + ret z + push bc + ld b, l + ex (sp), hl +.loop: + add hl, hl + rla + djnz .loop + ex (sp), hl + pop bc + ret diff --git a/src/std/static/sshl.src b/src/std/static/sshl.src new file mode 100644 index 000000000..7ae40984f --- /dev/null +++ b/src/std/static/sshl.src @@ -0,0 +1,4 @@ + public __sshl +__sshl := __ishl + + extern __ishl From 5db7e5052b292584850ef1702009ddb53b143a11 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Jun 2021 18:50:06 -0400 Subject: [PATCH 047/142] Fix and slightly optimize __llshrs and __llshru --- src/std/shared/llshrs.src | 32 ++++++++++++++++---------------- src/std/shared/llshru.src | 32 +++++++++----------------------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/std/shared/llshrs.src b/src/std/shared/llshrs.src index 17d14226f..bc7cea7a0 100644 --- a/src/std/shared/llshrs.src +++ b/src/std/shared/llshrs.src @@ -1,32 +1,32 @@ public __llshrs __llshrs: -; Suboptimal +; Suboptimal for large shift amounts push af push iy ld iy, 0 add iy, sp - ld a, c - ld c, b - ld b, (iy + 9) - inc b - dec b + ld a, (iy + 9) + or a, a jr z, .finish push de push hl .loop: - sra c - rra + sra b + public __llshrs.hijack_llshru +.hijack_llshru: + rr c rr (iy - 1) - rr d - rr e + rr (iy - 2) + rr (iy - 3) rr (iy - 4) - rr h - rr l - djnz .loop - ld sp, iy + rr (iy - 5) + rr (iy - 6) + dec a + jr nz, .loop + pop hl + pop de + public __llshrs.finish .finish: - ld b, c - ld c, a pop iy pop af ret diff --git a/src/std/shared/llshru.src b/src/std/shared/llshru.src index a8369db78..15fdb323b 100644 --- a/src/std/shared/llshru.src +++ b/src/std/shared/llshru.src @@ -1,32 +1,18 @@ public __llshru __llshru: -; Suboptimal +; 
Suboptimal for large shift amounts push af push iy ld iy, 0 add iy, sp - ld a, c - ld c, b - ld b, (iy + 9) - inc b - dec b - jr z, .finish + ld a, (iy + 9) + or a, a + jp z, __llshrs.finish push de push hl .loop: - srl c - rra - rr (iy - 1) - rr d - rr e - rr (iy - 4) - rr h - rr l - djnz .loop - ld sp, iy -.finish: - ld b, c - ld c, a - pop iy - pop af - ret + srl b + jp __llshrs.hijack_llshru + + extern __llshrs.finish + extern __llshrs.hijack_llshru From 1580ad132665156b83ca9210821e407f92293b95 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Jun 2021 18:51:16 -0400 Subject: [PATCH 048/142] Provide slightly optimized versions of __bshrs, __bshru, __sshrs, __sshru, __ishrs, __ishru, __lshrs, and __lshru --- src/std/linked/linked.src | 14 ++++++++++++++ src/std/shared/bshrs.src | 36 ++++++++++-------------------------- src/std/shared/shared.src | 9 +-------- src/std/static/bshru.src | 13 +++++++++++++ src/std/static/ishrs.src | 30 ++++++++++++++++++++++++++++++ src/std/static/ishru.src | 23 +++++++++++++++++++++++ src/std/static/lshrs.src | 31 +++++++++++++++++++++++++++++++ src/std/static/lshru.src | 21 +++++++++++++++++++++ src/std/static/sshrs.src | 16 ++++++++++++++++ src/std/static/sshru.src | 16 ++++++++++++++++ 10 files changed, 175 insertions(+), 34 deletions(-) create mode 100644 src/std/static/bshru.src create mode 100644 src/std/static/ishrs.src create mode 100644 src/std/static/ishru.src create mode 100644 src/std/static/lshrs.src create mode 100644 src/std/static/lshru.src create mode 100644 src/std/static/sshrs.src create mode 100644 src/std/static/sshru.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index d48fd320b..23a5fd448 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -56,6 +56,8 @@ _tanh := 022138h ; byte (8-bit) math public __bshl __bshl := 000100h + public __bshru +__bshru := 000104h ; short (16-bit) math public __sand @@ -74,6 +76,10 @@ __snot := 000230h __sor := 000234h public 
__sshl __sshl := 000240h + public __sshrs +__sshrs := 000248h + public __sshru +__sshru := 000250h public __sxor __sxor := 000268h @@ -94,6 +100,10 @@ __inot := 000164h __ior := 000168h public __ishl __ishl := 000174h + public __ishrs +__ishrs := 00017Ch + public __ishru +__ishru := 000184h public __ixor __ixor := 000198h @@ -120,6 +130,10 @@ __lnot := 0001D4h __lor := 0001D8h public __lshl __lshl := 0001E4h + public __lshrs +__lshrs := 0001E8h + public __lshru +__lshru := 0001ECh public __lsub __lsub := 0001F8h public __lxor diff --git a/src/std/shared/bshrs.src b/src/std/shared/bshrs.src index c49c763d5..25bf0bf13 100644 --- a/src/std/shared/bshrs.src +++ b/src/std/shared/bshrs.src @@ -1,29 +1,13 @@ -; (c) Copyright 2007-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; Byte Shift Right Signed -; Input: -; Operand1: -; A : 8 bit number to shift -; Operand2: -; B : 8 bit shift count -; -; Output: -; Result: A : 8 bit -; Registers Used: -; flags -;------------------------------------------------------------------------- - assume adl=1 - - public __bshrs + public __bshrs __bshrs: - push bc - inc b - jr test - -loop: - sra a -test: - djnz loop - +; CC: if B!=0: B*(5*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if B==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc b + dec b + ret z + push bc +.loop: + sra a + djnz .loop pop bc ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 2cc010e2e..bc9f36e2f 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,6 +1,5 @@ - public __bldiy, __bshru, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs, 
__ishrs_b, __ishru, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lshrs, __lshru, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs, __sshrs_b, __sshru, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper __bldiy := 0000FCh -__bshru := 000104h __bstix := 00010Ch __bstiy := 000108h __case := 000110h @@ -39,9 +38,7 @@ __indcall := 00015Ch __irems := 00016Ch __iremu := 000170h __ishl_b := 000178h -__ishrs := 00017Ch __ishrs_b := 000180h -__ishru := 000184h __ishru_b := 000188h __istix := 00018Ch __istiy := 000190h @@ -56,8 +53,6 @@ __lmuls := 0001C8h __lmulu := 0001CCh __lrems := 0001DCh __lremu := 0001E0h -__lshrs := 0001E8h -__lshru := 0001ECh __lstix := 0001F0h 
__lstiy := 0001F4h __ltof := 000284h @@ -71,9 +66,7 @@ __sldiy := 000220h __srems := 000238h __sremu := 00023Ch __sshl_b := 000244h -__sshrs := 000248h __sshrs_b := 00024Ch -__sshru := 000250h __sshru_b := 000254h __sstix := 000258h __sstiy := 00025Ch diff --git a/src/std/static/bshru.src b/src/std/static/bshru.src new file mode 100644 index 000000000..817d5ddcc --- /dev/null +++ b/src/std/static/bshru.src @@ -0,0 +1,13 @@ + public __bshru +__bshru: +; CC: if B!=0: B*(5*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if B==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc b + dec b + ret z + push bc +.loop: + srl a + djnz .loop + pop bc + ret diff --git a/src/std/static/ishrs.src b/src/std/static/ishrs.src new file mode 100644 index 000000000..a242e0a26 --- /dev/null +++ b/src/std/static/ishrs.src @@ -0,0 +1,30 @@ + assume adl=1 + + public __ishrs +__ishrs: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(13*r(PC)+3*r(SPL)+3*w(SPL)+4)+13*r(PC)+9*r(SPL)+6*w(SPL)+1 +; if C==0: 4*r(PC)+3*r(SPL)+2 + inc c + dec c + ret z + push bc + push hl + ld b, c + or a, a + sbc hl, hl + add hl, sp +.loop: + inc hl + inc hl + sra (hl) + public __ishrs.hijack_ishru +.hijack_ishru: + dec hl + rr (hl) + dec hl + rr (hl) + djnz .loop + pop hl + pop bc + ret diff --git a/src/std/static/ishru.src b/src/std/static/ishru.src new file mode 100644 index 000000000..e0b1c8d14 --- /dev/null +++ b/src/std/static/ishru.src @@ -0,0 +1,23 @@ + assume adl=1 + + public __ishru +__ishru: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(13*r(PC)+3*r(SPL)+3*w(SPL)+4)+17*r(PC)+9*r(SPL)+6*w(SPL)+2 +; if C==0: 4*r(PC)+3*r(SPL)+2 + inc c + dec c + ret z + push bc + push hl + ld b, c + or a, a + sbc hl, hl + add hl, sp +.loop: + inc hl + inc hl + srl (hl) + jp __ishrs.hijack_ishru + + extern __ishrs.hijack_ishru diff --git a/src/std/static/lshrs.src b/src/std/static/lshrs.src new file mode 100644 index 000000000..4e2b56819 --- /dev/null +++ b/src/std/static/lshrs.src @@ -0,0 +1,31 @@ 
+ assume adl=1 + + public __lshrs +__lshrs: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(15*r(PC)+3*r(SPL)+3*w(SPL)+4)+13*r(PC)+9*r(SPL)+6*w(SPL)+1 +; if C==0: 4*r(PC)+3*r(SPL)+2 + inc l + dec l + ret z + push hl + push bc + ld b, l + or a, a + sbc hl, hl + add hl, sp +.loop: + sra a + public __lshrs.hijack_lshru +.hijack_lshru: + inc hl + inc hl + rr (hl) + dec hl + rr (hl) + dec hl + rr (hl) + djnz .loop + pop bc + pop hl + ret diff --git a/src/std/static/lshru.src b/src/std/static/lshru.src new file mode 100644 index 000000000..39c3826df --- /dev/null +++ b/src/std/static/lshru.src @@ -0,0 +1,21 @@ + assume adl=1 + + public __lshru +__lshru: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(15*r(PC)+3*r(SPL)+3*w(SPL)+4)+17*r(PC)+9*r(SPL)+6*w(SPL)+2 +; if C==0: 4*r(PC)+3*r(SPL)+2 + inc l + dec l + ret z + push hl + push bc + ld b, l + or a, a + sbc hl, hl + add hl, sp +.loop: + srl a + jp __lshrs.hijack_lshru + + extern __lshrs.hijack_lshru diff --git a/src/std/static/sshrs.src b/src/std/static/sshrs.src new file mode 100644 index 000000000..d52a67216 --- /dev/null +++ b/src/std/static/sshrs.src @@ -0,0 +1,16 @@ + public __sshru +__sshru: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(7*r(PC)+1)+7*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if C==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc c + dec c + ret z + push bc + ld b, c +.loop: + sra h + rr l + djnz .loop + pop bc + ret diff --git a/src/std/static/sshru.src b/src/std/static/sshru.src new file mode 100644 index 000000000..a27e5440d --- /dev/null +++ b/src/std/static/sshru.src @@ -0,0 +1,16 @@ + public __sshru +__sshru: +; Suboptimal for large shift amounts +; CC: if C!=0: C*(7*r(PC)+1)+7*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 +; if C==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 + inc c + dec c + ret z + push bc + ld b, c +.loop: + srl h + rr l + djnz .loop + pop bc + ret From 1f79f762152268171c82c82202536f50d0c8e431 Mon Sep 17 00:00:00 2001 From: Zachary Wassall 
Date: Sun, 6 Jun 2021 22:56:27 -0400 Subject: [PATCH 049/142] Provide optimized versions of _tolower and _toupper --- src/std/linked/linked.src | 6 ++++++ src/std/shared/shared.src | 4 +--- src/std/static/tolower.src | 12 ++++++++++++ src/std/static/toupper.src | 12 ++++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 src/std/static/tolower.src create mode 100644 src/std/static/toupper.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 23a5fd448..afd54ef5a 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -138,3 +138,9 @@ __lshru := 0001ECh __lsub := 0001F8h public __lxor __lxor := 0001FCh + +; C standard library + public _tolower +_tolower := 021E34h + public _toupper +_toupper := 021E38h diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index bc9f36e2f..707357797 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok, _tolower, _toupper + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, 
__fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok __bldiy := 0000FCh __bstix := 00010Ch __bstiy := 000108h @@ -96,5 +96,3 @@ _strrchr := 0000E8h _strspn := 0000ECh _strstr := 0000F0h _strtok := 0000F4h -_tolower := 021E34h -_toupper := 021E38h diff --git a/src/std/static/tolower.src b/src/std/static/tolower.src new file mode 100644 index 000000000..4bc7aea4f --- /dev/null +++ b/src/std/static/tolower.src @@ -0,0 +1,12 @@ + public _tolower +_tolower: + push af + ld a, l + sub a, 'A' + cp a, 1+'Z'-'A' + jr nc, .not_A_Z + add a, 'a' + ld l, a +.not_A_Z: + pop af + ret diff --git a/src/std/static/toupper.src b/src/std/static/toupper.src new file mode 100644 index 000000000..98c83f44e --- /dev/null +++ b/src/std/static/toupper.src @@ -0,0 +1,12 @@ + public _toupper +_toupper: + push af + ld a, l + sub a, 'a' + cp a, 1+'z'-'a' + jr nc, .not_a_z + add a, 'A' + ld l, a +.not_a_z: + pop af + ret From f3e66a6dc423bf7246d2f8e5832d98a0f1555992 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Jun 2021 23:06:10 -0400 Subject: [PATCH 050/142] Remove some leftover, unused equates --- src/std/shared/shared.src | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 
707357797..bcca7199a 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -49,8 +49,6 @@ __ldivu := 0001B8h __ldvrmu := 0001BCh __lldix := 0001C0h __lldiy := 0001C4h -__lmuls := 0001C8h -__lmulu := 0001CCh __lrems := 0001DCh __lremu := 0001E0h __lstix := 0001F0h From 5f33720f21947c915a24a4e56fb10387b6a739b2 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Mon, 7 Jun 2021 03:57:36 -0400 Subject: [PATCH 051/142] Fix copy-paste oversight --- src/std/static/sshrs.src | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/static/sshrs.src b/src/std/static/sshrs.src index d52a67216..8f8696845 100644 --- a/src/std/static/sshrs.src +++ b/src/std/static/sshrs.src @@ -1,5 +1,5 @@ - public __sshru -__sshru: + public __sshrs +__sshrs: ; Suboptimal for large shift amounts ; CC: if C!=0: C*(7*r(PC)+1)+7*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 ; if C==0: 4*r(PC)+(ADL?3*r(SPL):2*r(SPS))+2 From 1fd73f6271f6537538a40226c18006f47635cbad Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Jun 2021 17:11:55 -0400 Subject: [PATCH 052/142] Provide optimized versions of __ladd_b --- src/std/linked/linked.src | 2 ++ src/std/shared/ladd_b_fast.src | 12 ++++++++++++ src/std/shared/shared.src | 3 +-- src/std/static/ladd_b.src | 11 +++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 src/std/shared/ladd_b_fast.src create mode 100644 src/std/static/ladd_b.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index afd54ef5a..f068d8d7a 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -110,6 +110,8 @@ __ixor := 000198h ; long (32-bit) math public __ladd __ladd := 00019Ch + public __ladd_b +__ladd_b := 0001A0h public __land __land := 0001A4h public __lcmps diff --git a/src/std/shared/ladd_b_fast.src b/src/std/shared/ladd_b_fast.src new file mode 100644 index 000000000..27f30108c --- /dev/null +++ b/src/std/shared/ladd_b_fast.src @@ -0,0 +1,12 @@ + public 
__ladd_b_fast +__ladd_b_fast: + add a, l + ld l, a + ret nc + inc h + ret nz + ld bc, 1 shl 16 + add hl, bc + ret nc + inc e + ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index bcca7199a..c9234cc82 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ladd_b, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, 
_strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strrchr, _strspn, _strstr, _strtok __bldiy := 0000FCh __bstix := 00010Ch __bstiy := 000108h @@ -43,7 +43,6 @@ __ishru_b := 000188h __istix := 00018Ch __istiy := 000190h __itol := 000194h -__ladd_b := 0001A0h __ldivs := 0001B4h __ldivu := 0001B8h __ldvrmu := 0001BCh diff --git a/src/std/static/ladd_b.src b/src/std/static/ladd_b.src new file mode 100644 index 000000000..232f13bfc --- /dev/null +++ b/src/std/static/ladd_b.src @@ -0,0 +1,11 @@ + public __ladd_b +__ladd_b: + push bc + inc.s bc + ld c, a + ld b, 0 + add hl, bc + pop bc + ret nc + inc e + ret From 49a332b552659e382abdeb5b71557daddc6c726d Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Jun 2021 17:12:07 -0400 Subject: [PATCH 053/142] Implement __lladd_b --- src/std/shared/lladd_b.src | 22 ++++++++++++++++++++ src/std/shared/lladd_b_fast.src | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 src/std/shared/lladd_b.src create mode 100644 src/std/shared/lladd_b_fast.src diff --git a/src/std/shared/lladd_b.src b/src/std/shared/lladd_b.src new file mode 100644 index 000000000..255ff457c --- /dev/null +++ b/src/std/shared/lladd_b.src @@ -0,0 +1,22 @@ + public __lladd_b +__lladd_b: + push bc + inc.s bc + ld c, a + ld b, 0 + add hl, bc + jr nc, .finish + ld c, 1 + ex de, hl + add hl, bc + ex de, hl + pop bc + ret nc + inc c + ret nz + inc b + ret + +.finish: + pop bc + ret diff --git a/src/std/shared/lladd_b_fast.src b/src/std/shared/lladd_b_fast.src new file mode 100644 index 000000000..d35d8717c --- /dev/null +++ b/src/std/shared/lladd_b_fast.src @@ -0,0 +1,36 @@ + public __lladd_b_fast +__lladd_b_fast: + add a, l ; a=L+A + ld l, a ; l=L+A + ret nc + ; cf=1 + inc h ; h=H+1 + ret nz + ; h=0 + dec h + ld l, h ; uhl=(HLU<<16)+0xFFFF + inc hl ; uhl=HLU+1<<16 + add hl, bc + or a, a + sbc hl, bc + ld l, a ; uhl=(HLU+1<<16)+(L+A&0xFF) + ret nz + ; 
uhl=L+A&0xFF, cf=0 + inc e ; e=E+1 + ret nz + ; e=0 + inc d ; d=D+1 + ret nz + ; d=0 + dec d + ld e, d ; ude=(DEU<<16)+0xFFFF + inc de ; ude=DEU+1<<16 + sbc hl, de + add hl, de + ret c + ; ude=0 + inc c ; c=C+1 + ret nz + ; c=0 + inc b ; b=B+1 + ret From 153d9e98b052558f43dfc02a3909ad16f082fbd1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Jun 2021 18:14:09 -0400 Subject: [PATCH 054/142] Implement some optimized shifts by one --- src/std/shared/ishrs_1_fast.src | 12 ++++++++++++ src/std/shared/ishru_1_fast.src | 12 ++++++++++++ src/std/shared/llshl_1_fast.src | 11 +++++++++++ src/std/shared/llshrs_1_fast.src | 22 ++++++++++++++++++++++ src/std/shared/llshru_1_fast.src | 22 ++++++++++++++++++++++ src/std/shared/lshrs_1_fast.src | 13 +++++++++++++ src/std/shared/lshru_1_fast.src | 13 +++++++++++++ 7 files changed, 105 insertions(+) create mode 100644 src/std/shared/ishrs_1_fast.src create mode 100644 src/std/shared/ishru_1_fast.src create mode 100644 src/std/shared/llshl_1_fast.src create mode 100644 src/std/shared/llshrs_1_fast.src create mode 100644 src/std/shared/llshru_1_fast.src create mode 100644 src/std/shared/lshrs_1_fast.src create mode 100644 src/std/shared/lshru_1_fast.src diff --git a/src/std/shared/ishrs_1_fast.src b/src/std/shared/ishrs_1_fast.src new file mode 100644 index 000000000..1f6e4a15f --- /dev/null +++ b/src/std/shared/ishrs_1_fast.src @@ -0,0 +1,12 @@ + assume adl=1 + + public __ishrs_1_fast +__ishrs_1_fast: + push hl + ld hl, 2 + add hl, sp + sra (hl) + pop hl + rr h + rr l + ret diff --git a/src/std/shared/ishru_1_fast.src b/src/std/shared/ishru_1_fast.src new file mode 100644 index 000000000..5fac4f94c --- /dev/null +++ b/src/std/shared/ishru_1_fast.src @@ -0,0 +1,12 @@ + assume adl=1 + + public __ishru_1_fast +__ishru_1_fast: + push hl + ld hl, 2 + add hl, sp + srl (hl) + pop hl + rr h + rr l + ret diff --git a/src/std/shared/llshl_1_fast.src b/src/std/shared/llshl_1_fast.src new file mode 100644 index 000000000..15bc25601 
--- /dev/null +++ b/src/std/shared/llshl_1_fast.src @@ -0,0 +1,11 @@ + assume adl=1 + + public __llshl_1_fast +__llshl_1_fast: + add hl, hl + ex de, hl + adc hl, hl + ex de, hl + rl c + rl b + ret diff --git a/src/std/shared/llshrs_1_fast.src b/src/std/shared/llshrs_1_fast.src new file mode 100644 index 000000000..cfc2d8785 --- /dev/null +++ b/src/std/shared/llshrs_1_fast.src @@ -0,0 +1,22 @@ + assume adl=1 + + public __llshrs_1_fast +__llshrs_1_fast: + push hl + push de + ld hl, 2 + add hl, sp + sra b + rr c + rr (hl) + pop de + inc hl + inc hl + inc hl + rr d + rr e + rr (hl) + pop hl + rr h + rr l + ret diff --git a/src/std/shared/llshru_1_fast.src b/src/std/shared/llshru_1_fast.src new file mode 100644 index 000000000..f175fed56 --- /dev/null +++ b/src/std/shared/llshru_1_fast.src @@ -0,0 +1,22 @@ + assume adl=1 + + public __llshru_1_fast +__llshru_1_fast: + push hl + push de + ld hl, 2 + add hl, sp + srl b + rr c + rr (hl) + pop de + inc hl + inc hl + inc hl + rr d + rr e + rr (hl) + pop hl + rr h + rr l + ret diff --git a/src/std/shared/lshrs_1_fast.src b/src/std/shared/lshrs_1_fast.src new file mode 100644 index 000000000..1105217ca --- /dev/null +++ b/src/std/shared/lshrs_1_fast.src @@ -0,0 +1,13 @@ + assume adl=1 + + public __lshrs_1_fast +__lshrs_1_fast: + push hl + ld hl, 2 + add hl, sp + sra e + rr (hl) + pop hl + rr h + rr l + ret diff --git a/src/std/shared/lshru_1_fast.src b/src/std/shared/lshru_1_fast.src new file mode 100644 index 000000000..edb931a22 --- /dev/null +++ b/src/std/shared/lshru_1_fast.src @@ -0,0 +1,13 @@ + assume adl=1 + + public __lshru_1_fast +__lshru_1_fast: + push hl + ld hl, 2 + add hl, sp + srl e + rr (hl) + pop hl + rr h + rr l + ret From 83a8159cd2111c92ad44a97529fe491beb6b2b09 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Jun 2021 18:37:07 -0400 Subject: [PATCH 055/142] Implement optimized add and sub 1 --- src/std/shared/ladd_1.src | 11 +++++++++++ src/std/shared/lladd_1.src | 15 +++++++++++++++ 
src/std/shared/llsub_1.src | 15 +++++++++++++++ src/std/shared/lsub_1.src | 11 +++++++++++ 4 files changed, 52 insertions(+) create mode 100644 src/std/shared/ladd_1.src create mode 100644 src/std/shared/lladd_1.src create mode 100644 src/std/shared/llsub_1.src create mode 100644 src/std/shared/lsub_1.src diff --git a/src/std/shared/ladd_1.src b/src/std/shared/ladd_1.src new file mode 100644 index 000000000..597ecee1e --- /dev/null +++ b/src/std/shared/ladd_1.src @@ -0,0 +1,11 @@ + assume adl=1 + + public __ladd_1 +__ladd_1: + inc hl + add hl, bc + or a, a + sbc hl, bc + ret nz + inc e + ret diff --git a/src/std/shared/lladd_1.src b/src/std/shared/lladd_1.src new file mode 100644 index 000000000..4a9cfcba1 --- /dev/null +++ b/src/std/shared/lladd_1.src @@ -0,0 +1,15 @@ + assume adl=1 + + public __lladd_1 +__lladd_1: + inc hl + add hl, de + or a, a + sbc hl, de + ret nz + inc de + sbc hl, de + add hl, de + ret nz + inc bc + ret diff --git a/src/std/shared/llsub_1.src b/src/std/shared/llsub_1.src new file mode 100644 index 000000000..27b6a73b3 --- /dev/null +++ b/src/std/shared/llsub_1.src @@ -0,0 +1,15 @@ + assume adl=1 + + public __llsub_1 +__llsub_1: + add hl, de + or a, a + sbc hl, de + dec hl + ret nz + ex de, hl + add hl, de + ex de, hl + ret c + dec bc + ret diff --git a/src/std/shared/lsub_1.src b/src/std/shared/lsub_1.src new file mode 100644 index 000000000..e4db59b63 --- /dev/null +++ b/src/std/shared/lsub_1.src @@ -0,0 +1,11 @@ + assume adl=1 + + public __lsub_1 +__lsub_1: + add hl, bc + or a, a + sbc hl, bc + dec hl + ret nz + dec e + ret From 87712404ac36138a91fbe45fef896c4b603f7d44 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Jun 2021 18:44:45 -0400 Subject: [PATCH 056/142] Slightly optimize __lladd_b --- src/std/shared/lladd_b.src | 9 ++------- src/std/shared/lladd_b_fast.src | 5 +---- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/std/shared/lladd_b.src b/src/std/shared/lladd_b.src index 255ff457c..742699bab 100644 
--- a/src/std/shared/lladd_b.src +++ b/src/std/shared/lladd_b.src @@ -10,13 +10,8 @@ __lladd_b: ex de, hl add hl, bc ex de, hl - pop bc - ret nc - inc c - ret nz - inc b - ret - .finish: pop bc + ret nc + inc bc ret diff --git a/src/std/shared/lladd_b_fast.src b/src/std/shared/lladd_b_fast.src index d35d8717c..f73597653 100644 --- a/src/std/shared/lladd_b_fast.src +++ b/src/std/shared/lladd_b_fast.src @@ -29,8 +29,5 @@ __lladd_b_fast: add hl, de ret c ; ude=0 - inc c ; c=C+1 - ret nz - ; c=0 - inc b ; b=B+1 + inc bc ; ubc=UBC+1 ret From 8d35d37bea48e098bb8199a24168d61e35a041dd Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 17 Jul 2021 19:17:10 -0400 Subject: [PATCH 057/142] Implement really slow __llmuls/__llmulu Except it doesn't quite work. --- src/std/shared/llmulu.src | 75 +++++++++++++++++++++++++++++++++++++ src/std/shared/llmulu_b.src | 2 - 2 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 src/std/shared/llmulu.src diff --git a/src/std/shared/llmulu.src b/src/std/shared/llmulu.src new file mode 100644 index 000000000..eb5291ae8 --- /dev/null +++ b/src/std/shared/llmulu.src @@ -0,0 +1,75 @@ + assume adl=1 + + public __llmuls +__llmuls: + public __llmulu +__llmulu: +; Really slow + push af + push ix + push iy + + push hl + ld l, c + ld h, b + ld.s sp, hl + pop bc + + ld iy, 8 + lea hl, iy + 4 + add hl, sp + xor a, a +.push_loop: + push af + ld a, (hl) + inc hl + dec iyl + jr nz, .push_loop + + lea ix, iy + sbc hl, hl + +.byte_loop: + scf + adc a, a + +.bit_loop: + push af + + add iy, iy + jr c, .shift_c + add ix, ix + db $CA ; jp z, * +.shift_c: + add ix, ix + inc ix + adc hl, hl + + pop af + + jr nc, .add_end + add iy, bc + jr c, .add_c + add ix, de + db $CA ; jp z, * +.add_c: + add ix, de + inc ix + adc.s hl, sp +.add_end: + + add a, a + jr nz, .bit_loop + + pop af + jr nz, .byte_loop + + ld b, h + ld c, l + lea de, ix + lea hl, iy + + pop iy + pop ix + pop af + ret diff --git a/src/std/shared/llmulu_b.src 
b/src/std/shared/llmulu_b.src index 86ad43aa6..1dcf64224 100644 --- a/src/std/shared/llmulu_b.src +++ b/src/std/shared/llmulu_b.src @@ -1,5 +1,3 @@ - public __llmulu -__llmulu: public __llmulu_b __llmulu_b: push af From f3f65eec4994a8d1a3001f0cd6d38ee59549fa7c Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 17 Jul 2021 19:58:49 -0400 Subject: [PATCH 058/142] Fix __llmuls/__llmulu --- src/std/shared/llmulu.src | 60 +++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/std/shared/llmulu.src b/src/std/shared/llmulu.src index eb5291ae8..298cb7a04 100644 --- a/src/std/shared/llmulu.src +++ b/src/std/shared/llmulu.src @@ -5,29 +5,33 @@ __llmuls: public __llmulu __llmulu: ; Really slow - push af + push ix push iy + push af + ld ix, 0 + lea iy, ix - 6 + add iy, sp ; cf=1 + + push de push hl ld l, c ld h, b ld.s sp, hl - pop bc - ld iy, 8 - lea hl, iy + 4 - add hl, sp - xor a, a + lea hl, iy + 18 + ld b, 8 .push_loop: push af ld a, (hl) inc hl - dec iyl - jr nz, .push_loop + or a, a ; cf=0 + djnz .push_loop - lea ix, iy sbc hl, hl + ld e, l + ld d, h .byte_loop: scf @@ -35,41 +39,37 @@ __llmulu: .bit_loop: push af - - add iy, iy - jr c, .shift_c - add ix, ix - db $CA ; jp z, * -.shift_c: add ix, ix - inc ix adc hl, hl - + ex de, hl + adc hl, hl + ex de, hl pop af jr nc, .add_end - add iy, bc - jr c, .add_c - add ix, de - db $CA ; jp z, * -.add_c: - add ix, de - inc ix + ld bc, (iy) + add ix, bc + ld bc, (iy + 3) + adc hl, bc + ex de, hl adc.s hl, sp + ex de, hl .add_end: add a, a jr nz, .bit_loop pop af - jr nz, .byte_loop + jr nc, .byte_loop - ld b, h - ld c, l - lea de, ix - lea hl, iy + ld b, d + ld c, e + ex de, hl + lea hl, ix + pop af + pop af + pop af pop iy pop ix - pop af ret From 89d9409ee5a952c9e39509a4a2699faf6e7f0ec0 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 17 Jul 2021 20:46:26 -0400 Subject: [PATCH 059/142] Fix __llneg and __llneg_fast --- src/std/shared/llneg.src | 9 +++++---- 
src/std/shared/llneg_fast.src | 12 +++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/std/shared/llneg.src b/src/std/shared/llneg.src index 787977752..96284ba7e 100644 --- a/src/std/shared/llneg.src +++ b/src/std/shared/llneg.src @@ -9,14 +9,15 @@ __llneg: sbc hl, de ; uhl=-UHL ex (sp), hl ; uhl=UDE, *spl=-UHL ex de, hl ; ude=UDE, uhl=UHL - sbc hl, hl ; uhl=-(UHL!=0) - or a, a + ld hl, 0 ; uhl=0 sbc hl, de ; uhl=-UDE-(UHL!=0) - ex de, hl ; ude=-UDE-(UHL!=0), uhl=UDE + ; =-UDEUHL>>24 + ex de, hl ; ude=-UDEUHL>>24, uhl=UDE sbc hl, hl ; uhl=-(UDEUHL!=0) or a, a sbc hl, bc ; uhl=-UBC-(UDEUHL!=0) + ; =-UBCUDEUHL>>48 ld c, l - ld b, h ; bc=-BC-(UDEUHL!=0) + ld b, h ; bc=-BCUDEUHL>>48 pop hl ; bcudeuhl=-BCUDEUHL ret diff --git a/src/std/shared/llneg_fast.src b/src/std/shared/llneg_fast.src index db08ac487..0c52104b8 100644 --- a/src/std/shared/llneg_fast.src +++ b/src/std/shared/llneg_fast.src @@ -9,14 +9,16 @@ __llneg_fast: sbc hl, de ; uhl=-UHL ex (sp), hl ; uhl=UDE, *spl=-UHL ex de, hl ; ude=UDE, uhl=UHL - sbc hl, hl ; uhl=-(UHL!=0) - xor a, a ; a=0 + ld hl, 0 ; uhl=0 sbc hl, de ; uhl=-UDE-(UHL!=0) - ex de, hl ; ude=-UDE-(UHL!=0), uhl=UDE + ; =-UDEUHL>>24 + ex de, hl ; ude=-UDEUHL>>24, uhl=UDE sbc a, c ; a=-C-(UDEUHL!=0) - ld c, a ; c=-C-(UDEUHL!=0) + ; =-CUDEUHL>>48 + ld c, a ; c=-CUDEUHL>>48 ld a, 0 ; a=0 sbc a, b ; a=-B-(CUDEUHL!=0) - ld b, a ; b=-B-(CUDEUHL!=0) + ; =-BCUDEUHL>>56 + ld b, a ; b=-BCUDEUHL>>56 pop hl ; bcudeuhl=-BCUDEUHL ret From 844a7ae4bc67665d2debc6ca9a23a0aac2e5e68c Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 22 Jan 2022 19:46:00 -0500 Subject: [PATCH 060/142] Optimize _labs --- src/std/shared/labs.src | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/std/shared/labs.src b/src/std/shared/labs.src index e38dba93d..662fa5e47 100644 --- a/src/std/shared/labs.src +++ b/src/std/shared/labs.src @@ -3,22 +3,14 @@ public _labs _labs: - call __frameset0 - ld hl,(ix+6) 
- ld e,(ix+9) - push hl - push de - call __lcmpzero - call __setflag - pop de + pop bc pop hl - jp p,l_1 - call __lneg -l_1: ld sp,ix - pop ix - ret + pop de + push de + push hl + push bc + bit 7, e + ret z + jp __lneg - extern __frameset0 - extern __lcmpzero - extern __setflag extern __lneg From 53b4694631809e16e65973c49f84b4535bd650b1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 22 Jan 2022 19:47:05 -0500 Subject: [PATCH 061/142] Fix a bug in __lldivu_b? --- src/std/shared/lldivu_b.src | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/std/shared/lldivu_b.src b/src/std/shared/lldivu_b.src index 33e001568..d2c21ce95 100644 --- a/src/std/shared/lldivu_b.src +++ b/src/std/shared/lldivu_b.src @@ -2,12 +2,13 @@ __lldivu: public __lldivu_b __lldivu_b: -; Very unoptimized +; Very slow push af push iy ld iy, 0 add iy, sp ld iy, (iy + 9) + xor a, a ld iyh, 64 .loop: add hl, hl From 14f1ba640bd8eb796c3a539d8aec48821aa95705 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 22 Jan 2022 19:47:44 -0500 Subject: [PATCH 062/142] Implement _llabs --- src/std/shared/labs.src | 2 +- src/std/shared/llabs.src | 19 +++++++++++++++++++ src/std/stdlib.h | 6 ++++-- 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 src/std/shared/llabs.src diff --git a/src/std/shared/labs.src b/src/std/shared/labs.src index 662fa5e47..e5e0365e7 100644 --- a/src/std/shared/labs.src +++ b/src/std/shared/labs.src @@ -1,4 +1,4 @@ -; long labs(long j); +; long labs(long); assume adl=1 public _labs diff --git a/src/std/shared/llabs.src b/src/std/shared/llabs.src new file mode 100644 index 000000000..6212aceb6 --- /dev/null +++ b/src/std/shared/llabs.src @@ -0,0 +1,19 @@ + assume adl=1 + + public _imaxabs +_imaxabs: + public _llabs +_llabs: + pop iy + pop hl + pop de + pop bc + push bc + push de + push hl + push iy + bit 7, b + ret z + jp __llneg + + extern __llneg diff --git a/src/std/stdlib.h b/src/std/stdlib.h index 5376daffd..5aeefa3f7 100644 --- 
a/src/std/stdlib.h +++ b/src/std/stdlib.h @@ -87,9 +87,11 @@ void quick_exit(int) __NOEXCEPT __attribute__((noreturn)); void _Exit(int) __NOEXCEPT __attribute__((noreturn)); -int abs(int j); +int abs(int n); -long labs(long j); +long labs(long n); + +long long llabs(long long n); div_t div(int numer, int denom); From cf69f60d5609de3fafe16f49a7f979beef5d478b Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 00:27:48 -0500 Subject: [PATCH 063/142] Implement atrociously slow __lldivu and __llremu --- src/std/shared/lldivu.src | 13 ++++++ src/std/shared/lldivu_b.src | 2 - src/std/shared/lldvrmu.src | 88 +++++++++++++++++++++++++++++++++++++ src/std/shared/llremu.src | 10 +++++ 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 src/std/shared/lldivu.src create mode 100644 src/std/shared/lldvrmu.src create mode 100644 src/std/shared/llremu.src diff --git a/src/std/shared/lldivu.src b/src/std/shared/lldivu.src new file mode 100644 index 000000000..d68d46425 --- /dev/null +++ b/src/std/shared/lldivu.src @@ -0,0 +1,13 @@ + assume adl=1 + + public __lldivu +__lldivu: + push iy + call __lldvrmu + ld hl, (iy + 12) + ld de, (iy + 15) + ld bc, (iy + 18) + pop iy + ret + + extern __lldvrmu diff --git a/src/std/shared/lldivu_b.src b/src/std/shared/lldivu_b.src index d2c21ce95..9650e355b 100644 --- a/src/std/shared/lldivu_b.src +++ b/src/std/shared/lldivu_b.src @@ -1,5 +1,3 @@ - public __lldivu -__lldivu: public __lldivu_b __lldivu_b: ; Very slow diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src new file mode 100644 index 000000000..c3d642055 --- /dev/null +++ b/src/std/shared/lldvrmu.src @@ -0,0 +1,88 @@ + assume adl=1 + + public __lldvrmu +__lldvrmu: +; Atrociously slow. 
+ + push af + + ld a, i + di + ex af, af' + + ld iy, 8 + add iy, sp + + push bc + push de + push hl + + ld bc, (iy + 7) + sbc hl, hl + ex de, hl + sbc hl, hl + exx + ld de, (iy + 10) + ld hl, (iy + 4) + ld sp, hl + sbc hl, hl + + ld c, 8 + +.byte_loop: + dec iy + ld a, (iy - 9) + + ld b, 8 + +.bit_loop: + adc a, a + exx + adc hl, hl + ex de, hl + adc hl, hl + ex de, hl + exx + adc hl, hl + + exx + sbc hl, sp + ex de, hl + sbc hl, bc + ex de, hl + exx + sbc hl, de + + jr nc, .add_back_skip + exx + add hl, sp + ex de, hl + adc hl, bc + ex de, hl + exx + adc hl, de +.add_back_skip: + + djnz .bit_loop + + adc a, a + cpl + ld (iy + 12), a + + dec c + jr nz, .byte_loop + + ld sp, iy + + push hl + exx + pop bc + + ex af, af' + jp po, .ei_skip + ei +.ei_skip: + + pop af + + ret diff --git a/src/std/shared/llremu.src b/src/std/shared/llremu.src new file mode 100644 index 000000000..8cb75f114 --- /dev/null +++ b/src/std/shared/llremu.src @@ -0,0 +1,10 @@ + assume adl=1 + + public __llremu +__llremu: + push iy + call __lldvrmu + pop iy + ret + + extern __lldvrmu From 3ccc786f429eda8cbdc3723cb2ec77fbe9431b70 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 12:50:24 -0500 Subject: [PATCH 064/142] Fix a bug in __lldvrmu --- src/std/shared/lldvrmu.src | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src index c3d642055..4addfd470 100644 --- a/src/std/shared/lldvrmu.src +++ b/src/std/shared/lldvrmu.src @@ -22,7 +22,8 @@ __lldvrmu: ex de, hl sbc hl, hl exx - ld de, (iy + 10) + ld hl, (iy + 10) + ex.s de, hl ld hl, (iy + 4) ld sp, hl sbc hl, hl From 5035e09ca3e0c63e051e207f7f8a72362b8db3f7 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 13:43:10 -0500 Subject: [PATCH 065/142] Implement atrociously slow __lldivs --- src/std/shared/lldivs.src | 59 ++++++++++++++++++++++++++++++++++++++ src/std/shared/lldivu.src | 13 +++++++-- src/std/shared/lldvrmu.src | 48 
+++++++++++++++++++++---------- src/std/shared/llremu.src | 6 ++++ 4 files changed, 108 insertions(+), 18 deletions(-) create mode 100644 src/std/shared/lldivs.src diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src new file mode 100644 index 000000000..e4c49acc2 --- /dev/null +++ b/src/std/shared/lldivs.src @@ -0,0 +1,59 @@ + assume adl=1 + + public __lldivs +__lldivs: + + push af + push iy + ld iy, 2 + add iy, sp + + ld a, (iy + 14) + cp a, $80 + jr c, .neg_divisor_skip + + push de + push hl + + ld de, (iy + 7) + sbc hl, hl + sbc hl, de + ld (iy + 7), hl + ld de, (iy + 10) + ld hl, 0 + sbc hl, de + ld (iy + 10), hl + ld de, (iy + 13) + sbc hl, hl + or a, a + sbc hl, de + ld (iy + 13), hl + + pop hl + pop de +.neg_divisor_skip: + + xor a, b + ld (iy + 15), a + + ld a, b + add a, a + call c, __llneg + + call __lldvrmu.hijack + + ld hl, (iy + 15) + ld de, (iy + 18) + ld bc, (iy + 21) + + ld a, (iy + 23) + add a, a + call c, __llneg + + pop iy + pop af + + ret + + extern __lldvrmu.hijack + extern __llneg diff --git a/src/std/shared/lldivu.src b/src/std/shared/lldivu.src index d68d46425..d94db4556 100644 --- a/src/std/shared/lldivu.src +++ b/src/std/shared/lldivu.src @@ -2,12 +2,19 @@ public __lldivu __lldivu: + + push af push iy + call __lldvrmu - ld hl, (iy + 12) - ld de, (iy + 15) - ld bc, (iy + 18) + + ld hl, (iy + 15) + ld de, (iy + 18) + ld bc, (iy + 21) + pop iy + pop af + ret extern __lldvrmu diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src index 4addfd470..0f2781e02 100644 --- a/src/std/shared/lldvrmu.src +++ b/src/std/shared/lldvrmu.src @@ -4,27 +4,39 @@ __lldvrmu: ; Atrociously slow. - push af + ld iy, 5 + add iy, sp + public __lldvrmu.hijack +.hijack: ld a, i di - ex af, af' - - ld iy, 8 - add iy, sp + push af push bc push de push hl - - ld bc, (iy + 7) +; Stack frame: +; iy - 17 -> divisor +; iy - 14 -> divisor >> 24 +; iy - 11 -> divisor >> 48 +; iy - 8 -> interrupts state +; iy - 5 -> return vector +; iy - 2 -> ??? 
+; iy + 1 -> ??? +; iy + 4 -> ??? +; iy + 7 -> dividend +; iy + 10 -> dividend >> 24 +; iy + 13 -> dividend >> 48 + + ld bc, (iy + 10) sbc hl, hl ex de, hl sbc hl, hl exx - ld hl, (iy + 10) + ld hl, (iy + 13) ex.s de, hl - ld hl, (iy + 4) + ld hl, (iy + 7) ld sp, hl sbc hl, hl @@ -68,22 +80,28 @@ __lldvrmu: adc a, a cpl - ld (iy + 12), a + ld (iy + 15), a dec c jr nz, .byte_loop ld sp, iy +; Stack frame: +; iy -> interrupts state +; iy + 3 -> return vector +; iy + 6 -> ??? +; iy + 9 -> ??? +; iy + 12 -> ??? +; iy + 15 -> dividend +; iy + 18 -> dividend >> 24 +; iy + 21 -> dividend >> 48 push hl exx pop bc - ex af, af' - jp po, .ei_skip - ei -.ei_skip: - pop af + ret po + ei ret diff --git a/src/std/shared/llremu.src b/src/std/shared/llremu.src index 8cb75f114..a979f3bf3 100644 --- a/src/std/shared/llremu.src +++ b/src/std/shared/llremu.src @@ -2,9 +2,15 @@ public __llremu __llremu: + + push af push iy + call __lldvrmu + pop iy + pop af + ret extern __lldvrmu From 6f110285d4256b5cf5a9a1bd96f81f72f56c2d38 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 14:41:18 -0500 Subject: [PATCH 066/142] Implement atrociously slow __llrems --- src/std/shared/lldivs.src | 30 +++++++++++++++++++++++++----- src/std/shared/lldivu.src | 4 ++-- src/std/shared/llremu.src | 4 ++-- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index e4c49acc2..8ff61dc8f 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -1,10 +1,23 @@ assume adl=1 + public __llrems +__llrems: + + scf +virtual + jr nc, $ + 2 + load .jr_nc: 1 from $$ +end virtual + db .jr_nc + public __lldivs __lldivs: - push af + or a, a + push iy + push af + ld iy, 2 add iy, sp @@ -36,22 +49,29 @@ __lldivs: xor a, b ld (iy + 15), a + pop af + push af ld a, b + jr nc, .rem_save_sign_skip + ld (iy + 15), a +.rem_save_sign_skip: + add a, a call c, __llneg call __lldvrmu.hijack + pop af + jr c, .div_quotient_skip ld hl, (iy + 
15) ld de, (iy + 18) ld bc, (iy + 21) +.div_quotient_skip: - ld a, (iy + 23) - add a, a - call c, __llneg + bit 7, (iy + 23) + call nz, __llneg pop iy - pop af ret diff --git a/src/std/shared/lldivu.src b/src/std/shared/lldivu.src index d94db4556..d3e1ad2ed 100644 --- a/src/std/shared/lldivu.src +++ b/src/std/shared/lldivu.src @@ -3,8 +3,8 @@ public __lldivu __lldivu: - push af push iy + push af call __lldvrmu @@ -12,8 +12,8 @@ __lldivu: ld de, (iy + 18) ld bc, (iy + 21) - pop iy pop af + pop iy ret diff --git a/src/std/shared/llremu.src b/src/std/shared/llremu.src index a979f3bf3..9e4e8e38d 100644 --- a/src/std/shared/llremu.src +++ b/src/std/shared/llremu.src @@ -3,13 +3,13 @@ public __llremu __llremu: - push af push iy + push af call __lldvrmu - pop iy pop af + pop iy ret From 6c1b60a8b8a2194f9ede2b497686d0defbfc2755 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 15:05:05 -0500 Subject: [PATCH 067/142] Add the math test program I've been using this whole time --- examples/standalone/math_test/makefile | 22 ++ examples/standalone/math_test/readme.md | 7 + examples/standalone/math_test/src/main.c | 381 +++++++++++++++++++++++ 3 files changed, 410 insertions(+) create mode 100644 examples/standalone/math_test/makefile create mode 100644 examples/standalone/math_test/readme.md create mode 100644 examples/standalone/math_test/src/main.c diff --git a/examples/standalone/math_test/makefile b/examples/standalone/math_test/makefile new file mode 100644 index 000000000..be0196426 --- /dev/null +++ b/examples/standalone/math_test/makefile @@ -0,0 +1,22 @@ +# ---------------------------- +# Makefile Options +# ---------------------------- + +NAME ?= DEMO +ICON ?= icon.png +DESCRIPTION ?= "CE C Toolchain Demo" +COMPRESSED ?= NO +ARCHIVED ?= NO +# HAS_FLASH_FUNCTIONS ?= NO +OUTPUT_MAP ?= YES + +CFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra +CXXFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra + +# ---------------------------- + +ifndef 
CEDEV +$(error CEDEV environment path variable is not set) +endif + +include $(CEDEV)/meta/makefile.mk diff --git a/examples/standalone/math_test/readme.md b/examples/standalone/math_test/readme.md new file mode 100644 index 000000000..85e36d880 --- /dev/null +++ b/examples/standalone/math_test/readme.md @@ -0,0 +1,7 @@ +### Math test + +Exercises a bunch of math functions. + +--- + +This demo is part of the CE C SDK Toolchain. diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c new file mode 100644 index 000000000..4add9eff2 --- /dev/null +++ b/examples/standalone/math_test/src/main.c @@ -0,0 +1,381 @@ +#define INTERACTIVE 1 + + +#undef NDEBUG + +#include +#include +#include +#include +#include +#include +#include + + +#ifdef _EZ80 +# include +# include + +# if INTERACTIVE +# define x_printf printf +# else +# define x_printf(...) dbg_printf(__VA_ARGS__) +# endif + +#else +# ifndef __cplusplus +typedef int32_t int24_t; +typedef uint32_t uint24_t; +# else +template +struct IntN final +{ +private: + static constexpr int extraBits = sizeof(U) * 8 - bits; + static_assert(extraBits >= 0, "underlying type is not big enough"); + +public: + constexpr IntN(U value) : value(value * (1ULL << extraBits)) {} + constexpr operator U() const { return value >> extraBits; } + +private: + U value; +}; +using uint24_t = IntN<24, uint_fast32_t>; +using int24_t = IntN<24, int_fast32_t>; +# endif + +static int24_t __builtin_bitreverse24(int24_t x) +{ + return __builtin_bitreverse32(x) >> 8; +} +#endif + + +#if INTERACTIVE || !defined(_EZ80) +# define x_printf printf +#else +# define x_printf(...) 
dbg_printf(__VA_ARGS__) +#endif + +static void separateOutput() +{ +#if INTERACTIVE && defined(_EZ80) + os_ClrHomeFull(); +#else + x_printf("\n--------------------------\n"); +#endif +} + +static void waitForKey() +{ +#if INTERACTIVE && defined(_EZ80) + while (!os_GetCSC()) + ; +#endif +} + +static void finishOutput() +{ + waitForKey(); + separateOutput(); +} + + +static uint64_t atoull_(const char *str) +{ + uint64_t result = 0; + while (isdigit(*str)) + { + result = result * 10 + (unsigned)(*str++ - '0'); + } + + return result; +} + +static int64_t atoll_(const char *str) +{ + bool negative = false; + if (*str == '-') + { + negative = true; + str++; + } + + uint64_t x = atoull_(str); + int64_t result = (int64_t)(negative ? -x : x); + + return result; +} + + +#define DEFINE_UNOP_TYPE(u) \ + typedef struct u##UnOp_ \ + { \ + const char *name; \ + u##int8_t (*b)(u##int8_t); \ + u##int16_t (*s)(u##int16_t); \ + u##int24_t (*i)(u##int24_t); \ + u##int32_t (*l)(u##int32_t); \ + u##int64_t (*ll)(u##int64_t); \ + } u##UnOp; + +#define DEFINE_BINOP_TYPE(u) \ + typedef struct u##BinOp_ \ + { \ + const char *name; \ + u##int8_t (*b)(u##int8_t, u##int8_t); \ + u##int16_t (*s)(u##int16_t, u##int16_t); \ + u##int24_t (*i)(u##int24_t, u##int24_t); \ + u##int32_t (*l)(u##int32_t, u##int32_t); \ + u##int64_t (*ll)(u##int64_t, u##int64_t); \ + } u##BinOp; + +DEFINE_UNOP_TYPE() +DEFINE_UNOP_TYPE(u) + +DEFINE_BINOP_TYPE() +DEFINE_BINOP_TYPE(u) + + +#define DEFINE_UNOP_FUNC(op, name, prefix, type) \ + static type prefix##name##_(type x) \ + { \ + return (type)(op(x)); \ + } + +#define DEFINE_BINOP_FUNC(op, name, prefix, type) \ + static type prefix##name##_(type x, type y) \ + { \ + return (type)(x op y); \ + } + +#define DEFINE_UNOP_FUNC_B(op, name, u) DEFINE_UNOP_FUNC(op, name, b, u##int8_t) +#define DEFINE_UNOP_FUNC_S(op, name, u) DEFINE_UNOP_FUNC(op, name, s, u##int16_t) +#define DEFINE_UNOP_FUNC_I(op, name, u) DEFINE_UNOP_FUNC(op, name, i, u##int24_t) +#define 
DEFINE_UNOP_FUNC_L(op, name, u) DEFINE_UNOP_FUNC(op, name, l, u##int32_t) +#define DEFINE_UNOP_FUNC_LL(op, name, u) DEFINE_UNOP_FUNC(op, name, ll, u##int64_t) + +#define DEFINE_BINOP_FUNC_B(op, name, u) DEFINE_BINOP_FUNC(op, name, b, u##int8_t) +#define DEFINE_BINOP_FUNC_S(op, name, u) DEFINE_BINOP_FUNC(op, name, s, u##int16_t) +#define DEFINE_BINOP_FUNC_I(op, name, u) DEFINE_BINOP_FUNC(op, name, i, u##int24_t) +#define DEFINE_BINOP_FUNC_L(op, name, u) DEFINE_BINOP_FUNC(op, name, l, u##int32_t) +#define DEFINE_BINOP_FUNC_LL(op, name, u) DEFINE_BINOP_FUNC(op, name, ll, u##int64_t) + + +#define DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_UNOP_FUNC_B(op, name, u) +#define DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_UNOP_FUNC_S(op, name, u) +#define DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ + DEFINE_UNOP_FUNC_I(op, name, u) +#define DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_UNOP_FUNC_L(op, name, u) +#define DEFINE_UNOP_FUNCS_UP_TO_LL(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_UNOP_FUNC_LL(op, name, u) + +#define DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_BINOP_FUNC_B(op, name, u) +#define DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_BINOP_FUNC_S(op, name, u) +#define DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ + DEFINE_BINOP_FUNC_I(op, name, u) +#define DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_BINOP_FUNC_L(op, name, u) +#define DEFINE_BINOP_FUNCS_UP_TO_LL(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_BINOP_FUNC_LL(op, name, u) + + +#define DEFINE_UNOP_STRUCT_UP_TO_B(name, u) \ + static const u##UnOp unop_##name = {#name, b##name##_}; +#define DEFINE_UNOP_STRUCT_UP_TO_S(name, u) \ + static const u##UnOp unop_##name = {#name, 
b##name##_, s##name##_}; +#define DEFINE_UNOP_STRUCT_UP_TO_I(name, u) \ + static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_}; +#define DEFINE_UNOP_STRUCT_UP_TO_L(name, u) \ + static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; +#define DEFINE_UNOP_STRUCT_UP_TO_LL(name, u) \ + static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; + +#define DEFINE_BINOP_STRUCT_UP_TO_B(name, u) \ + static const u##BinOp binop_##name = {#name, b##name##_}; +#define DEFINE_BINOP_STRUCT_UP_TO_S(name, u) \ + static const u##BinOp binop_##name = {#name, b##name##_, s##name##_}; +#define DEFINE_BINOP_STRUCT_UP_TO_I(name, u) \ + static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_}; +#define DEFINE_BINOP_STRUCT_UP_TO_L(name, u) \ + static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; +#define DEFINE_BINOP_STRUCT_UP_TO_LL(name, u) \ + static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; + + +#define DEFINE_UNOP_UP_TO_B(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_UNOP_STRUCT_UP_TO_B(name, u) +#define DEFINE_UNOP_UP_TO_S(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ + DEFINE_UNOP_STRUCT_UP_TO_S(name, u) +#define DEFINE_UNOP_UP_TO_I(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_UNOP_STRUCT_UP_TO_I(name, u) +#define DEFINE_UNOP_UP_TO_L(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_UNOP_STRUCT_UP_TO_L(name, u) +#define DEFINE_UNOP_UP_TO_LL(op, name, u) \ + DEFINE_UNOP_FUNCS_UP_TO_LL(op, name, u) \ + DEFINE_UNOP_STRUCT_UP_TO_LL(name, u) + +#define DEFINE_BINOP_UP_TO_B(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ + DEFINE_BINOP_STRUCT_UP_TO_B(name, u) +#define DEFINE_BINOP_UP_TO_S(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ + 
DEFINE_BINOP_STRUCT_UP_TO_S(name, u) +#define DEFINE_BINOP_UP_TO_I(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ + DEFINE_BINOP_STRUCT_UP_TO_I(name, u) +#define DEFINE_BINOP_UP_TO_L(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ + DEFINE_BINOP_STRUCT_UP_TO_L(name, u) +#define DEFINE_BINOP_UP_TO_LL(op, name, u) \ + DEFINE_BINOP_FUNCS_UP_TO_LL(op, name, u) \ + DEFINE_BINOP_STRUCT_UP_TO_LL(name, u) + + +static void testUnOp(const UnOp *op, int64_t x) +{ + unsigned nameLength = strlen(op->name); + unsigned prefixLength = 2; + unsigned lhsLength = nameLength + prefixLength; + + x_printf("%*s=%016llX\n\n", lhsLength, "x", (unsigned long long)x); + +#define TEST_UNOP(prefix, bits) \ + if (op->prefix) \ + { \ + unsigned digits = (bits + 3) / 4; \ + x_printf("\n%2s%s=%*s%0*llX", #prefix, op->name, 16 - digits, "", digits, (unsigned long long)(op->prefix)(x) & ((1ULL << (bits - 1) << 1) - 1)); \ + } + + TEST_UNOP(b, 8) + TEST_UNOP(s, 16) + TEST_UNOP(i, 24) + TEST_UNOP(l, 32) + TEST_UNOP(ll, 64) + + finishOutput(); +} + +static void testBinOp(const BinOp *op, int64_t x, int64_t y) +{ + unsigned nameLength = strlen(op->name); + unsigned prefixLength = 2; + unsigned lhsLength = nameLength + prefixLength; + + x_printf("%*s=%016llX\n%*s=%016llX\n", lhsLength, "x", (unsigned long long)x, lhsLength, "y", (unsigned long long)y); + +#define TEST_BINOP(prefix, bits) \ + if (op->prefix) \ + { \ + unsigned digits = (bits + 3) / 4; \ + x_printf("\n%2s%s=%*s%0*llX", #prefix, op->name, 16 - digits, "", digits, (unsigned long long)(op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1)); \ + } + + TEST_BINOP(b, 8) + TEST_BINOP(s, 16) + TEST_BINOP(i, 24) + TEST_BINOP(l, 32) + TEST_BINOP(ll, 64) + + finishOutput(); +} + + +DEFINE_UNOP_UP_TO_LL(~, not, ) +DEFINE_UNOP_UP_TO_LL(-, neg, ) + +DEFINE_UNOP_FUNCS_UP_TO_I(abs, abs, ) +DEFINE_UNOP_FUNC_L(labs, abs, ) +DEFINE_UNOP_FUNC_LL(llabs, abs, ) +DEFINE_UNOP_STRUCT_UP_TO_LL(abs, ) + +DEFINE_UNOP_FUNC_B(__builtin_bitreverse8, 
bitrev, ) +DEFINE_UNOP_FUNC_S(__builtin_bitreverse16, bitrev, ) +DEFINE_UNOP_FUNC_I(__builtin_bitreverse24, bitrev, ) +DEFINE_UNOP_FUNC_L(__builtin_bitreverse32, bitrev, ) +DEFINE_UNOP_FUNC_LL(__builtin_bitreverse64, bitrev, ) +DEFINE_UNOP_STRUCT_UP_TO_LL(bitrev, ) + +// Needs to be unsigned to avoid extra bits from sign extension +DEFINE_UNOP_FUNCS_UP_TO_I(__builtin_popcount, popcnt, u) +DEFINE_UNOP_FUNC_L(__builtin_popcountl, popcnt, u) +DEFINE_UNOP_FUNC_LL(__builtin_popcountll, popcnt, u) +DEFINE_UNOP_STRUCT_UP_TO_LL(popcnt, u) + +DEFINE_BINOP_UP_TO_LL(&, and, ) +DEFINE_BINOP_UP_TO_LL(|, or, ) +DEFINE_BINOP_UP_TO_LL(^, xor, ) +DEFINE_BINOP_UP_TO_LL(+, add, ) +DEFINE_BINOP_UP_TO_LL(-, sub, ) +DEFINE_BINOP_UP_TO_LL(<<, shl, ) +DEFINE_BINOP_UP_TO_LL(>>, shru, u) +DEFINE_BINOP_UP_TO_LL(>>, shrs, ) +DEFINE_BINOP_UP_TO_LL(*, mulu, u) +DEFINE_BINOP_UP_TO_LL(/, divu, u) +DEFINE_BINOP_UP_TO_LL(/, divs, ) +DEFINE_BINOP_UP_TO_LL(%, remu, u) +DEFINE_BINOP_UP_TO_LL(%, rems, ) + + +static const UnOp *unops[] = { + &unop_not, + &unop_neg, + &unop_abs, + &unop_bitrev, + (const UnOp *)&unop_popcnt, +}; + +static const BinOp *binops[] = { + &binop_and, + &binop_or, + &binop_xor, + &binop_add, + &binop_sub, + &binop_shl, + (const BinOp *)&binop_shru, + &binop_shrs, + (const BinOp *)&binop_mulu, + (const BinOp *)&binop_divu, + (const BinOp *)&binop_remu, + &binop_divs, + &binop_rems, +}; + + +int main(int argc, char *argv[]) +{ + int64_t x = argc > 1 ? atoll_(argv[1]) : (int64_t)0xDFA5FBC197EDB389LL; + int64_t y = argc > 2 ? 
atoll_(argv[2]) : (int64_t)0x08010A030C050E07LL; + + separateOutput(); + + for (size_t i = 0; i < sizeof(unops) / sizeof(*unops); i++) + { + testUnOp(unops[i], x); + } + + for (size_t i = 0; i < sizeof(binops) / sizeof(*binops); i++) + { + testBinOp(binops[i], x, y); + } + + return 0; +} From 87cf376f1f2670107e09c763f5adb5bbf0c40780 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 15:05:19 -0500 Subject: [PATCH 068/142] Use fixed (right) output alignment --- examples/standalone/math_test/src/main.c | 34 +++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 4add9eff2..27ef5d7bd 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -252,17 +252,18 @@ DEFINE_BINOP_TYPE(u) static void testUnOp(const UnOp *op, int64_t x) { + unsigned lhsLength = 8; unsigned nameLength = strlen(op->name); - unsigned prefixLength = 2; - unsigned lhsLength = nameLength + prefixLength; + unsigned prefixLength = lhsLength - nameLength; - x_printf("%*s=%016llX\n\n", lhsLength, "x", (unsigned long long)x); + x_printf("%*s=%016llX\n\n", lhsLength, "x", (long long)x); -#define TEST_UNOP(prefix, bits) \ - if (op->prefix) \ - { \ - unsigned digits = (bits + 3) / 4; \ - x_printf("\n%2s%s=%*s%0*llX", #prefix, op->name, 16 - digits, "", digits, (unsigned long long)(op->prefix)(x) & ((1ULL << (bits - 1) << 1) - 1)); \ +#define TEST_UNOP(prefix, bits) \ + if (op->prefix) \ + { \ + unsigned digits = (bits + 3) / 4; \ + unsigned long long result = (op->prefix)(x) & ((1ULL << (bits - 1) << 1) - 1); \ + x_printf("\n%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ } TEST_UNOP(b, 8) @@ -276,17 +277,18 @@ static void testUnOp(const UnOp *op, int64_t x) static void testBinOp(const BinOp *op, int64_t x, int64_t y) { + unsigned lhsLength = 8; unsigned nameLength = strlen(op->name); - 
unsigned prefixLength = 2; - unsigned lhsLength = nameLength + prefixLength; + unsigned prefixLength = lhsLength - nameLength; - x_printf("%*s=%016llX\n%*s=%016llX\n", lhsLength, "x", (unsigned long long)x, lhsLength, "y", (unsigned long long)y); + x_printf("%*s=%016llX\n%*s=%016llX\n", lhsLength, "x", (long long)x, lhsLength, "y", (long long)y); -#define TEST_BINOP(prefix, bits) \ - if (op->prefix) \ - { \ - unsigned digits = (bits + 3) / 4; \ - x_printf("\n%2s%s=%*s%0*llX", #prefix, op->name, 16 - digits, "", digits, (unsigned long long)(op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1)); \ +#define TEST_BINOP(prefix, bits) \ + if (op->prefix) \ + { \ + unsigned digits = (bits + 3) / 4; \ + unsigned long long result= (op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1); \ + x_printf("\n%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ } TEST_BINOP(b, 8) From 5de94c6962ca17603d9ecaeea3c4bc2e1df0aead Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 15:13:37 -0500 Subject: [PATCH 069/142] Hackily combine unary and binary op testing --- examples/standalone/math_test/src/main.c | 54 +++++++++++------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 27ef5d7bd..514c2dada 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -250,54 +250,48 @@ DEFINE_BINOP_TYPE(u) DEFINE_BINOP_STRUCT_UP_TO_LL(name, u) -static void testUnOp(const UnOp *op, int64_t x) +static void testOp(bool isBinOp, const BinOp *op, int64_t x, int64_t y) { unsigned lhsLength = 8; unsigned nameLength = strlen(op->name); unsigned prefixLength = lhsLength - nameLength; - x_printf("%*s=%016llX\n\n", lhsLength, "x", (long long)x); + x_printf("%*s=%016llX\n", lhsLength, "x", (long long)x); + if (!isBinOp) + { + x_printf("\n"); + } + else + { + x_printf("%*s=%016llX\n", lhsLength, "y", (long 
long)y); + } -#define TEST_UNOP(prefix, bits) \ +#define TEST_OP(prefix, bits) \ if (op->prefix) \ { \ unsigned digits = (bits + 3) / 4; \ - unsigned long long result = (op->prefix)(x) & ((1ULL << (bits - 1) << 1) - 1); \ + unsigned long long result = (op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1); \ x_printf("\n%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ } - TEST_UNOP(b, 8) - TEST_UNOP(s, 16) - TEST_UNOP(i, 24) - TEST_UNOP(l, 32) - TEST_UNOP(ll, 64) + TEST_OP(b, 8) + TEST_OP(s, 16) + TEST_OP(i, 24) + TEST_OP(l, 32) + TEST_OP(ll, 64) finishOutput(); } -static void testBinOp(const BinOp *op, int64_t x, int64_t y) -{ - unsigned lhsLength = 8; - unsigned nameLength = strlen(op->name); - unsigned prefixLength = lhsLength - nameLength; - - x_printf("%*s=%016llX\n%*s=%016llX\n", lhsLength, "x", (long long)x, lhsLength, "y", (long long)y); -#define TEST_BINOP(prefix, bits) \ - if (op->prefix) \ - { \ - unsigned digits = (bits + 3) / 4; \ - unsigned long long result= (op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1); \ - x_printf("\n%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ - } - - TEST_BINOP(b, 8) - TEST_BINOP(s, 16) - TEST_BINOP(i, 24) - TEST_BINOP(l, 32) - TEST_BINOP(ll, 64) +static void testUnOp(const UnOp *op, int64_t x) +{ + testOp(false, (const BinOp*)op, x, 0); +} - finishOutput(); +static void testBinOp(const BinOp *op, int64_t x, int64_t y) +{ + testOp(true, op, x, y); } From 1dde047c5039765c26cf43bc6d8f7a99972049b7 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 15:25:42 -0500 Subject: [PATCH 070/142] Fix whereami on Windows --- tools/cedev-config/src/whereami.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cedev-config/src/whereami.c b/tools/cedev-config/src/whereami.c index 116213656..be52f492c 100644 --- a/tools/cedev-config/src/whereami.c +++ b/tools/cedev-config/src/whereami.c @@ -55,7 +55,8 @@ extern "C" { 
#pragma warning(push, 3) #endif #include -#include +// My build environment can't find this file. Seems to build fine without it? +// #include #if defined(_MSC_VER) #pragma warning(pop) #endif From 702cc3b65348de40f89b14a9455d8b7249188b1a Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 15:38:14 -0500 Subject: [PATCH 071/142] OUTPUT_MAP is now enabled by default --- examples/standalone/math_test/makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/standalone/math_test/makefile b/examples/standalone/math_test/makefile index be0196426..24dabc74a 100644 --- a/examples/standalone/math_test/makefile +++ b/examples/standalone/math_test/makefile @@ -8,7 +8,6 @@ DESCRIPTION ?= "CE C Toolchain Demo" COMPRESSED ?= NO ARCHIVED ?= NO # HAS_FLASH_FUNCTIONS ?= NO -OUTPUT_MAP ?= YES CFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra CXXFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra From 50d84e78cefbe3d37ad38d0594da2dbd8bedff87 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 17:24:51 -0500 Subject: [PATCH 072/142] Fix linking to the OS's __snot --- src/std/linked/linked.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index f068d8d7a..dee8f9452 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -70,7 +70,7 @@ __smuls := 000224h __smulu := 000228h public __sneg __sneg := 00022Ch - public __snor + public __snot __snot := 000230h public __sor __sor := 000234h From 1369b762e888d8bceec7f5d11905a6044ae61b9e Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 17:37:46 -0500 Subject: [PATCH 073/142] HAS_FLASH_FUNCTIONS -> STATIC_CRT --- examples/standalone/math_test/makefile | 2 +- src/makefile.mk | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/standalone/math_test/makefile b/examples/standalone/math_test/makefile index 24dabc74a..c1d0ec367 100644 --- 
a/examples/standalone/math_test/makefile +++ b/examples/standalone/math_test/makefile @@ -7,7 +7,7 @@ ICON ?= icon.png DESCRIPTION ?= "CE C Toolchain Demo" COMPRESSED ?= NO ARCHIVED ?= NO -# HAS_FLASH_FUNCTIONS ?= NO +# STATIC_CRT ?= NO CFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra CXXFLAGS ?= -Os -mllvm -inline-threshold=100 -Wall -Wextra diff --git a/src/makefile.mk b/src/makefile.mk index 6ce011238..6aa1fd088 100644 --- a/src/makefile.mk +++ b/src/makefile.mk @@ -39,7 +39,9 @@ CUSTOM_FILE_FILE ?= stdio_file.h DEPS ?= #---------------------------- HAS_UPPERCASE_NAME ?= YES -HAS_FLASH_FUNCTIONS ?= YES +# Prefer static crt functions. +# These will increase output size but are usually faster than those from TI-OS. +STATIC_CRT ?= YES HAS_PRINTF ?= YES HAS_CUSTOM_FILE ?= NO #---------------------------- @@ -181,8 +183,8 @@ ifeq ($(HAS_CUSTOM_FILE),YES) DEFCUSTOMFILE := -DHAS_CUSTOM_FILE=1 -DCUSTOM_FILE_FILE=\"$(CUSTOM_FILE_FILE)\" endif -# choose static or linked flash functions -ifeq ($(HAS_FLASH_FUNCTIONS),YES) +# prefer static crt functions +ifeq ($(STATIC_CRT),YES) LDSTATIC := 1 endif From c57f412e42bb427dea2bb8a7f7ffb82d4c4508db Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 18:03:32 -0500 Subject: [PATCH 074/142] Remove an unnecessary adl assumption --- src/std/shared/internal_bitrev_byte.src | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/std/shared/internal_bitrev_byte.src b/src/std/shared/internal_bitrev_byte.src index c551aaa5b..d7139c231 100644 --- a/src/std/shared/internal_bitrev_byte.src +++ b/src/std/shared/internal_bitrev_byte.src @@ -1,5 +1,3 @@ - assume adl=1 - public __internal_bitrev_byte __internal_bitrev_byte: add hl, hl From 253254de511f51be9f92b3cebec5f1d64928f4c6 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 18:42:50 -0500 Subject: [PATCH 075/142] Add some EOF newlines --- src/std/shared/llcmpu.src | 2 +- src/std/shared/llshrs_fast.src | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src index 7e4373bf8..0a1a0500b 100644 --- a/src/std/shared/llcmpu.src +++ b/src/std/shared/llcmpu.src @@ -22,4 +22,4 @@ __llcmpu: ccf pop hl pop iy - ret \ No newline at end of file + ret diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index d764af43a..7cb4134a7 100644 --- a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -147,4 +147,4 @@ if 0 .shr0: pop bc ret -end if \ No newline at end of file +end if From f5bfa606663e4b1642ea6aba845321a123861d92 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 23 Jan 2022 18:43:33 -0500 Subject: [PATCH 076/142] Fix the calling convention used by _tolower and _toupper --- src/std/static/tolower.src | 10 ++++++---- src/std/static/toupper.src | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/std/static/tolower.src b/src/std/static/tolower.src index 4bc7aea4f..99a280054 100644 --- a/src/std/static/tolower.src +++ b/src/std/static/tolower.src @@ -1,12 +1,14 @@ public _tolower _tolower: - push af + pop de + ex (sp), hl + push de + ld a, l sub a, 'A' cp a, 1+'Z'-'A' - jr nc, .not_A_Z + ret nc + add a, 'a' ld l, a -.not_A_Z: - pop af ret diff --git a/src/std/static/toupper.src b/src/std/static/toupper.src index 98c83f44e..b71288115 100644 --- a/src/std/static/toupper.src +++ b/src/std/static/toupper.src @@ -1,12 +1,14 @@ public _toupper _toupper: - push af + pop de + ex (sp), hl + push de + ld a, l sub a, 'a' cp a, 1+'z'-'a' - jr nc, .not_a_z + ret nc + add a, 'A' ld l, a -.not_a_z: - pop af ret From 3d1d8d9ca0094b78c6c7373c90da9464b49f7dbb Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 12:06:42 -0500 Subject: [PATCH 077/142] Optimize __llneg_fast --- src/std/shared/llneg_fast.src | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/std/shared/llneg_fast.src b/src/std/shared/llneg_fast.src index 0c52104b8..c754d5825 100644 
--- a/src/std/shared/llneg_fast.src +++ b/src/std/shared/llneg_fast.src @@ -9,15 +9,16 @@ __llneg_fast: sbc hl, de ; uhl=-UHL ex (sp), hl ; uhl=UDE, *spl=-UHL ex de, hl ; ude=UDE, uhl=UHL - ld hl, 0 ; uhl=0 + ld l, a ; l=0 + mlt hl ; uhl=0 sbc hl, de ; uhl=-UDE-(UHL!=0) ; =-UDEUHL>>24 ex de, hl ; ude=-UDEUHL>>24, uhl=UDE sbc a, c ; a=-C-(UDEUHL!=0) ; =-CUDEUHL>>48 ld c, a ; c=-CUDEUHL>>48 - ld a, 0 ; a=0 - sbc a, b ; a=-B-(CUDEUHL!=0) + sbc a, a ; a=-(CUDEUHL!=0) + sub a, b ; a=-B-(CUDEUHL!=0) ; =-BCUDEUHL>>56 ld b, a ; b=-BCUDEUHL>>56 pop hl ; bcudeuhl=-BCUDEUHL From 7e4c905e01d7c932565b3ca8243d2ab2b2c6b6c5 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 12:06:56 -0500 Subject: [PATCH 078/142] Add math_test autotest --- examples/standalone/math_test/autotest.json | 212 ++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 examples/standalone/math_test/autotest.json diff --git a/examples/standalone/math_test/autotest.json b/examples/standalone/math_test/autotest.json new file mode 100644 index 000000000..a43d4ac3d --- /dev/null +++ b/examples/standalone/math_test/autotest.json @@ -0,0 +1,212 @@ +{ + "transfer_files": [ + "bin/DEMO.8xp" + ], + "target": { + "name": "DEMO", + "isASM": true + }, + "sequence": [ + "action|launch", + "hashWait|not", + "key|enter", + "hashWait|neg", + "key|enter", + "hashWait|abs", + "key|enter", + "hashWait|bitrev", + "key|enter", + "hashWait|popcnt", + "key|enter", + "hashWait|and", + "key|enter", + "hashWait|or", + "key|enter", + "hashWait|xor", + "key|enter", + "hashWait|add", + "key|enter", + "hashWait|sub", + "key|enter", + "hashWait|shl", + "key|enter", + "hashWait|shru", + "key|enter", + "hashWait|shrs", + "key|enter", + "hashWait|mulu", + "key|enter", + "hashWait|divu", + "key|enter", + "hashWait|remu", + "key|enter", + "hashWait|divs", + "key|enter", + "hashWait|rems", + "key|enter", + "hashWait|done" + ], + "hashes": { + "not": { + "description": "not", + "start": "vram_start", + 
"size": "vram_16_size", + "expected_CRCs": [ + "0B3D9374" + ] + }, + "neg": { + "description": "neg", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "10A84E1B" + ] + }, + "abs": { + "description": "abs", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "D7807035" + ] + }, + "bitrev": { + "description": "bitrev", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "BBADF82E" + ] + }, + "popcnt": { + "description": "popcnt", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "62628040" + ] + }, + "and": { + "description": "and", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "32DEF9D5" + ] + }, + "or": { + "description": "or", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "4E247920" + ] + }, + "xor": { + "description": "xor", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "257F7002" + ] + }, + "or": { + "description": "or", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "4E247920" + ] + }, + "add": { + "description": "add", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "CD575A4D" + ] + }, + "sub": { + "description": "sub", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "EE1C5453" + ] + }, + "shl": { + "description": "shl", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "FD8CA2EC" + ] + }, + "shru": { + "description": "shru", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "9754CB4F" + ] + }, + "shrs": { + "description": "shrs", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "AB5BE5CB" + ] + }, + "mulu": { + "description": "mulu", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "C6A6FF7D" + ] + }, + "divu": { + "description": "divu", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + 
"F973C020" + ] + }, + "remu": { + "description": "remu", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "5196C205" + ] + }, + "divs": { + "description": "divs", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "2A13A348" + ] + }, + "rems": { + "description": "rems", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "F45969EF" + ] + }, + "done": { + "description": "done", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "101734A5", + "FFAF89BA" + ] + } + } +} \ No newline at end of file From 768d79dbd3dc6c77a1556bc8c029d9ee9cccced9 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 18:56:44 -0500 Subject: [PATCH 079/142] Optimize __bdivu, __bdivs, __bremu, and __brems --- src/std/shared/bdivs.src | 55 ++++++++++------------------------ src/std/shared/bdivu.src | 40 +++++-------------------- src/std/shared/bdvrmu.src | 22 ++++++++++++++ src/std/shared/brem_common.src | 26 ++++++++++++++++ src/std/shared/brems.src | 50 +++++++++---------------------- src/std/shared/bremu.src | 40 ++++++------------------- 6 files changed, 95 insertions(+), 138 deletions(-) create mode 100644 src/std/shared/bdvrmu.src create mode 100644 src/std/shared/brem_common.src diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index cf0e71bcc..525d5f6ae 100644 --- a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -1,44 +1,21 @@ -; (c) Copyright 2001-2008 Zilog Inc. -;------------------------------------------------------------------------- -; Signed Byte Division. 
-; Input: -; Operand1: -; B : 8 bit dividend (numerator) -; -; Operand2: -; C : 8 bit divisor (denominator) -; -; Output: -; Result: A : 8 bit quotient -; Registers Used: -; d, e -;------------------------------------------------------------------------- - assume adl=1 - - public __bdivs + public __bdivs __bdivs: - push bc - xor a,a - sub a,b - jp m,_L0 - ld b,a -_L0: - xor a,a - sub a,c - jp m,_L1 - ld c,a -_L1: - call __bdivu - pop bc +; I: B=dividend, C=divisor +; O: a=B/C + push hl - ld l,a - ld a,b - xor a,c - ld a,l - jp p,_L2 - neg -_L2: + + call __brem_common + + ld a, c + xor a, b + + ld a, l pop hl + ret p + + neg ret - extern __bdivu + + extern __bdvrmu diff --git a/src/std/shared/bdivu.src b/src/std/shared/bdivu.src index 52d43bc5d..bb37dd8f7 100644 --- a/src/std/shared/bdivu.src +++ b/src/std/shared/bdivu.src @@ -1,41 +1,17 @@ -; (c) Copyright 2007-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; Unsigned Byte Division. -; Input: -; Operand1: -; B : 8 bit dividend (numerator x) -; -; Operand2: -; C : 8 bit divisor (denominator y) -; -; Output: -; Result: A : 8 bit quotient -; Registers Used: -; none -;------------------------------------------------------------------------- - assume adl=1 - public __bdivu __bdivu: +; I: B=dividend, C=divisor +; O: a=B/C + push bc push hl - ld h,b - xor a,a - ld b,8 ;i = 8 (bit counter) -loop: - sla h - rla - jr c,over - cp a,c - jr c,under -over: - sub a,c - inc h -under: - djnz loop - ld a,h + call __bdvrmu + ld a, l + pop hl pop bc ret + + extern __bdvrmu diff --git a/src/std/shared/bdvrmu.src b/src/std/shared/bdvrmu.src new file mode 100644 index 000000000..4aa265636 --- /dev/null +++ b/src/std/shared/bdvrmu.src @@ -0,0 +1,22 @@ + public __bdvrmu +__bdvrmu: +; I: B=dividend, C=divisor +; O: a=B%C, b=0, h=?, l=B/C + + ld l, b + ld b, 8 + +.loop: + add hl, hl + + ld a, h + sub a, c + + jr c, .bit_skip + ld h, a + inc l +.bit_skip: + + djnz .loop + + ret diff --git 
a/src/std/shared/brem_common.src b/src/std/shared/brem_common.src new file mode 100644 index 000000000..0725ab6d3 --- /dev/null +++ b/src/std/shared/brem_common.src @@ -0,0 +1,26 @@ + public __brem_common +__brem_common: +; I: B=dividend, C=divisor +; O: a=abs(B)%abs(C), h=?, l=abs(B)/abs(C) + + push bc + + xor a, a + sub a, b + jp m, .neg_dividend_skip + ld b, a +.neg_dividend_skip: + + xor a, a + sub a, c + jp m, .neg_divisor_skip + ld b, c +.neg_divisor_skip: + + call __bdvrmu + + pop bc + ret + + + extern __bdvrmu diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 31dc646fd..8c7cfe136 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -1,41 +1,19 @@ -; (c) Copyright 2007-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; Signed Byte Modulus -; Input: -; Operand1: -; A : 8 bit dividend (numerator) -; -; Operand2: -; C : 8 bit divisor (denominator) -; -; Output: -; Result: A : 8 bit modulus -; Registers Used: -; -;------------------------------------------------------------------------- - assume adl=1 - public __brems __brems: - push bc - ld b,a - bit 7,c - jr z,skip1 - ld a,c - neg - ld c,a -skip1: - ld a,b - bit 7,a - jr z,skip2 - neg -skip2: - call __bremu - bit 7,b - jr z,skip3 +; I: B=dividend, C=divisor +; O: a=B%C + + push hl + + call __brem_common + + bit 7, b + + pop hl + ret z + neg -skip3: - pop bc ret - extern __bremu + + extern __brem_common diff --git a/src/std/shared/bremu.src b/src/std/shared/bremu.src index 8c68d922b..26f059702 100644 --- a/src/std/shared/bremu.src +++ b/src/std/shared/bremu.src @@ -1,38 +1,16 @@ -; (c) Copyright 2007-2008 Zilog, Inc. 
-;------------------------------------------------------------------------- -; Unsigned Byte Modulus -; Input: -; Operand1: -; A : 8 bit dividend (numerator) -; -; Operand2: -; C : 8 bit divisor (denominator) -; -; Output: -; Result: A : 8 bit modulus -; Registers Used: -; none -;------------------------------------------------------------------------- - assume adl=1 - public __bremu __bremu: +; I: B=dividend, C=divisor +; O: a=B%C + push bc push hl - ld h,a - xor a,a - ld b,8 -loop: - sla h - rla - jr c,over - cp a,c - jr c,under -over: - sub a,c - inc h -under: - djnz loop + + call __bdvrmu + pop hl pop bc ret + + + extern __bdvrmu From 23debdedd54b13265af6a7659b89ff91b62699b9 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 20:31:12 -0500 Subject: [PATCH 080/142] Fix __bdvrmu --- src/std/shared/bdvrmu.src | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/std/shared/bdvrmu.src b/src/std/shared/bdvrmu.src index 4aa265636..b93231478 100644 --- a/src/std/shared/bdvrmu.src +++ b/src/std/shared/bdvrmu.src @@ -4,6 +4,8 @@ __bdvrmu: ; O: a=B%C, b=0, h=?, l=B/C ld l, b + ld h, 0 + ld b, 8 .loop: From 656b861697bf0976ac30be8256b4c6576be72f24 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 20:46:26 -0500 Subject: [PATCH 081/142] Rename __brem_common to the more correct __bdvrms_common --- src/std/shared/bdivs.src | 2 +- src/std/shared/{brem_common.src => bdvrms_common.src} | 4 ++-- src/std/shared/brems.src | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename src/std/shared/{brem_common.src => bdvrms_common.src} (87%) diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index 525d5f6ae..9b998d1bb 100644 --- a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -5,7 +5,7 @@ __bdivs: push hl - call __brem_common + call __bdvrms_common ld a, c xor a, b diff --git a/src/std/shared/brem_common.src b/src/std/shared/bdvrms_common.src similarity index 87% rename from src/std/shared/brem_common.src rename 
to src/std/shared/bdvrms_common.src index 0725ab6d3..78b0f39e4 100644 --- a/src/std/shared/brem_common.src +++ b/src/std/shared/bdvrms_common.src @@ -1,5 +1,5 @@ - public __brem_common -__brem_common: + public __bdvrms_common +__bdvrms_common: ; I: B=dividend, C=divisor ; O: a=abs(B)%abs(C), h=?, l=abs(B)/abs(C) diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 8c7cfe136..685f2b4a7 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -5,7 +5,7 @@ __brems: push hl - call __brem_common + call __bdvrms_common bit 7, b @@ -16,4 +16,4 @@ __brems: ret - extern __brem_common + extern __bdvrms_common From 8526651e9a63f7ccfce50b3987a188537f3a477b Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Feb 2022 20:52:56 -0500 Subject: [PATCH 082/142] Fix inputs to __bremu and __brems --- src/std/shared/brems.src | 5 +++-- src/std/shared/bremu.src | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 685f2b4a7..8fe777a6f 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -1,10 +1,11 @@ public __brems __brems: -; I: B=dividend, C=divisor -; O: a=B%C +; I: A=dividend, C=divisor +; O: a=A%C push hl + ld b, a call __bdvrms_common bit 7, b diff --git a/src/std/shared/bremu.src b/src/std/shared/bremu.src index 26f059702..8a2ef9e0f 100644 --- a/src/std/shared/bremu.src +++ b/src/std/shared/bremu.src @@ -1,11 +1,12 @@ public __bremu __bremu: -; I: B=dividend, C=divisor -; O: a=B%C +; I: A=dividend, C=divisor +; O: a=A%C push bc push hl + ld b, a call __bdvrmu pop hl From 08bce2cb678463c4e1d218f4720ade49283334d7 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 13:20:55 -0500 Subject: [PATCH 083/142] Fix extern reference --- src/std/shared/bdivs.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index 9b998d1bb..2850b8777 100644 --- 
a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -18,4 +18,4 @@ __bdivs: ret - extern __bdvrmu + extern __bdvrms_common From fdf226ea891442b6057ba75f7d5d0f14a6cfc565 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 14:07:58 -0500 Subject: [PATCH 084/142] Fix __bdvrms_common --- src/std/shared/bdvrms_common.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std/shared/bdvrms_common.src b/src/std/shared/bdvrms_common.src index 78b0f39e4..4ac8dfb4d 100644 --- a/src/std/shared/bdvrms_common.src +++ b/src/std/shared/bdvrms_common.src @@ -14,7 +14,7 @@ __bdvrms_common: xor a, a sub a, c jp m, .neg_divisor_skip - ld b, c + ld c, a .neg_divisor_skip: call __bdvrmu From 17a99714def36fc71337d02daa69897c9fe9efe1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 14:10:44 -0500 Subject: [PATCH 085/142] Reorder code in __bdvrms_common --- src/std/shared/bdvrms_common.src | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/std/shared/bdvrms_common.src b/src/std/shared/bdvrms_common.src index 4ac8dfb4d..09440270b 100644 --- a/src/std/shared/bdvrms_common.src +++ b/src/std/shared/bdvrms_common.src @@ -5,18 +5,18 @@ __bdvrms_common: push bc - xor a, a - sub a, b - jp m, .neg_dividend_skip - ld b, a -.neg_dividend_skip: - xor a, a sub a, c jp m, .neg_divisor_skip ld c, a .neg_divisor_skip: + xor a, a + sub a, b + jp m, .neg_dividend_skip + ld b, a +.neg_dividend_skip: + call __bdvrmu pop bc From bcbe16bdeb1adc24b96e916bc6197d440efc5858 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 15:50:23 -0500 Subject: [PATCH 086/142] Implement optimized __idivu, __idivs, __iremu, and __irems --- src/std/linked/linked.src | 10 +++++++++ src/std/shared/shared.src | 7 +----- src/std/static/idivs.src | 47 +++++++++++++++++++++++++++++++++++++++ src/std/static/idivu.src | 17 ++++++++++++++ src/std/static/idvrmu.src | 28 +++++++++++++++++++++++ src/std/static/irems.src | 39 
++++++++++++++++++++++++++++++++ src/std/static/iremu.src | 16 +++++++++++++ 7 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 src/std/static/idivs.src create mode 100644 src/std/static/idivu.src create mode 100644 src/std/static/idvrmu.src create mode 100644 src/std/static/irems.src create mode 100644 src/std/static/iremu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index dee8f9452..783ff0dc1 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -88,6 +88,12 @@ __sxor := 000268h __iand := 000134h public __icmpzero __icmpzero := 000138h + public __idivs +__idivs := 00013Ch + public __idivu +__idivu := 000140h + public __idvrmu +__idvrmu := 000144h public __imuls __imuls := 000154h public __imulu @@ -98,6 +104,10 @@ __ineg := 000160h __inot := 000164h public __ior __ior := 000168h + public __irems +__irems := 00016Ch + public __iremu +__iremu := 000170h public __ishl __ishl := 000174h public __ishrs diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index be70fb97c..ace728de0 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __idivs, __idivu, __idvrmu, __ildix, __ildiy, __imul_b, __indcall, __irems, __iremu, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, 
_strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __ildix, __ildiy, __imul_b, __indcall, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok __bldiy := 0000FCh __bstix := 00010Ch __bstiy := 000108h @@ -28,15 +28,10 @@ __fruitof := 0002BCh __frustof := 0002C8h __fsub := 000290h __ftol := 00027Ch -__idivs := 00013Ch -__idivu := 000140h -__idvrmu := 000144h __ildix := 000148h __ildiy := 00014Ch __imul_b := 000150h __indcall := 00015Ch -__irems := 00016Ch -__iremu := 000170h __ishl_b := 000178h __ishrs_b := 000180h __ishru_b := 000188h diff --git a/src/std/static/idivs.src b/src/std/static/idivs.src new file mode 100644 index 000000000..9813a3e0f --- /dev/null +++ b/src/std/static/idivs.src @@ -0,0 +1,47 @@ + public __idivs +__idivs: +; I: UHL=dividend, UBC=divisor +; O: uhl=UHL/UBC + + push bc + push de + push af + + ex de, hl + + sbc hl, hl + ccf + sbc hl, bc + inc hl + jp m, .neg_divisor_skip + cpl + push hl + pop bc +.neg_divisor_skip: + + sbc hl, hl + ccf + sbc hl, de + inc hl + jp p, .pos_dividend_skip + cpl + ex de, hl +.pos_dividend_skip: + + push af + + call __idvrmu + ex de, hl + + pop bc + pop af + cp a, b + + pop de + pop bc + ret nz + jp __ineg + + + extern __idvrmu + extern __ineg diff --git 
a/src/std/static/idivu.src b/src/std/static/idivu.src new file mode 100644 index 000000000..16d177a62 --- /dev/null +++ b/src/std/static/idivu.src @@ -0,0 +1,17 @@ + public __idivu +__idivu: +; I: UHL=dividend, UBC=divisor +; O: uhl=UHL/UBC + + push af + push de + + call __idvrmu + ex de, hl + + pop de + pop af + ret + + + extern __idvrmu diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src new file mode 100644 index 000000000..94801ee24 --- /dev/null +++ b/src/std/static/idvrmu.src @@ -0,0 +1,28 @@ + public __idvrmu +__idvrmu: +; I: UHL=dividend, UBC=divisor +; O: a=0, ude=UHL/UBC, uhl=UHL%UBC + + or a, a + ex de, hl + sbc hl, hl + + ld a, 24 + +.loop: + add hl, hl + ex de, hl + adc hl, hl + + sbc hl, bc + inc e + + jr nc, .restore_skip + dec e + add hl, bc +.restore_skip: + + dec a + jr nz, .loop + + ret diff --git a/src/std/static/irems.src b/src/std/static/irems.src new file mode 100644 index 000000000..60b06a1f1 --- /dev/null +++ b/src/std/static/irems.src @@ -0,0 +1,39 @@ + public __irems +__irems: +; I: UHL=dividend, UBC=divisor +; O: uhl=UHL%UBC + + push bc + push de + + ex de, hl + + or a, a + sbc hl, hl + sbc hl, bc + jp m, .neg_divisor_skip + push hl + pop bc +.neg_divisor_skip: + + sbc hl, hl + ccf + sbc hl, de + inc hl + jp p, .pos_dividend_skip + ex de, hl +.pos_dividend_skip: + + push af + + call __idvrmu + + pop af + pop de + pop bc + ret m + jp __ineg + + + extern __idvrmu + extern __ineg diff --git a/src/std/static/iremu.src b/src/std/static/iremu.src new file mode 100644 index 000000000..af6b413c7 --- /dev/null +++ b/src/std/static/iremu.src @@ -0,0 +1,16 @@ + public __iremu +__iremu: +; I: UHL=dividend, UBC=divisor +; O: uhl=UHL%UBC + + push af + push de + + call __idvrmu + + pop de + pop af + ret + + + extern __idvrmu From 7c8954e086e13e7926b504124b47447aed4b29db Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 16:05:36 -0500 Subject: [PATCH 087/142] Fix __idvrmu --- src/std/static/idvrmu.src | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src index 94801ee24..c32482b5b 100644 --- a/src/std/static/idvrmu.src +++ b/src/std/static/idvrmu.src @@ -3,13 +3,14 @@ __idvrmu: ; I: UHL=dividend, UBC=divisor ; O: a=0, ude=UHL/UBC, uhl=UHL%UBC - or a, a ex de, hl + or a, a sbc hl, hl ld a, 24 .loop: + ex de, hl add hl, hl ex de, hl adc hl, hl @@ -18,8 +19,8 @@ __idvrmu: inc e jr nc, .restore_skip - dec e add hl, bc + dec e .restore_skip: dec a From d7e7f34125b36b8056e48087c4d7b44136d29073 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 9 Feb 2022 16:38:18 -0500 Subject: [PATCH 088/142] Add missing adl assumes --- src/std/static/idivs.src | 2 ++ src/std/static/idivu.src | 2 ++ src/std/static/idvrmu.src | 2 ++ src/std/static/irems.src | 2 ++ src/std/static/iremu.src | 2 ++ 5 files changed, 10 insertions(+) diff --git a/src/std/static/idivs.src b/src/std/static/idivs.src index 9813a3e0f..97e682d06 100644 --- a/src/std/static/idivs.src +++ b/src/std/static/idivs.src @@ -1,3 +1,5 @@ + assume adl=1 + public __idivs __idivs: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/idivu.src b/src/std/static/idivu.src index 16d177a62..a077372e3 100644 --- a/src/std/static/idivu.src +++ b/src/std/static/idivu.src @@ -1,3 +1,5 @@ + assume adl=1 + public __idivu __idivu: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src index c32482b5b..a5d6b96b5 100644 --- a/src/std/static/idvrmu.src +++ b/src/std/static/idvrmu.src @@ -1,3 +1,5 @@ + assume adl=1 + public __idvrmu __idvrmu: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/irems.src b/src/std/static/irems.src index 60b06a1f1..2c95875cf 100644 --- a/src/std/static/irems.src +++ b/src/std/static/irems.src @@ -1,3 +1,5 @@ + assume adl=1 + public __irems __irems: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/iremu.src b/src/std/static/iremu.src index af6b413c7..2851493f8 100644 --- 
a/src/std/static/iremu.src +++ b/src/std/static/iremu.src @@ -1,3 +1,5 @@ + assume adl=1 + public __iremu __iremu: ; I: UHL=dividend, UBC=divisor From d696fb3fb1241b221c33f59e1e0d7690401197e7 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 12 Feb 2022 18:20:37 -0500 Subject: [PATCH 089/142] Implement optimized __ldivu and __lremu --- src/std/linked/linked.src | 6 ++ src/std/shared/lldvrmu.src | 1 - src/std/shared/shared.src | 5 +- src/std/static/ldivu.src | 20 +++++++ src/std/static/ldvrmu.src | 116 +++++++++++++++++++++++++++++++++++++ src/std/static/lremu.src | 20 +++++++ 6 files changed, 163 insertions(+), 5 deletions(-) create mode 100644 src/std/static/ldivu.src create mode 100644 src/std/static/ldvrmu.src create mode 100644 src/std/static/lremu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index 783ff0dc1..e6d977b2c 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -130,6 +130,10 @@ __lcmps := 0001A8h __lcmpu := 0001ACh public __lcmpzero __lcmpzero := 0001B0h + public __ldivu +__ldivu := 0001B8h + public __ldvrmu +__ldvrmu := 0001BCh public __lmuls __lmuls := 0001C8h public __lmulu @@ -140,6 +144,8 @@ __lneg := 0001D0h __lnot := 0001D4h public __lor __lor := 0001D8h + public __lremu +__lremu := 0001E0h public __lshl __lshl := 0001E4h public __lshrs diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src index 0f2781e02..ac75062fa 100644 --- a/src/std/shared/lldvrmu.src +++ b/src/std/shared/lldvrmu.src @@ -103,5 +103,4 @@ __lldvrmu: pop af ret po ei - ret diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index ace728de0..420646db6 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, 
__frustof, __fsub, __ftol, __ildix, __ildiy, __imul_b, __indcall, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __ldivu, __ldvrmu, __lldix, __lldiy, __lrems, __lremu, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __ildix, __ildiy, __imul_b, __indcall, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __lldix, __lldiy, __lrems, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok __bldiy := 0000FCh __bstix := 00010Ch __bstiy := 000108h @@ -39,12 +39,9 @@ __istix := 00018Ch __istiy := 000190h __itol := 000194h __ldivs := 0001B4h -__ldivu := 0001B8h -__ldvrmu := 0001BCh __lldix := 0001C0h __lldiy := 0001C4h __lrems := 0001DCh -__lremu := 0001E0h __lstix := 0001F0h __lstiy := 0001F4h __ltof := 000284h diff --git a/src/std/static/ldivu.src b/src/std/static/ldivu.src new file mode 100644 index 000000000..9ae597fcd --- /dev/null +++ b/src/std/static/ldivu.src @@ -0,0 +1,20 @@ + assume adl=1 + + public __ldivu +__ldivu: +; I: EUHL=dividend, AUBC=divisor +; O: 
euhl=EUHL/AUBC + + call __ldvrmu + ld a, b + exx + push bc + exx + pop bc + + ret z + ei + ret + + + extern __ldvrmu diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src new file mode 100644 index 000000000..8a5ad6175 --- /dev/null +++ b/src/std/static/ldvrmu.src @@ -0,0 +1,116 @@ + assume adl=1 + +;C| ldiv_t _ldvrmu(uint32_t dividend, uint32_t divisor) { + public __ldvrmu +__ldvrmu: +if 1 +; I: EUHL=dividend, AUBC=divisor +; O: a[uhl']=EUHL%AUBC, bcu=0, b=A, c=?, euhl=EUHL/AUBC, eubc'=AUBC, zf=!IEF2 + +;C| ldiv_t result; +;C| result.quot = dividend; + ; euhl : result.quot + + push bc + + ld c, a ; c = A + ld a, i ; a = I + ; pf = IEF2 + di + + ld a, c ; a = A + exx + pop bc + ld e, a ; eubc' : divisor + + push af + +;C| result.rem = 0; + xor a, a + sbc hl, hl ; auhl' : result.rem + +;C| int i = 32; + exx + ld b, 32 ; b : i + +;C| do { +.loop: + +;C| bool dividendBit = result.quot >> 31; +;C| result.quot <<= 1; + add hl, hl + rl e +;C| result.rem = (result.rem << 1) + dividendBit; + exx + adc hl, hl + adc a, a + +;C| bool quotBit = result.rem < divisor; +;C| result.rem -= divisor; + sbc hl, bc + sbc a, e + +;C| if (!quotBit) { + jr nc, .restore_skip +;C| result.rem += divisor; + add hl, bc + adc a, e +;C| } +.restore_skip: + +;C| if (quotBit) { + exx + jr c, .1_skip +;C| result.quot++; + inc l +;C| } +.1_skip: + +;C| } while (--i != 0); + djnz .loop + +;C| return result; + pop bc + bit 2, c + ret +;C| } + +else +; I: EUHL=dividend, AUBC=divisor +; O: a=0, ude=UHL/UBC, uhl=UHL%UBC + + push ix + push hl + ex (sp), iy ; euiy = dividend + + ld ixl, a ; [ixl]ubc = divisor + + xor a, a + sbc hl, hl ; auhl = remainder + + ld ixh, 32 + +.loop: + add iy, iy + rl e + adc hl, hl + rla + + sbc hl, bc + sbc a, ixl + inc iyl + + jr nc, .restore_skip + add hl, bc + adc a, ixl + dec iyl +.restore_skip: + + dec ixh + jr nz, .loop + + + + ret + +end if diff --git a/src/std/static/lremu.src b/src/std/static/lremu.src new file mode 100644 index 000000000..67a54b394 --- 
/dev/null +++ b/src/std/static/lremu.src @@ -0,0 +1,20 @@ + assume adl=1 + + public __lremu +__lremu: +; I: EUHL=dividend, AUBC=divisor +; O: euhl=EUHL%AUBC + + call __ldvrmu + ld e, a + push de + exx + ld a, e + pop de + + ret z + ei + ret + + + extern __ldvrmu From 94e8b05dcf44b0ce8e713de25fb053198e796d45 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 12 Feb 2022 18:41:04 -0500 Subject: [PATCH 090/142] Implement less optimal __ldivu and __ldivs Currently disabled. These versions avoid shadow registers and changing the signature of __ldvrmu. --- src/std/static/ldivu.src | 14 ++++++++++++++ src/std/static/ldvrmu.src | 25 +++++++++++-------------- src/std/static/lremu.src | 14 ++++++++++++++ 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/std/static/ldivu.src b/src/std/static/ldivu.src index 9ae597fcd..9ccd71096 100644 --- a/src/std/static/ldivu.src +++ b/src/std/static/ldivu.src @@ -5,6 +5,7 @@ __ldivu: ; I: EUHL=dividend, AUBC=divisor ; O: euhl=EUHL/AUBC +if 1 call __ldvrmu ld a, b exx @@ -16,5 +17,18 @@ __ldivu: ei ret +else + push ix + push iy + + call __ldvrmu + ld a, iyh + pop iy + ex (sp), ix + pop hl + + ret +end if + extern __ldvrmu diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src index 8a5ad6175..d2bca8b63 100644 --- a/src/std/static/ldvrmu.src +++ b/src/std/static/ldvrmu.src @@ -77,40 +77,37 @@ if 1 else ; I: EUHL=dividend, AUBC=divisor -; O: a=0, ude=UHL/UBC, uhl=UHL%UBC +; O: auhl=EUHL%AUBC, euix=EUHL/AUBC, iyh=A, iyl=0 - push ix push hl - ex (sp), iy ; euiy = dividend + pop ix ; euix = dividend - ld ixl, a ; [ixl]ubc = divisor + ld iyh, a ; [iyh]ubc = divisor xor a, a - sbc hl, hl ; auhl = remainder + sbc hl, hl ; auhl = remainder - ld ixh, 32 + ld iyl, 32 .loop: - add iy, iy + add ix, ix rl e adc hl, hl rla sbc hl, bc - sbc a, ixl - inc iyl + sbc a, iyh + inc ixl jr nc, .restore_skip add hl, bc - adc a, ixl - dec iyl + adc a, iyh + dec ixl .restore_skip: - dec ixh + dec iyl jr nz, .loop - - ret end if 
diff --git a/src/std/static/lremu.src b/src/std/static/lremu.src index 67a54b394..a2993f5d0 100644 --- a/src/std/static/lremu.src +++ b/src/std/static/lremu.src @@ -5,6 +5,7 @@ __lremu: ; I: EUHL=dividend, AUBC=divisor ; O: euhl=EUHL%AUBC +if 1 call __ldvrmu ld e, a push de @@ -16,5 +17,18 @@ __lremu: ei ret +else + push ix + push iy + + call __ldvrmu + ld e, a + ld a, iyh + + pop iy + pop ix + ret +end if + extern __ldvrmu From 8c379ae45d01b1526a2ada6db1ce938bcf39ee05 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 12 Feb 2022 18:49:38 -0500 Subject: [PATCH 091/142] Let's make this valid C --- src/std/static/ldvrmu.src | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src index d2bca8b63..fd3f6c80f 100644 --- a/src/std/static/ldvrmu.src +++ b/src/std/static/ldvrmu.src @@ -1,14 +1,14 @@ assume adl=1 -;C| ldiv_t _ldvrmu(uint32_t dividend, uint32_t divisor) { +;;; ldiv_t _ldvrmu(uint32_t dividend, uint32_t divisor) { public __ldvrmu __ldvrmu: if 1 ; I: EUHL=dividend, AUBC=divisor ; O: a[uhl']=EUHL%AUBC, bcu=0, b=A, c=?, euhl=EUHL/AUBC, eubc'=AUBC, zf=!IEF2 -;C| ldiv_t result; -;C| result.quot = dividend; +;;; ldiv_t result; +;;; result.quot = dividend; ; euhl : result.quot push bc @@ -25,55 +25,55 @@ if 1 push af -;C| result.rem = 0; +;;; result.rem = 0; xor a, a sbc hl, hl ; auhl' : result.rem -;C| int i = 32; +;;; int i = 32; exx ld b, 32 ; b : i -;C| do { +;;; do { .loop: -;C| bool dividendBit = result.quot >> 31; -;C| result.quot <<= 1; +;;; bool dividendBit = result.quot >> 31; +;;; result.quot <<= 1; add hl, hl rl e -;C| result.rem = (result.rem << 1) + dividendBit; +;;; result.rem = (result.rem << 1) + dividendBit; exx adc hl, hl adc a, a -;C| bool quotBit = result.rem < divisor; -;C| result.rem -= divisor; +;;; bool quotBit = result.rem < divisor; +;;; result.rem -= divisor; sbc hl, bc sbc a, e -;C| if (!quotBit) { +;;; if (!quotBit) { jr nc, 
.restore_skip -;C| result.rem += divisor; +;;; result.rem += divisor; add hl, bc adc a, e -;C| } +;;; } .restore_skip: -;C| if (quotBit) { +;;; if (quotBit) { exx jr c, .1_skip -;C| result.quot++; +;;; result.quot++; inc l -;C| } +;;; } .1_skip: -;C| } while (--i != 0); +;;; } while (--i != 0); djnz .loop -;C| return result; +;;; return result; pop bc bit 2, c ret -;C| } +;;; } else ; I: EUHL=dividend, AUBC=divisor From 1b5ead2a9d41c01d6e7ed02ee30635bc5af91dc4 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 12 Feb 2022 18:50:07 -0500 Subject: [PATCH 092/142] Fix indentation --- src/std/static/ldvrmu.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src index fd3f6c80f..ffcf0ce27 100644 --- a/src/std/static/ldvrmu.src +++ b/src/std/static/ldvrmu.src @@ -52,7 +52,7 @@ if 1 ;;; if (!quotBit) { jr nc, .restore_skip -;;; result.rem += divisor; +;;; result.rem += divisor; add hl, bc adc a, e ;;; } From 2de19c71d0e94455c9264a700135b010cbb0218b Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 12 Feb 2022 19:13:38 -0500 Subject: [PATCH 093/142] Fix the C version of __ldvrmu --- src/std/static/ldvrmu.src | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src index ffcf0ce27..6a2639310 100644 --- a/src/std/static/ldvrmu.src +++ b/src/std/static/ldvrmu.src @@ -1,13 +1,17 @@ assume adl=1 -;;; ldiv_t _ldvrmu(uint32_t dividend, uint32_t divisor) { +;;; struct u32div_t { +;;; uint32_t rem; +;;; uint32_t quot; +;;; }; +;;; u32div_t _ldvrmu(uint32_t dividend, uint32_t divisor) { public __ldvrmu __ldvrmu: if 1 ; I: EUHL=dividend, AUBC=divisor ; O: a[uhl']=EUHL%AUBC, bcu=0, b=A, c=?, euhl=EUHL/AUBC, eubc'=AUBC, zf=!IEF2 -;;; ldiv_t result; +;;; u32div_t result; ;;; result.quot = dividend; ; euhl : result.quot @@ -45,7 +49,7 @@ if 1 adc hl, hl adc a, a -;;; bool quotBit = result.rem < divisor; +;;; bool quotBit = result.rem 
>= divisor; ;;; result.rem -= divisor; sbc hl, bc sbc a, e From 933fffb6d253e3ce0c996b19ef66ac4cc5daa076 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 12:54:03 -0500 Subject: [PATCH 094/142] Implement optimized __ldivs and __lrems --- src/std/linked/linked.src | 4 ++++ src/std/shared/shared.src | 4 +--- src/std/static/ldivs.src | 42 +++++++++++++++++++++++++++++++++++++++ src/std/static/lrems.src | 35 ++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 src/std/static/ldivs.src create mode 100644 src/std/static/lrems.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index e6d977b2c..ebce0ce09 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -130,6 +130,8 @@ __lcmps := 0001A8h __lcmpu := 0001ACh public __lcmpzero __lcmpzero := 0001B0h + public __ldivs +__ldivs := 0001B4h public __ldivu __ldivu := 0001B8h public __ldvrmu @@ -144,6 +146,8 @@ __lneg := 0001D0h __lnot := 0001D4h public __lor __lor := 0001D8h + public __lrems +__lrems := 0001DCh public __lremu __lremu := 0001E0h public __lshl diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index 420646db6..3da5b2b2f 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -1,4 +1,4 @@ - public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __ildix, __ildiy, __imul_b, __indcall, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __ldivs, __lldix, __lldiy, __lrems, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, 
_strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok + public __bldiy, __bstix, __bstiy, __case, __case16, __case16D, __case24, __case24D, __case8, __case8D, __fadd, __fcmp, __fdiv, __fmul, __fppack, __frbtof, __frftob, __frftoi, __frftos, __frftoub, __frftoui, __frftous, __fritof, __frstof, __frubtof, __fruitof, __frustof, __fsub, __ftol, __ildix, __ildiy, __imul_b, __indcall, __ishl_b, __ishrs_b, __ishru_b, __istix, __istiy, __itol, __lldix, __lldiy, __lstix, __lstiy, __ltof, __sdivs, __sdivu, __seqcase, __seqcaseD, __setflag, __sldix, __sldiy, __srems, __sremu, __sshl_b, __sshrs_b, __sshru_b, __sstix, __sstiy, __stoi, __stoiu, __ultof, _longjmp, _memchr, _memcmp, _memcpy, _memmove, _setjmp, ___sprintf, _sqrtf, _strcasecmp, _strcat, _strchr, _strcmp, _strcpy, _strcspn, _strlen, _strncat, _strncmp, _strncpy, _strpbrk, _strspn, _strstr, _strtok __bldiy := 0000FCh __bstix := 00010Ch __bstiy := 000108h @@ -38,10 +38,8 @@ __ishru_b := 000188h __istix := 00018Ch __istiy := 000190h __itol := 000194h -__ldivs := 0001B4h __lldix := 0001C0h __lldiy := 0001C4h -__lrems := 0001DCh __lstix := 0001F0h __lstiy := 0001F4h __ltof := 000284h diff --git a/src/std/static/ldivs.src b/src/std/static/ldivs.src new file mode 100644 index 000000000..aa0655e50 --- /dev/null +++ b/src/std/static/ldivs.src @@ -0,0 +1,42 @@ + assume adl=1 + + public __ldivs +__ldivs: +; I: EUHL=dividend, AUBC=divisor +; O: euhl=EUHL/AUBC + + bit 7, e + + push af + push bc + + call nz, __lneg + + cp a, $80 + jr c, .neg_divisor_skip + push hl + sbc hl, hl + sbc hl, bc + ex (sp), hl + pop bc + cpl + sbc a, -1 +.neg_divisor_skip: + + call __ldivu + + pop bc + pop af + + rlca + rrca + jr nz, .pos_dividend_skip + ccf +.pos_dividend_skip: + + ret c + jp __lneg + + + extern __ldivu + extern __lneg diff --git a/src/std/static/lrems.src b/src/std/static/lrems.src new file mode 100644 index 000000000..e6c027ef0 --- /dev/null +++ 
b/src/std/static/lrems.src @@ -0,0 +1,35 @@ + assume adl=1 + + public __lrems +__lrems: +; I: EUHL=dividend, AUBC=divisor +; O: euhl=EUHL%AUBC + + bit 7, e + + push af + push bc + + call nz, __lneg + + cp a, $80 + jr c, .neg_divisor_skip + push hl + sbc hl, hl + sbc hl, bc + ex (sp), hl + pop bc + cpl + sbc a, -1 +.neg_divisor_skip: + + call __lremu + + pop bc + pop af + ret z + jp __lneg + + + extern __lneg + extern __lremu From a7f8cca4aa96e713cb71f9ef5a57b7c6503ceeb8 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 13:23:35 -0500 Subject: [PATCH 095/142] __bdvrms_common -> __bdvrms --- src/std/shared/bdivs.src | 4 ++-- src/std/shared/{bdvrms_common.src => bdvrms.src} | 4 ++-- src/std/shared/brems.src | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename src/std/shared/{bdvrms_common.src => bdvrms.src} (87%) diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index 2850b8777..317eb6296 100644 --- a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -5,7 +5,7 @@ __bdivs: push hl - call __bdvrms_common + call __bdvrms ld a, c xor a, b @@ -18,4 +18,4 @@ __bdivs: ret - extern __bdvrms_common + extern __bdvrms diff --git a/src/std/shared/bdvrms_common.src b/src/std/shared/bdvrms.src similarity index 87% rename from src/std/shared/bdvrms_common.src rename to src/std/shared/bdvrms.src index 09440270b..84cd624a0 100644 --- a/src/std/shared/bdvrms_common.src +++ b/src/std/shared/bdvrms.src @@ -1,5 +1,5 @@ - public __bdvrms_common -__bdvrms_common: + public __bdvrms +__bdvrms: ; I: B=dividend, C=divisor ; O: a=abs(B)%abs(C), h=?, l=abs(B)/abs(C) diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 8fe777a6f..df7ad5a8c 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -6,7 +6,7 @@ __brems: push hl ld b, a - call __bdvrms_common + call __bdvrms bit 7, b @@ -17,4 +17,4 @@ __brems: ret - extern __bdvrms_common + extern __bdvrms From 92a3c1db42ea655af31e977779dc24fefd5a0b32 Mon Sep 17 
00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 13:43:43 -0500 Subject: [PATCH 096/142] Slightly optimize __bdvrms --- src/std/shared/bdvrms.src | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms.src index 84cd624a0..d0d92a804 100644 --- a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms.src @@ -5,15 +5,19 @@ __bdvrms: push bc + ld a, c + rla + jr nc, .neg_divisor_skip xor a, a sub a, c - jp m, .neg_divisor_skip ld c, a .neg_divisor_skip: + ld a, b + rla + jr nc, .neg_dividend_skip xor a, a sub a, b - jp m, .neg_dividend_skip ld b, a .neg_dividend_skip: From 3f8b26dcbf2a0bf022879282a2cc6afdc6099e9f Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 13:44:20 -0500 Subject: [PATCH 097/142] Share code common to __ldivs and __lrems --- src/std/static/ldivs.src | 15 ++------------- src/std/static/ldivs_lrems_common.src | 24 ++++++++++++++++++++++++ src/std/static/lrems.src | 15 ++------------- 3 files changed, 28 insertions(+), 26 deletions(-) create mode 100644 src/std/static/ldivs_lrems_common.src diff --git a/src/std/static/ldivs.src b/src/std/static/ldivs.src index aa0655e50..3e9add9ad 100644 --- a/src/std/static/ldivs.src +++ b/src/std/static/ldivs.src @@ -6,22 +6,10 @@ __ldivs: ; O: euhl=EUHL/AUBC bit 7, e - push af push bc - call nz, __lneg - - cp a, $80 - jr c, .neg_divisor_skip - push hl - sbc hl, hl - sbc hl, bc - ex (sp), hl - pop bc - cpl - sbc a, -1 -.neg_divisor_skip: + call __ldivs_lrems_common call __ldivu @@ -38,5 +26,6 @@ __ldivs: jp __lneg + extern __ldivs_lrems_common extern __ldivu extern __lneg diff --git a/src/std/static/ldivs_lrems_common.src b/src/std/static/ldivs_lrems_common.src new file mode 100644 index 000000000..fdbca5a2f --- /dev/null +++ b/src/std/static/ldivs_lrems_common.src @@ -0,0 +1,24 @@ + assume adl=1 + + public __ldivs_lrems_common +__ldivs_lrems_common: +; I: ZF=EUHL>=0 +; O: aubc=abs(AUBC), euhl=abs(EUHL) + + call nz, 
__lneg + + or a, a + ret p + + push hl + sbc hl, hl + sbc hl, bc + ex (sp), hl + pop bc + cpl + sbc a, -1 + + ret + + + extern __lneg diff --git a/src/std/static/lrems.src b/src/std/static/lrems.src index e6c027ef0..e8656a05d 100644 --- a/src/std/static/lrems.src +++ b/src/std/static/lrems.src @@ -6,22 +6,10 @@ __lrems: ; O: euhl=EUHL%AUBC bit 7, e - push af push bc - call nz, __lneg - - cp a, $80 - jr c, .neg_divisor_skip - push hl - sbc hl, hl - sbc hl, bc - ex (sp), hl - pop bc - cpl - sbc a, -1 -.neg_divisor_skip: + call __ldivs_lrems_common call __lremu @@ -31,5 +19,6 @@ __lrems: jp __lneg + extern __ldivs_lrems_common extern __lneg extern __lremu From 7614597c982b80a46f5b10d0a8bce658c554092b Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 14:02:18 -0500 Subject: [PATCH 098/142] Disable OS-linked __lrems due to a bug --- src/std/linked/linked.src | 5 +++-- src/std/{static => shared}/lrems.src | 0 2 files changed, 3 insertions(+), 2 deletions(-) rename src/std/{static => shared}/lrems.src (100%) diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index ebce0ce09..8574f8ec2 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -146,8 +146,9 @@ __lneg := 0001D0h __lnot := 0001D4h public __lor __lor := 0001D8h - public __lrems -__lrems := 0001DCh +; Bug: Does not ensure `nc` before `sbc hl, hl` to negate divisor. 
+; public __lrems +; __lrems := 0001DCh public __lremu __lremu := 0001E0h public __lshl diff --git a/src/std/static/lrems.src b/src/std/shared/lrems.src similarity index 100% rename from src/std/static/lrems.src rename to src/std/shared/lrems.src From 7955498f79600b9ce2dc148f98d6eabdbaae7dce Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 13 Feb 2022 14:46:36 -0500 Subject: [PATCH 099/142] Implement __idvrms --- src/std/static/idivs.src | 39 +++---------------------------- src/std/static/idvrms.src | 49 +++++++++++++++++++++++++++++++++++++++ src/std/static/irems.src | 30 +++--------------------- 3 files changed, 55 insertions(+), 63 deletions(-) create mode 100644 src/std/static/idvrms.src diff --git a/src/std/static/idivs.src b/src/std/static/idivs.src index 97e682d06..2213867b0 100644 --- a/src/std/static/idivs.src +++ b/src/std/static/idivs.src @@ -5,45 +5,12 @@ __idivs: ; I: UHL=dividend, UBC=divisor ; O: uhl=UHL/UBC - push bc push de - push af - ex de, hl - - sbc hl, hl - ccf - sbc hl, bc - inc hl - jp m, .neg_divisor_skip - cpl - push hl - pop bc -.neg_divisor_skip: - - sbc hl, hl - ccf - sbc hl, de - inc hl - jp p, .pos_dividend_skip - cpl - ex de, hl -.pos_dividend_skip: - - push af - - call __idvrmu - ex de, hl - - pop bc - pop af - cp a, b + call __idvrms pop de - pop bc - ret nz - jp __ineg + ret - extern __idvrmu - extern __ineg + extern __idvrms diff --git a/src/std/static/idvrms.src b/src/std/static/idvrms.src new file mode 100644 index 000000000..ebd251fc1 --- /dev/null +++ b/src/std/static/idvrms.src @@ -0,0 +1,49 @@ + assume adl=1 + + public __idvrms +__idvrms: +; I: UHL=dividend, UBC=divisor +; O: ude=UHL%UBC, uhl=UHL/UBC + + push bc + push af + + ex de, hl + + sbc hl, hl + ccf + sbc hl, bc + inc hl + jp m, .neg_divisor_skip + cpl + push hl + pop bc +.neg_divisor_skip: + + sbc hl, hl + ccf + sbc hl, de + inc hl + jp p, .pos_dividend_skip + cpl + ex de, hl +.pos_dividend_skip: + + push af + + call __idvrmu + + pop af + call p, __ineg + + 
ld b, a + pop af + cp a, b + pop bc + ex de, hl + ret nz + jp __ineg + + + extern __idvrmu + extern __ineg diff --git a/src/std/static/irems.src b/src/std/static/irems.src index 2c95875cf..dfed236ae 100644 --- a/src/std/static/irems.src +++ b/src/std/static/irems.src @@ -5,37 +5,13 @@ __irems: ; I: UHL=dividend, UBC=divisor ; O: uhl=UHL%UBC - push bc push de + call __idvrms ex de, hl - or a, a - sbc hl, hl - sbc hl, bc - jp m, .neg_divisor_skip - push hl - pop bc -.neg_divisor_skip: - - sbc hl, hl - ccf - sbc hl, de - inc hl - jp p, .pos_dividend_skip - ex de, hl -.pos_dividend_skip: - - push af - - call __idvrmu - - pop af pop de - pop bc - ret m - jp __ineg + ret - extern __idvrmu - extern __ineg + extern __idvrms From 857bfa33bdc97df971b656d004174adb553981a2 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 14:36:02 -0500 Subject: [PATCH 100/142] Optimize div --- src/std/shared/div.src | 56 +++++++++--------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/src/std/shared/div.src b/src/std/shared/div.src index 36a73e7f6..26adadadd 100644 --- a/src/std/shared/div.src +++ b/src/std/shared/div.src @@ -1,52 +1,20 @@ -; Copyright 1992-2008 Zilog, Inc. 
-;------------------------------------------------------------------------- -; Div function -; -; div_t div(int numer,int denom); -; -;------------------------------------------------------------------------- - assume adl=1 + assume adl=1 public _div _div: - push af - push bc - push hl - push de - - ld hl,21 - add hl,sp - ld bc,(hl) ; bc=val of denominator - dec hl - dec hl - dec hl - ld hl,(hl) ; hl=val of numerator - push hl - call __idivs - ex de,hl ; de= quotient - pop hl ; hl=val of numerator - call __irems - push hl - pop bc - - ld hl,15 - add hl,sp - ld hl,(hl) - inc hl - inc hl - inc hl - ld (hl),bc ; remainder - dec hl - dec hl - dec hl - ld (hl),de ;quotient - pop de + pop iy + pop hl pop bc - pop bc - pop af + push de + push de + push de + push de + + call __idvrms + ld (iy), hl + ld (iy + 3), de ret - extern __idivs - extern __irems + extern __idvrms From eb2c8e0d713c140521155528ca9b1474ae9a52f8 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 14:44:02 -0500 Subject: [PATCH 101/142] Test div and ldiv --- examples/standalone/math_test/src/main.c | 315 +++++++++++++---------- 1 file changed, 178 insertions(+), 137 deletions(-) diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 514c2dada..537dd1bb1 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -109,6 +109,91 @@ static int64_t atoll_(const char *str) } +#define DEFINE_UNOP_PREFIX_FUNC(op, name, prefix, type) \ + static type prefix##name##_(type x) \ + { \ + return (type)(op(x)); \ + } + +#define DEFINE_BINOP_FUNC_FUNC(type, typePrefix, name, funcPrefix, func, post) \ + static type typePrefix##name##_(type x, type y) \ + { \ + return (type)(funcPrefix##func(x, y)post); \ + } + +#define DEFINE_BINOP_INFIX_FUNC(op, name, prefix, type) \ + static type prefix##name##_(type x, type y) \ + { \ + return (type)(x op y); \ + } + +#define DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ + 
DEFINE_UNOP_PREFIX_FUNC(op, name, b, u##int8_t) +#define DEFINE_UNOP_PREFIX_FUNC_S(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC(op, name, s, u##int16_t) +#define DEFINE_UNOP_PREFIX_FUNC_I(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC(op, name, i, u##int24_t) +#define DEFINE_UNOP_PREFIX_FUNC_L(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC(op, name, l, u##int32_t) +#define DEFINE_UNOP_PREFIX_FUNC_LL(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC(op, name, ll, u##int64_t) + +#define DEFINE_BINOP_FUNC_FUNC_B(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC(u##int8_t, b, name, b, func, post) +#define DEFINE_BINOP_FUNC_FUNC_S(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC(u##int16_t, s, name, s, func, post) +#define DEFINE_BINOP_FUNC_FUNC_I(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC(u##int24_t, i, name, , func, post) +#define DEFINE_BINOP_FUNC_FUNC_L(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC(u##int32_t, l, name, l, func, post) +#define DEFINE_BINOP_FUNC_FUNC_LL(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC(u##int64_t, ll, name, ll, func, post) + +#define DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC(op, name, b, u##int8_t) +#define DEFINE_BINOP_INFIX_FUNC_S(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC(op, name, s, u##int16_t) +#define DEFINE_BINOP_INFIX_FUNC_I(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC(op, name, i, u##int24_t) +#define DEFINE_BINOP_INFIX_FUNC_L(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC(op, name, l, u##int32_t) +#define DEFINE_BINOP_INFIX_FUNC_LL(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC(op, name, ll, u##int64_t) + +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_S(op, name, u) +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_I(op, name, u) +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_L(op, name, u) +#define 
DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_LL(op, name, u) + +#define DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_S(op, name, u) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_I(op, name, u) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_L(op, name, u) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_LL(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_LL(op, name, u) + +#define DEFINE_BINOP_FUNC_FUNC_I_TO_L(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC_I(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC_L(u, name, func, post) +#define DEFINE_BINOP_FUNC_FUNC_I_TO_LL(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC_I_TO_L(u, name, func, post) \ + DEFINE_BINOP_FUNC_FUNC_LL(u, name, func, post) + + #define DEFINE_UNOP_TYPE(u) \ typedef struct u##UnOp_ \ { \ @@ -137,117 +222,64 @@ DEFINE_UNOP_TYPE(u) DEFINE_BINOP_TYPE() DEFINE_BINOP_TYPE(u) - -#define DEFINE_UNOP_FUNC(op, name, prefix, type) \ - static type prefix##name##_(type x) \ - { \ - return (type)(op(x)); \ - } - -#define DEFINE_BINOP_FUNC(op, name, prefix, type) \ - static type prefix##name##_(type x, type y) \ - { \ - return (type)(x op y); \ - } - -#define DEFINE_UNOP_FUNC_B(op, name, u) DEFINE_UNOP_FUNC(op, name, b, u##int8_t) -#define DEFINE_UNOP_FUNC_S(op, name, u) DEFINE_UNOP_FUNC(op, name, s, u##int16_t) -#define DEFINE_UNOP_FUNC_I(op, name, u) DEFINE_UNOP_FUNC(op, name, i, u##int24_t) -#define DEFINE_UNOP_FUNC_L(op, name, u) DEFINE_UNOP_FUNC(op, name, l, u##int32_t) -#define DEFINE_UNOP_FUNC_LL(op, name, u) DEFINE_UNOP_FUNC(op, name, ll, u##int64_t) - -#define DEFINE_BINOP_FUNC_B(op, name, u) DEFINE_BINOP_FUNC(op, name, b, u##int8_t) -#define DEFINE_BINOP_FUNC_S(op, 
name, u) DEFINE_BINOP_FUNC(op, name, s, u##int16_t) -#define DEFINE_BINOP_FUNC_I(op, name, u) DEFINE_BINOP_FUNC(op, name, i, u##int24_t) -#define DEFINE_BINOP_FUNC_L(op, name, u) DEFINE_BINOP_FUNC(op, name, l, u##int32_t) -#define DEFINE_BINOP_FUNC_LL(op, name, u) DEFINE_BINOP_FUNC(op, name, ll, u##int64_t) - - -#define DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ - DEFINE_UNOP_FUNC_B(op, name, u) -#define DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ - DEFINE_UNOP_FUNC_S(op, name, u) -#define DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_UNOP_FUNC_I(op, name, u) -#define DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_UNOP_FUNC_L(op, name, u) -#define DEFINE_UNOP_FUNCS_UP_TO_LL(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_UNOP_FUNC_LL(op, name, u) - -#define DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ - DEFINE_BINOP_FUNC_B(op, name, u) -#define DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ - DEFINE_BINOP_FUNC_S(op, name, u) -#define DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_BINOP_FUNC_I(op, name, u) -#define DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_BINOP_FUNC_L(op, name, u) -#define DEFINE_BINOP_FUNCS_UP_TO_LL(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_BINOP_FUNC_LL(op, name, u) - - -#define DEFINE_UNOP_STRUCT_UP_TO_B(name, u) \ +#define DEFINE_UNOP_STRUCT_B(name, u) \ static const u##UnOp unop_##name = {#name, b##name##_}; -#define DEFINE_UNOP_STRUCT_UP_TO_S(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_S(name, u) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_}; -#define DEFINE_UNOP_STRUCT_UP_TO_I(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_I(name, u) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_}; 
-#define DEFINE_UNOP_STRUCT_UP_TO_L(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_L(name, u) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; -#define DEFINE_UNOP_STRUCT_UP_TO_LL(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_LL(name, u) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; -#define DEFINE_BINOP_STRUCT_UP_TO_B(name, u) \ +#define DEFINE_BINOP_STRUCT_B(name, u) \ static const u##BinOp binop_##name = {#name, b##name##_}; -#define DEFINE_BINOP_STRUCT_UP_TO_S(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_S(name, u) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_}; -#define DEFINE_BINOP_STRUCT_UP_TO_I(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_I(name, u) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_}; -#define DEFINE_BINOP_STRUCT_UP_TO_L(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_L(name, u) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; -#define DEFINE_BINOP_STRUCT_UP_TO_LL(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_LL(name, u) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; - - -#define DEFINE_UNOP_UP_TO_B(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_B(op, name, u) \ - DEFINE_UNOP_STRUCT_UP_TO_B(name, u) -#define DEFINE_UNOP_UP_TO_S(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_UNOP_STRUCT_UP_TO_S(name, u) -#define DEFINE_UNOP_UP_TO_I(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_UNOP_STRUCT_UP_TO_I(name, u) -#define DEFINE_UNOP_UP_TO_L(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_UNOP_STRUCT_UP_TO_L(name, u) -#define DEFINE_UNOP_UP_TO_LL(op, name, u) \ - DEFINE_UNOP_FUNCS_UP_TO_LL(op, name, u) \ - DEFINE_UNOP_STRUCT_UP_TO_LL(name, u) - -#define DEFINE_BINOP_UP_TO_B(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_B(op, name, u) \ - 
DEFINE_BINOP_STRUCT_UP_TO_B(name, u) -#define DEFINE_BINOP_UP_TO_S(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_S(op, name, u) \ - DEFINE_BINOP_STRUCT_UP_TO_S(name, u) -#define DEFINE_BINOP_UP_TO_I(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_I(op, name, u) \ - DEFINE_BINOP_STRUCT_UP_TO_I(name, u) -#define DEFINE_BINOP_UP_TO_L(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_L(op, name, u) \ - DEFINE_BINOP_STRUCT_UP_TO_L(name, u) -#define DEFINE_BINOP_UP_TO_LL(op, name, u) \ - DEFINE_BINOP_FUNCS_UP_TO_LL(op, name, u) \ - DEFINE_BINOP_STRUCT_UP_TO_LL(name, u) +#define DEFINE_BINOP_STRUCT_I_TO_L(name, u) \ + static const u##BinOp binop_##name = {#name, NULL, NULL, i##name##_, l##name##_}; +#define DEFINE_BINOP_STRUCT_I_TO_LL(name, u) \ + static const u##BinOp binop_##name = {#name, NULL, NULL, i##name##_, l##name##_, ll##name##_}; + + +#define DEFINE_UNOP_PREFIX_B(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ + DEFINE_UNOP_STRUCT_B(name, u) +#define DEFINE_UNOP_PREFIX_B_TO_S(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_UNOP_STRUCT_B_TO_S(name, u) +#define DEFINE_UNOP_PREFIX_B_TO_I(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_UNOP_STRUCT_B_TO_I(name, u) +#define DEFINE_UNOP_PREFIX_B_TO_L(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ + DEFINE_UNOP_STRUCT_B_TO_L(name, u) +#define DEFINE_UNOP_PREFIX_B_TO_LL(op, name, u) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(op, name, u) \ + DEFINE_UNOP_STRUCT_B_TO_LL(name, u) + +#define DEFINE_BINOP_INFIX_B(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ + DEFINE_BINOP_STRUCT_B(name, u) +#define DEFINE_BINOP_INFIX_B_TO_S(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ + DEFINE_BINOP_STRUCT_B_TO_S(name, u) +#define DEFINE_BINOP_INFIX_B_TO_I(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ + DEFINE_BINOP_STRUCT_B_TO_I(name, u) +#define DEFINE_BINOP_INFIX_B_TO_L(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ + 
DEFINE_BINOP_STRUCT_B_TO_L(name, u) +#define DEFINE_BINOP_INFIX_B_TO_LL(op, name, u) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_LL(op, name, u) \ + DEFINE_BINOP_STRUCT_B_TO_LL(name, u) static void testOp(bool isBinOp, const BinOp *op, int64_t x, int64_t y) @@ -266,12 +298,13 @@ static void testOp(bool isBinOp, const BinOp *op, int64_t x, int64_t y) x_printf("%*s=%016llX\n", lhsLength, "y", (long long)y); } -#define TEST_OP(prefix, bits) \ - if (op->prefix) \ - { \ - unsigned digits = (bits + 3) / 4; \ - unsigned long long result = (op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1); \ - x_printf("\n%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ +#define TEST_OP(prefix, bits) \ + x_printf("\n"); \ + if (op->prefix) \ + { \ + unsigned digits = (bits + 3) / 4; \ + unsigned long long result = (op->prefix)(x, y) & ((1ULL << (bits - 1) << 1) - 1); \ + x_printf("%*s%s=%*s%0*llX", prefixLength, #prefix, op->name, 16 - digits, "", digits, result); \ } TEST_OP(b, 8) @@ -283,7 +316,6 @@ static void testOp(bool isBinOp, const BinOp *op, int64_t x, int64_t y) finishOutput(); } - static void testUnOp(const UnOp *op, int64_t x) { testOp(false, (const BinOp*)op, x, 0); @@ -295,40 +327,47 @@ static void testBinOp(const BinOp *op, int64_t x, int64_t y) } -DEFINE_UNOP_UP_TO_LL(~, not, ) -DEFINE_UNOP_UP_TO_LL(-, neg, ) +DEFINE_UNOP_PREFIX_B_TO_LL(~, not, ) +DEFINE_UNOP_PREFIX_B_TO_LL(-, neg, ) -DEFINE_UNOP_FUNCS_UP_TO_I(abs, abs, ) -DEFINE_UNOP_FUNC_L(labs, abs, ) -DEFINE_UNOP_FUNC_LL(llabs, abs, ) -DEFINE_UNOP_STRUCT_UP_TO_LL(abs, ) +DEFINE_UNOP_PREFIX_FUNC_B_TO_I(abs, abs, ) +DEFINE_UNOP_PREFIX_FUNC_L(labs, abs, ) +DEFINE_UNOP_PREFIX_FUNC_LL(llabs, abs, ) +DEFINE_UNOP_STRUCT_B_TO_LL(abs, ) -DEFINE_UNOP_FUNC_B(__builtin_bitreverse8, bitrev, ) -DEFINE_UNOP_FUNC_S(__builtin_bitreverse16, bitrev, ) -DEFINE_UNOP_FUNC_I(__builtin_bitreverse24, bitrev, ) -DEFINE_UNOP_FUNC_L(__builtin_bitreverse32, bitrev, ) -DEFINE_UNOP_FUNC_LL(__builtin_bitreverse64, bitrev, 
) -DEFINE_UNOP_STRUCT_UP_TO_LL(bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_B(__builtin_bitreverse8, bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_S(__builtin_bitreverse16, bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_I(__builtin_bitreverse24, bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_L(__builtin_bitreverse32, bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_LL(__builtin_bitreverse64, bitrev, ) +DEFINE_UNOP_STRUCT_B_TO_LL(bitrev, ) // Needs to be unsigned to avoid extra bits from sign extension -DEFINE_UNOP_FUNCS_UP_TO_I(__builtin_popcount, popcnt, u) -DEFINE_UNOP_FUNC_L(__builtin_popcountl, popcnt, u) -DEFINE_UNOP_FUNC_LL(__builtin_popcountll, popcnt, u) -DEFINE_UNOP_STRUCT_UP_TO_LL(popcnt, u) - -DEFINE_BINOP_UP_TO_LL(&, and, ) -DEFINE_BINOP_UP_TO_LL(|, or, ) -DEFINE_BINOP_UP_TO_LL(^, xor, ) -DEFINE_BINOP_UP_TO_LL(+, add, ) -DEFINE_BINOP_UP_TO_LL(-, sub, ) -DEFINE_BINOP_UP_TO_LL(<<, shl, ) -DEFINE_BINOP_UP_TO_LL(>>, shru, u) -DEFINE_BINOP_UP_TO_LL(>>, shrs, ) -DEFINE_BINOP_UP_TO_LL(*, mulu, u) -DEFINE_BINOP_UP_TO_LL(/, divu, u) -DEFINE_BINOP_UP_TO_LL(/, divs, ) -DEFINE_BINOP_UP_TO_LL(%, remu, u) -DEFINE_BINOP_UP_TO_LL(%, rems, ) +DEFINE_UNOP_PREFIX_FUNC_B_TO_I(__builtin_popcount, popcnt, u) +DEFINE_UNOP_PREFIX_FUNC_L(__builtin_popcountl, popcnt, u) +DEFINE_UNOP_PREFIX_FUNC_LL(__builtin_popcountll, popcnt, u) +DEFINE_UNOP_STRUCT_B_TO_LL(popcnt, u) + +DEFINE_BINOP_INFIX_B_TO_LL(&, and, ) +DEFINE_BINOP_INFIX_B_TO_LL(|, or, ) +DEFINE_BINOP_INFIX_B_TO_LL(^, xor, ) +DEFINE_BINOP_INFIX_B_TO_LL(+, add, ) +DEFINE_BINOP_INFIX_B_TO_LL(-, sub, ) +DEFINE_BINOP_INFIX_B_TO_LL(<<, shl, ) +DEFINE_BINOP_INFIX_B_TO_LL(>>, shru, u) +DEFINE_BINOP_INFIX_B_TO_LL(>>, shrs, ) +DEFINE_BINOP_INFIX_B_TO_LL(*, mulu, u) +DEFINE_BINOP_INFIX_B_TO_LL(/, divu, u) +DEFINE_BINOP_INFIX_B_TO_LL(%, remu, u) +DEFINE_BINOP_INFIX_B_TO_LL(/, divs, ) +DEFINE_BINOP_INFIX_B_TO_LL(%, rems, ) + + +DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_q, div, .quot) +DEFINE_BINOP_STRUCT_I_TO_L(div_q, ) + +DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_r, div, .rem) 
+DEFINE_BINOP_STRUCT_I_TO_L(div_r, ) static const UnOp *unops[] = { @@ -353,6 +392,8 @@ static const BinOp *binops[] = { (const BinOp *)&binop_remu, &binop_divs, &binop_rems, + &binop_div_q, + &binop_div_r, }; From 248cd0235698b124d090c1e77f620b108d027b3c Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 15:01:51 -0500 Subject: [PATCH 102/142] Normalize test macro parameter order --- examples/standalone/math_test/src/main.c | 278 +++++++++++------------ 1 file changed, 139 insertions(+), 139 deletions(-) diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 537dd1bb1..68768a14d 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -109,82 +109,82 @@ static int64_t atoll_(const char *str) } -#define DEFINE_UNOP_PREFIX_FUNC(op, name, prefix, type) \ - static type prefix##name##_(type x) \ - { \ - return (type)(op(x)); \ +#define DEFINE_UNOP_PREFIX_FUNC(type, name, op) \ + static type name##_(type x) \ + { \ + return (type)(op(x)); \ } -#define DEFINE_BINOP_FUNC_FUNC(type, typePrefix, name, funcPrefix, func, post) \ - static type typePrefix##name##_(type x, type y) \ - { \ - return (type)(funcPrefix##func(x, y)post); \ +#define DEFINE_BINOP_FUNC_FUNC(type, name, func, post) \ + static type name##_(type x, type y) \ + { \ + return (type)(func(x, y)post); \ } -#define DEFINE_BINOP_INFIX_FUNC(op, name, prefix, type) \ - static type prefix##name##_(type x, type y) \ - { \ - return (type)(x op y); \ +#define DEFINE_BINOP_INFIX_FUNC(type, name, op) \ + static type name##_(type x, type y) \ + { \ + return (type)(x op y); \ } -#define DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC(op, name, b, u##int8_t) -#define DEFINE_UNOP_PREFIX_FUNC_S(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC(op, name, s, u##int16_t) -#define DEFINE_UNOP_PREFIX_FUNC_I(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC(op, name, i, u##int24_t) -#define 
DEFINE_UNOP_PREFIX_FUNC_L(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC(op, name, l, u##int32_t) -#define DEFINE_UNOP_PREFIX_FUNC_LL(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC(op, name, ll, u##int64_t) +#define DEFINE_UNOP_PREFIX_FUNC_B(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC(u##int8_t, b##name, op) +#define DEFINE_UNOP_PREFIX_FUNC_S(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC(u##int16_t, s##name, op) +#define DEFINE_UNOP_PREFIX_FUNC_I(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC(u##int24_t, i##name, op) +#define DEFINE_UNOP_PREFIX_FUNC_L(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC(u##int32_t, l##name, op) +#define DEFINE_UNOP_PREFIX_FUNC_LL(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC(u##int64_t, ll##name, op) #define DEFINE_BINOP_FUNC_FUNC_B(u, name, func, post) \ - DEFINE_BINOP_FUNC_FUNC(u##int8_t, b, name, b, func, post) + DEFINE_BINOP_FUNC_FUNC(u##int8_t, b##name, b##func, post) #define DEFINE_BINOP_FUNC_FUNC_S(u, name, func, post) \ - DEFINE_BINOP_FUNC_FUNC(u##int16_t, s, name, s, func, post) + DEFINE_BINOP_FUNC_FUNC(u##int16_t, s##name, s##func, post) #define DEFINE_BINOP_FUNC_FUNC_I(u, name, func, post) \ - DEFINE_BINOP_FUNC_FUNC(u##int24_t, i, name, , func, post) + DEFINE_BINOP_FUNC_FUNC(u##int24_t, i##name, func, post) #define DEFINE_BINOP_FUNC_FUNC_L(u, name, func, post) \ - DEFINE_BINOP_FUNC_FUNC(u##int32_t, l, name, l, func, post) + DEFINE_BINOP_FUNC_FUNC(u##int32_t, l##name, l##func, post) #define DEFINE_BINOP_FUNC_FUNC_LL(u, name, func, post) \ - DEFINE_BINOP_FUNC_FUNC(u##int64_t, ll, name, ll, func, post) - -#define DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC(op, name, b, u##int8_t) -#define DEFINE_BINOP_INFIX_FUNC_S(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC(op, name, s, u##int16_t) -#define DEFINE_BINOP_INFIX_FUNC_I(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC(op, name, i, u##int24_t) -#define DEFINE_BINOP_INFIX_FUNC_L(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC(op, name, l, u##int32_t) -#define DEFINE_BINOP_INFIX_FUNC_LL(op, name, u) \ - 
DEFINE_BINOP_INFIX_FUNC(op, name, ll, u##int64_t) - -#define DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_S(op, name, u) -#define DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_I(op, name, u) -#define DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_L(op, name, u) -#define DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_LL(op, name, u) - -#define DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_S(op, name, u) -#define DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_I(op, name, u) -#define DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_L(op, name, u) -#define DEFINE_BINOP_INFIX_FUNC_B_TO_LL(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_LL(op, name, u) + DEFINE_BINOP_FUNC_FUNC(u##int64_t, ll##name, ll##func, post) + +#define DEFINE_BINOP_INFIX_FUNC_B(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC(u##int8_t, b##name, op) +#define DEFINE_BINOP_INFIX_FUNC_S(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC(u##int16_t, s##name, op) +#define DEFINE_BINOP_INFIX_FUNC_I(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC(u##int24_t, i##name, op) +#define DEFINE_BINOP_INFIX_FUNC_L(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC(u##int32_t, l##name, op) +#define DEFINE_BINOP_INFIX_FUNC_LL(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC(u##int64_t, ll##name, op) + +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_S(u, name, op) +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_I(u, name, op) \ + 
DEFINE_UNOP_PREFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_I(u, name, op) +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_I(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_L(u, name, op) +#define DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_LL(u, name, op) + +#define DEFINE_BINOP_INFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_S(u, name, op) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_I(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_I(u, name, op) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_I(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_L(u, name, op) +#define DEFINE_BINOP_INFIX_FUNC_B_TO_LL(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_LL(u, name, op) #define DEFINE_BINOP_FUNC_FUNC_I_TO_L(u, name, func, post) \ DEFINE_BINOP_FUNC_FUNC_I(u, name, func, post) \ @@ -222,64 +222,64 @@ DEFINE_UNOP_TYPE(u) DEFINE_BINOP_TYPE() DEFINE_BINOP_TYPE(u) -#define DEFINE_UNOP_STRUCT_B(name, u) \ +#define DEFINE_UNOP_STRUCT_B(u, name) \ static const u##UnOp unop_##name = {#name, b##name##_}; -#define DEFINE_UNOP_STRUCT_B_TO_S(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_S(u, name) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_}; -#define DEFINE_UNOP_STRUCT_B_TO_I(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_I(u, name) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_}; -#define DEFINE_UNOP_STRUCT_B_TO_L(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_L(u, name) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; -#define DEFINE_UNOP_STRUCT_B_TO_LL(name, u) \ +#define DEFINE_UNOP_STRUCT_B_TO_LL(u, name) \ static const u##UnOp unop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; 
-#define DEFINE_BINOP_STRUCT_B(name, u) \ +#define DEFINE_BINOP_STRUCT_B(u, name) \ static const u##BinOp binop_##name = {#name, b##name##_}; -#define DEFINE_BINOP_STRUCT_B_TO_S(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_S(u, name) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_}; -#define DEFINE_BINOP_STRUCT_B_TO_I(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_I(u, name) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_}; -#define DEFINE_BINOP_STRUCT_B_TO_L(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_L(u, name) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_}; -#define DEFINE_BINOP_STRUCT_B_TO_LL(name, u) \ +#define DEFINE_BINOP_STRUCT_B_TO_LL(u, name) \ static const u##BinOp binop_##name = {#name, b##name##_, s##name##_, i##name##_, l##name##_, ll##name##_}; -#define DEFINE_BINOP_STRUCT_I_TO_L(name, u) \ +#define DEFINE_BINOP_STRUCT_I_TO_L(u, name) \ static const u##BinOp binop_##name = {#name, NULL, NULL, i##name##_, l##name##_}; -#define DEFINE_BINOP_STRUCT_I_TO_LL(name, u) \ +#define DEFINE_BINOP_STRUCT_I_TO_LL(u, name) \ static const u##BinOp binop_##name = {#name, NULL, NULL, i##name##_, l##name##_, ll##name##_}; -#define DEFINE_UNOP_PREFIX_B(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B(op, name, u) \ - DEFINE_UNOP_STRUCT_B(name, u) -#define DEFINE_UNOP_PREFIX_B_TO_S(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_UNOP_STRUCT_B_TO_S(name, u) -#define DEFINE_UNOP_PREFIX_B_TO_I(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_UNOP_STRUCT_B_TO_I(name, u) -#define DEFINE_UNOP_PREFIX_B_TO_L(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_UNOP_STRUCT_B_TO_L(name, u) -#define DEFINE_UNOP_PREFIX_B_TO_LL(op, name, u) \ - DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(op, name, u) \ - DEFINE_UNOP_STRUCT_B_TO_LL(name, u) - -#define DEFINE_BINOP_INFIX_B(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B(op, name, u) \ - 
DEFINE_BINOP_STRUCT_B(name, u) -#define DEFINE_BINOP_INFIX_B_TO_S(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_S(op, name, u) \ - DEFINE_BINOP_STRUCT_B_TO_S(name, u) -#define DEFINE_BINOP_INFIX_B_TO_I(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_I(op, name, u) \ - DEFINE_BINOP_STRUCT_B_TO_I(name, u) -#define DEFINE_BINOP_INFIX_B_TO_L(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_L(op, name, u) \ - DEFINE_BINOP_STRUCT_B_TO_L(name, u) -#define DEFINE_BINOP_INFIX_B_TO_LL(op, name, u) \ - DEFINE_BINOP_INFIX_FUNC_B_TO_LL(op, name, u) \ - DEFINE_BINOP_STRUCT_B_TO_LL(name, u) +#define DEFINE_UNOP_PREFIX_B(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B(u, name, op) \ + DEFINE_UNOP_STRUCT_B(u, name) +#define DEFINE_UNOP_PREFIX_B_TO_S(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_UNOP_STRUCT_B_TO_S(u, name) +#define DEFINE_UNOP_PREFIX_B_TO_I(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_I(u, name, op) \ + DEFINE_UNOP_STRUCT_B_TO_I(u, name) +#define DEFINE_UNOP_PREFIX_B_TO_L(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_UNOP_STRUCT_B_TO_L(u, name) +#define DEFINE_UNOP_PREFIX_B_TO_LL(u, name, op) \ + DEFINE_UNOP_PREFIX_FUNC_B_TO_LL(u, name, op) \ + DEFINE_UNOP_STRUCT_B_TO_LL(u, name) + +#define DEFINE_BINOP_INFIX_B(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B(u, name, op) \ + DEFINE_BINOP_STRUCT_B(u, name) +#define DEFINE_BINOP_INFIX_B_TO_S(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_S(u, name, op) \ + DEFINE_BINOP_STRUCT_B_TO_S(u, name) +#define DEFINE_BINOP_INFIX_B_TO_I(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_I(u, name, op) \ + DEFINE_BINOP_STRUCT_B_TO_I(u, name) +#define DEFINE_BINOP_INFIX_B_TO_L(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_L(u, name, op) \ + DEFINE_BINOP_STRUCT_B_TO_L(u, name) +#define DEFINE_BINOP_INFIX_B_TO_LL(u, name, op) \ + DEFINE_BINOP_INFIX_FUNC_B_TO_LL(u, name, op) \ + DEFINE_BINOP_STRUCT_B_TO_LL(u, name) static void testOp(bool isBinOp, const BinOp *op, int64_t x, int64_t y) @@ -327,47 
+327,47 @@ static void testBinOp(const BinOp *op, int64_t x, int64_t y) } -DEFINE_UNOP_PREFIX_B_TO_LL(~, not, ) -DEFINE_UNOP_PREFIX_B_TO_LL(-, neg, ) +DEFINE_UNOP_PREFIX_B_TO_LL( , not, ~) +DEFINE_UNOP_PREFIX_B_TO_LL( , neg, -) -DEFINE_UNOP_PREFIX_FUNC_B_TO_I(abs, abs, ) -DEFINE_UNOP_PREFIX_FUNC_L(labs, abs, ) -DEFINE_UNOP_PREFIX_FUNC_LL(llabs, abs, ) -DEFINE_UNOP_STRUCT_B_TO_LL(abs, ) +DEFINE_UNOP_PREFIX_FUNC_B_TO_I( , abs, abs) +DEFINE_UNOP_PREFIX_FUNC_L( , abs, labs) +DEFINE_UNOP_PREFIX_FUNC_LL( , abs, llabs) +DEFINE_UNOP_STRUCT_B_TO_LL( , abs) -DEFINE_UNOP_PREFIX_FUNC_B(__builtin_bitreverse8, bitrev, ) -DEFINE_UNOP_PREFIX_FUNC_S(__builtin_bitreverse16, bitrev, ) -DEFINE_UNOP_PREFIX_FUNC_I(__builtin_bitreverse24, bitrev, ) -DEFINE_UNOP_PREFIX_FUNC_L(__builtin_bitreverse32, bitrev, ) -DEFINE_UNOP_PREFIX_FUNC_LL(__builtin_bitreverse64, bitrev, ) -DEFINE_UNOP_STRUCT_B_TO_LL(bitrev, ) +DEFINE_UNOP_PREFIX_FUNC_B( , bitrev, __builtin_bitreverse8) +DEFINE_UNOP_PREFIX_FUNC_S( , bitrev, __builtin_bitreverse16) +DEFINE_UNOP_PREFIX_FUNC_I( , bitrev, __builtin_bitreverse24) +DEFINE_UNOP_PREFIX_FUNC_L( , bitrev, __builtin_bitreverse32) +DEFINE_UNOP_PREFIX_FUNC_LL( , bitrev, __builtin_bitreverse64) +DEFINE_UNOP_STRUCT_B_TO_LL( , bitrev) // Needs to be unsigned to avoid extra bits from sign extension -DEFINE_UNOP_PREFIX_FUNC_B_TO_I(__builtin_popcount, popcnt, u) -DEFINE_UNOP_PREFIX_FUNC_L(__builtin_popcountl, popcnt, u) -DEFINE_UNOP_PREFIX_FUNC_LL(__builtin_popcountll, popcnt, u) -DEFINE_UNOP_STRUCT_B_TO_LL(popcnt, u) - -DEFINE_BINOP_INFIX_B_TO_LL(&, and, ) -DEFINE_BINOP_INFIX_B_TO_LL(|, or, ) -DEFINE_BINOP_INFIX_B_TO_LL(^, xor, ) -DEFINE_BINOP_INFIX_B_TO_LL(+, add, ) -DEFINE_BINOP_INFIX_B_TO_LL(-, sub, ) -DEFINE_BINOP_INFIX_B_TO_LL(<<, shl, ) -DEFINE_BINOP_INFIX_B_TO_LL(>>, shru, u) -DEFINE_BINOP_INFIX_B_TO_LL(>>, shrs, ) -DEFINE_BINOP_INFIX_B_TO_LL(*, mulu, u) -DEFINE_BINOP_INFIX_B_TO_LL(/, divu, u) -DEFINE_BINOP_INFIX_B_TO_LL(%, remu, u) -DEFINE_BINOP_INFIX_B_TO_LL(/, divs, 
) -DEFINE_BINOP_INFIX_B_TO_LL(%, rems, ) +DEFINE_UNOP_PREFIX_FUNC_B_TO_I(u, popcnt, __builtin_popcount) +DEFINE_UNOP_PREFIX_FUNC_L(u, popcnt, __builtin_popcountl) +DEFINE_UNOP_PREFIX_FUNC_LL(u, popcnt, __builtin_popcountll) +DEFINE_UNOP_STRUCT_B_TO_LL(u, popcnt) + +DEFINE_BINOP_INFIX_B_TO_LL( , and, &) +DEFINE_BINOP_INFIX_B_TO_LL( , or, |) +DEFINE_BINOP_INFIX_B_TO_LL( , xor, ^) +DEFINE_BINOP_INFIX_B_TO_LL( , add, +) +DEFINE_BINOP_INFIX_B_TO_LL( , sub, -) +DEFINE_BINOP_INFIX_B_TO_LL( , shl, <<) +DEFINE_BINOP_INFIX_B_TO_LL(u, shru, >>) +DEFINE_BINOP_INFIX_B_TO_LL( , shrs, >>) +DEFINE_BINOP_INFIX_B_TO_LL(u, mulu, *) +DEFINE_BINOP_INFIX_B_TO_LL(u, divu, /) +DEFINE_BINOP_INFIX_B_TO_LL(u, remu, %) +DEFINE_BINOP_INFIX_B_TO_LL( , divs, /) +DEFINE_BINOP_INFIX_B_TO_LL( , rems, %) DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_q, div, .quot) -DEFINE_BINOP_STRUCT_I_TO_L(div_q, ) +DEFINE_BINOP_STRUCT_I_TO_L( , div_q) DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_r, div, .rem) -DEFINE_BINOP_STRUCT_I_TO_L(div_r, ) +DEFINE_BINOP_STRUCT_I_TO_L( , div_r) static const UnOp *unops[] = { From c8e79e58f30994eac425880b93efb3a362b284ed Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 15:46:59 -0500 Subject: [PATCH 103/142] Slightly optimize __ldivu --- src/std/static/ldivu.src | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/std/static/ldivu.src b/src/std/static/ldivu.src index 9ccd71096..5c35b7bb4 100644 --- a/src/std/static/ldivu.src +++ b/src/std/static/ldivu.src @@ -6,11 +6,11 @@ __ldivu: ; O: euhl=EUHL/AUBC if 1 + push bc + call __ldvrmu + ld a, b - exx - push bc - exx pop bc ret z From fef3723f6df9dd6b06364009e54ba7ba8f19e6c9 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 20:38:41 -0500 Subject: [PATCH 104/142] Optimize part of __lldivs for size --- src/std/shared/lldivs.src | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index 
8ff61dc8f..7b943103c 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -25,25 +25,23 @@ __lldivs: cp a, $80 jr c, .neg_divisor_skip - push de + push bc push hl + ld c, a - ld de, (iy + 7) - sbc hl, hl - sbc hl, de - ld (iy + 7), hl - ld de, (iy + 10) - ld hl, 0 - sbc hl, de - ld (iy + 10), hl - ld de, (iy + 13) - sbc hl, hl - or a, a - sbc hl, de - ld (iy + 13), hl + lea hl, iy + 7 + ld b, 8 + +.neg_divisor_loop: + ld a, 0 + sbc a, (hl) + ld (hl), a + inc hl + djnz .neg_divisor_loop + ld a, c pop hl - pop de + pop bc .neg_divisor_skip: xor a, b From 04c2db8ecdf8140f9cea85c6c8d51a5aebba72b1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 22:55:54 -0500 Subject: [PATCH 105/142] Optimize llabs slightly --- src/std/shared/llabs.src | 11 ++++++----- src/std/shared/llneg.src | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/std/shared/llabs.src b/src/std/shared/llabs.src index 6212aceb6..c7c1f0ba7 100644 --- a/src/std/shared/llabs.src +++ b/src/std/shared/llabs.src @@ -11,9 +11,10 @@ _llabs: push bc push de push hl - push iy - bit 7, b - ret z - jp __llneg + ld a, b + or a, a + call m, __llneg.hijack_nc + jp (iy) - extern __llneg + + extern __llneg.hijack_nc diff --git a/src/std/shared/llneg.src b/src/std/shared/llneg.src index 96284ba7e..8ba3a5894 100644 --- a/src/std/shared/llneg.src +++ b/src/std/shared/llneg.src @@ -2,9 +2,11 @@ public __llneg __llneg: - push de ; *spl=UDE - ex de, hl ; ude=UHL, uhl=UDE or a, a + public __llneg.hijack_nc +.hijack_nc: + ex de, hl ; ude=UHL, uhl=UDE + push hl ; *spl=UDE sbc hl, hl ; uhl=0 sbc hl, de ; uhl=-UHL ex (sp), hl ; uhl=UDE, *spl=-UHL From 3603dea8c04dfebad91cef205e12db8b28c86611 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 23:45:43 -0500 Subject: [PATCH 106/142] Optimize __lldivs slightly --- src/std/shared/lldivs.src | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git 
a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index 7b943103c..caa07a5d7 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -18,16 +18,22 @@ __lldivs: push iy push af + sbc a, a + ld iy, 2 add iy, sp - ld a, (iy + 14) - cp a, $80 - jr c, .neg_divisor_skip + ld a, b + jr nz, .xor_divisor_skip + xor a, (iy + 14) +.xor_divisor_skip: + ld (iy + 15), a + + bit 7, (iy + 14) + jr z, .neg_divisor_skip push bc push hl - ld c, a lea hl, iy + 7 ld b, 8 @@ -39,23 +45,12 @@ __lldivs: inc hl djnz .neg_divisor_loop - ld a, c pop hl pop bc .neg_divisor_skip: - xor a, b - ld (iy + 15), a - - pop af - push af - ld a, b - jr nc, .rem_save_sign_skip - ld (iy + 15), a -.rem_save_sign_skip: - - add a, a - call c, __llneg + bit 7, b + call nz, __llneg call __lldvrmu.hijack From 285cd00502175552180edda5de67e2ecb1a81b66 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Wed, 16 Feb 2022 23:54:49 -0500 Subject: [PATCH 107/142] Fix comments in __lldvrmu --- src/std/shared/lldvrmu.src | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src index ac75062fa..032a96dd2 100644 --- a/src/std/shared/lldvrmu.src +++ b/src/std/shared/lldvrmu.src @@ -17,17 +17,17 @@ __lldvrmu: push de push hl ; Stack frame: -; iy - 17 -> divisor -; iy - 14 -> divisor >> 24 -; iy - 11 -> divisor >> 48 +; iy - 17 -> dividend +; iy - 14 -> dividend >> 24 +; iy - 11 -> dividend >> 48 ; iy - 8 -> interrupts state ; iy - 5 -> return vector ; iy - 2 -> ??? ; iy + 1 -> ??? ; iy + 4 -> ??? -; iy + 7 -> dividend -; iy + 10 -> dividend >> 24 -; iy + 13 -> dividend >> 48 +; iy + 7 -> divisor +; iy + 10 -> divisor >> 24 +; iy + 13 -> divisor >> 48 ld bc, (iy + 10) sbc hl, hl @@ -92,9 +92,9 @@ __lldvrmu: ; iy + 6 -> ??? ; iy + 9 -> ??? ; iy + 12 -> ??? 
-; iy + 15 -> dividend -; iy + 18 -> dividend >> 24 -; iy + 21 -> dividend >> 48 +; iy + 15 -> quotient +; iy + 18 -> quotient >> 24 +; iy + 21 -> quotient >> 48 push hl exx From 1414ea7cabcb58a35a2e843a0abcfdf63818579c Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 17 Feb 2022 01:40:42 -0500 Subject: [PATCH 108/142] Implement lldiv and imaxdiv --- src/std/inttypes.h | 5 ++++- src/std/shared/lldiv.src | 46 +++++++++++++++++++++++++++++++++++++++ src/std/shared/lldivs.src | 13 +++++------ src/std/stdlib.h | 7 ++++++ 4 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 src/std/shared/lldiv.src diff --git a/src/std/inttypes.h b/src/std/inttypes.h index 41a8baf89..0e7fce007 100644 --- a/src/std/inttypes.h +++ b/src/std/inttypes.h @@ -175,7 +175,10 @@ __BEGIN_DECLS extern intmax_t imaxabs(intmax_t __n) __NOEXCEPT __attribute__((__const__)); -typedef struct { intmax_t quot, rem; } imaxdiv_t; +struct imaxdiv_t { + intmax_t rem; + intmax_t quot; +}; extern imaxdiv_t imaxdiv(intmax_t __numer, intmax_t __denom) __NOEXCEPT __attribute__((__const__)); diff --git a/src/std/shared/lldiv.src b/src/std/shared/lldiv.src new file mode 100644 index 000000000..6e69d7da8 --- /dev/null +++ b/src/std/shared/lldiv.src @@ -0,0 +1,46 @@ + assume adl=1 + + public _imaxdiv +_imaxdiv: + public _lldiv +_lldiv: + + pop hl + pop iy + ld (iy + 11), hl + pop hl + pop de + pop bc + + call __llrems + + ld (iy), hl + ld (iy + 3), de + ld (iy + 6), bc + + pop hl + pop de + ex (sp), hl + push hl + add hl, hl + pop bc + pop hl + call c, __llneg + + ld (iy + 8), hl + + ld hl, -21 + add hl, sp + ld sp, hl + + ld hl, (iy + 11) + + ld (iy + 11), de + ld (iy + 14), c + ld (iy + 15), b + + jp (hl) + + + extern __llneg + extern __llrems diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index caa07a5d7..23f44855b 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -18,19 +18,17 @@ __lldivs: push iy push af - sbc a, a - ld iy, 2 add iy, sp ld a, b 
- jr nz, .xor_divisor_skip + ld (iy), a xor a, (iy + 14) -.xor_divisor_skip: ld (iy + 15), a - bit 7, (iy + 14) - jr z, .neg_divisor_skip + xor a, b + rla + jr nc, .neg_divisor_skip push bc push hl @@ -55,13 +53,14 @@ __lldivs: call __lldvrmu.hijack pop af + bit 7, (iy + 8) jr c, .div_quotient_skip ld hl, (iy + 15) ld de, (iy + 18) ld bc, (iy + 21) + bit 7, (iy + 23) .div_quotient_skip: - bit 7, (iy + 23) call nz, __llneg pop iy diff --git a/src/std/stdlib.h b/src/std/stdlib.h index 5aeefa3f7..15d8880bf 100644 --- a/src/std/stdlib.h +++ b/src/std/stdlib.h @@ -13,6 +13,11 @@ typedef struct { long rem; } ldiv_t; +typedef struct { + long long rem; + long long quot; +} lldiv_t; + typedef char __align; union header { struct { @@ -97,6 +102,8 @@ div_t div(int numer, int denom); ldiv_t ldiv(long numer, long denom); +lldiv_t lldiv(long long numer, long long denom); + __END_DECLS #endif /* _STDLIB_H */ From 6346a3734bbeb35c490eebddd2ddd9163140d71c Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 17 Feb 2022 01:41:26 -0500 Subject: [PATCH 109/142] Test lldiv --- examples/standalone/math_test/src/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 68768a14d..9abf40d69 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -363,11 +363,11 @@ DEFINE_BINOP_INFIX_B_TO_LL( , divs, /) DEFINE_BINOP_INFIX_B_TO_LL( , rems, %) -DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_q, div, .quot) -DEFINE_BINOP_STRUCT_I_TO_L( , div_q) +DEFINE_BINOP_FUNC_FUNC_I_TO_LL( , div_q, div, .quot) +DEFINE_BINOP_STRUCT_I_TO_LL( , div_q) -DEFINE_BINOP_FUNC_FUNC_I_TO_L( , div_r, div, .rem) -DEFINE_BINOP_STRUCT_I_TO_L( , div_r) +DEFINE_BINOP_FUNC_FUNC_I_TO_LL( , div_r, div, .rem) +DEFINE_BINOP_STRUCT_I_TO_LL( , div_r) static const UnOp *unops[] = { From b9c561af1f467fea084b196e267954bf0c9b61b4 Mon Sep 17 00:00:00 2001 From: Zachary Wassall 
Date: Thu, 17 Feb 2022 01:44:17 -0500 Subject: [PATCH 110/142] Add *div results to autotest --- examples/standalone/math_test/autotest.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/examples/standalone/math_test/autotest.json b/examples/standalone/math_test/autotest.json index a43d4ac3d..2b8707ddf 100644 --- a/examples/standalone/math_test/autotest.json +++ b/examples/standalone/math_test/autotest.json @@ -44,6 +44,10 @@ "key|enter", "hashWait|rems", "key|enter", + "hashWait|div_q", + "key|enter", + "hashWait|div_r", + "key|enter", "hashWait|done" ], "hashes": { @@ -199,6 +203,22 @@ "F45969EF" ] }, + "div_q": { + "description": "div_q", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "EAFFC6D3" + ] + }, + "div_r": { + "description": "div_r", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "B9786654" + ] + }, "done": { "description": "done", "start": "vram_start", From e708b9679a01de2e77131df5e4ef06778b0fc62e Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Thu, 17 Feb 2022 02:32:18 -0500 Subject: [PATCH 111/142] Fix definition of imaxdiv_t --- src/std/inttypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/inttypes.h b/src/std/inttypes.h index 0e7fce007..61414ea72 100644 --- a/src/std/inttypes.h +++ b/src/std/inttypes.h @@ -175,10 +175,10 @@ __BEGIN_DECLS extern intmax_t imaxabs(intmax_t __n) __NOEXCEPT __attribute__((__const__)); -struct imaxdiv_t { +typedef struct { intmax_t rem; intmax_t quot; -}; +} imaxdiv_t; extern imaxdiv_t imaxdiv(intmax_t __numer, intmax_t __denom) __NOEXCEPT __attribute__((__const__)); From 11c20b8a6740a84331d8bc461de48980b053c1da Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Feb 2022 15:08:13 -0500 Subject: [PATCH 112/142] Optimize ldiv --- src/std/shared/ldiv.src | 102 +++++++++++++++------------------------- 1 file changed, 38 insertions(+), 64 deletions(-) diff --git a/src/std/shared/ldiv.src 
b/src/std/shared/ldiv.src index a71a09bdc..1d7d32ad2 100644 --- a/src/std/shared/ldiv.src +++ b/src/std/shared/ldiv.src @@ -1,80 +1,54 @@ -; Copyright (C) 1999-2008 Zilog, Inc, All Rights Reserved -;------------------------------------------------------------------------- -; ldiv function -; -; ldiv_t ldiv(long numer, long denom); -; -;------------------------------------------------------------------------- assume adl=1 public _ldiv _ldiv: - push bc + pop hl + pop iy + ld (iy), hl + pop hl + pop de + pop bc + ex (sp), hl + ld a, l + ex (sp), hl + + xor a, e push af - push de - push hl + xor a, e - ld hl,27 - add hl,sp - ld a,(hl) - dec hl - dec hl - dec hl - ld bc,(hl) ; aBC : denominator - dec hl - dec hl - dec hl - ld e,(hl) - dec hl - dec hl - dec hl - ld hl,(hl) ; eHL: Numerator - call __ldivs push de - push hl ; eHL : quotient - ld hl,27 - add hl,sp - ld e,(hl) - dec hl - dec hl - dec hl - ld hl,(hl) - call __lrems - ld a,e - ex de,hl ;aDE :remainder + bit 7, e + call __ldivs_lrems_common - ld hl,21 - add hl,sp - ld hl,(hl) - push hl + call __ldvrmu - ld bc, 7 - add hl, bc - ld (hl),a - dec hl - dec hl - dec hl - ld (hl),de ; aDE : remainder - - pop hl - pop bc - ld (hl),bc ; bc : 3 LSB of quotient - pop bc - push hl - inc hl - inc hl - inc hl - ld (hl),c ; c : MSB of quotient - pop hl + exx + ld e, a + pop af + call m, __lneg + ld (iy + 7), e + ld (iy + 4), hl - pop de - pop de + exx pop af - pop bc + call m, __lneg + ld (iy + 3), e + ld de, (iy) + ld (iy), hl + push de + push de + push de + push de + push de + bit 2, c + ret z + ei ret - extern __ldivs - extern __lrems + + extern __ldivs_lrems_common + extern __ldvrmu + extern __lneg From fea7ef5add741597f785d12b447c9f79483942ac Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Feb 2022 18:05:21 -0500 Subject: [PATCH 113/142] Optimize ldiv slightly --- src/std/shared/ldiv.src | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/std/shared/ldiv.src 
b/src/std/shared/ldiv.src index 1d7d32ad2..493313d26 100644 --- a/src/std/shared/ldiv.src +++ b/src/std/shared/ldiv.src @@ -5,18 +5,19 @@ _ldiv: pop hl pop iy - ld (iy), hl - pop hl pop de pop bc + ld a, c + pop bc ex (sp), hl - ld a, l - ex (sp), hl + ex de, hl + ld d, a xor a, e push af - xor a, e + ld a, e + ld e, d push de bit 7, e @@ -28,25 +29,25 @@ _ldiv: ld e, a pop af call m, __lneg - ld (iy + 7), e ld (iy + 4), hl + ld (iy + 7), e exx pop af call m, __lneg - ld (iy + 3), e - ld de, (iy) ld (iy), hl + ld (iy + 3), e - push de + ex (sp), hl push de push de push de push de bit 2, c - ret z + jr z, .ei_skip ei - ret +.ei_skip: + jp (hl) extern __ldivs_lrems_common From 4cdb4ea7a535bb019ec66b9b342ec66b2f2f17a5 Mon Sep 17 00:00:00 2001 From: Matthew Waltz Date: Fri, 4 Mar 2022 19:59:41 -0700 Subject: [PATCH 114/142] add simple strtoll and strtoull Signed-off-by: Matthew Waltz --- src/std/limits.h | 6 +-- src/std/shared/strtoll.c | 97 +++++++++++++++++++++++++++++++++++++++ src/std/shared/strtoull.c | 95 ++++++++++++++++++++++++++++++++++++++ src/std/stdlib.h | 8 ++++ 4 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 src/std/shared/strtoll.c create mode 100644 src/std/shared/strtoull.c diff --git a/src/std/limits.h b/src/std/limits.h index c9a9894a3..917f8559a 100644 --- a/src/std/limits.h +++ b/src/std/limits.h @@ -25,8 +25,8 @@ #define LONG_MAX __LONG_MAX__ #define ULONG_MAX __ULONG_MAX__ -#define LLONG_MIN ~__LLONG_MAX__ -#define LLONG_MAX __LLONG_MAX__ -#define ULLONG_MAX __ULLONG_MAX__ +#define LLONG_MIN (-9223372036854775807LL-1) +#define LLONG_MAX 9223372036854775807LL +#define ULLONG_MAX 18446744073709551615ULL #endif /* _LIMITS_H */ diff --git a/src/std/shared/strtoll.c b/src/std/shared/strtoll.c new file mode 100644 index 000000000..ce543d598 --- /dev/null +++ b/src/std/shared/strtoll.c @@ -0,0 +1,97 @@ +/************************************************************************/ +/* */ +/* Copyright (C)1987-2008 by */ +/* Zilog, Inc.
*/ +/* */ +/* San Jose, California */ +/* */ +/************************************************************************/ +#include +#include +#include +#include +#include +#include + +/************************************************* +* +* strtoll - string to long long conversion +* +* Inputs: +* nptr - pointer to the character string +* endptr - place to put ptr to first invalid character +* base - radix (0 = auto-detect 8/10/16 from the prefix) +* +* Returns: +* the value of the number; LLONG_MIN/LLONG_MAX with errno=ERANGE on overflow +* +*************************************************/ +long long strtoll(const char *__restrict nptr, + char **__restrict endptr, int base) +{ + long long sum,psum; + char sign; + int radix = base; + char *cp = (char*)nptr; + char digit; + + while (isspace(*cp)) + ++cp; + + sign = 0; + if ( *cp == (char)'-' ) { + sign = 1; + ++cp; + } + else if ( *cp == (char)'+' ) + ++cp; + + if (base == 0) + { + radix = 10; + if (*cp == (char)'0') + { + if (cp[1] == (char)'x' || cp[1] == (char)'X') + { + radix = 16; + } + else + { + radix = 8; + } + } + } + + if (radix == 16 && *cp == (char)'0' && (cp[1] == (char)'x' || cp[1] == (char)'X')) + cp += 2; /* skip "0x" for explicit base 16 and for auto-detected hex */ + + sum = 0; + for (;;) { + digit = toupper(*(cp++)); + if (digit >= (char)'A') + digit = (digit - (char)'A') + (char)10; + else if (digit <= (char)'9') /* ':'..'@' must not map to 10..16 */ + digit -= (char)'0'; + if (digit < (char)0 || digit >= (char)radix) + break; + psum = (LLONG_MAX - digit) / radix; /* largest sum that can still take this digit */ + if (sum > psum) { /* overflow, detected before it happens */ + sum = sign ? LLONG_MIN : LLONG_MAX; + errno=ERANGE; + break; + } + sum *= radix; + sum += digit; + } + + if (endptr) { + --cp; + if (base == 0) { + while (*cp == (char)'h' || *cp == (char)'u' || + *cp == (char)'l' || *cp == (char)'L') + ++cp; + } + *endptr = (char*)cp; + } + return((sign && sum != LLONG_MIN) ? -sum : sum); /* LLONG_MIN is the already-negative clamp */ +} diff --git a/src/std/shared/strtoull.c b/src/std/shared/strtoull.c new file mode 100644 index 000000000..7c921d7a1 --- /dev/null +++ b/src/std/shared/strtoull.c @@ -0,0 +1,95 @@ +/************************************************* + * Copyright (C) 2006-2008 by Zilog, Inc.
+ * All Rights Reserved + *************************************************/ + +#include +#include +#include +#include +#include +#include + +/************************************************* +* +* strtoull - string to unsigned long long conversion +* +* Inputs: +* nptr - pointer to the character string +* endptr - place to put ptr to first invalid character +* base - radix (0 = auto-detect 8/10/16 from the prefix) +* +* Returns: +* the value of the number; ULLONG_MAX with errno=ERANGE on overflow +* +*************************************************/ +unsigned long long strtoull(const char *__restrict nptr, + char **__restrict endptr, int base) +{ + unsigned long long sum, psum; + unsigned char sign; + unsigned char digit; + unsigned char radix = base; + char *cp = (char*)nptr; + + while (isspace(*cp)) + ++cp; + + sign = 0; + + if ( *cp == '+' ) { + ++cp; + } + else if ( *cp == '-' ) { + ++cp; + sign = 1; + } + + if (base == 0) { + if (*cp == '0') + if (cp[1] == 'x' || cp[1] == 'X') + radix = 16; + else + radix = 8; + else + radix = 10; + } + + /* If the radix is 16 (declared or auto-detected), skip the "0x" prefix.
*/ + if (radix == 16 && *cp == '0' && (cp[1] == 'x' || cp[1] == 'X')) + cp += 2; + + sum = 0; + for (;;) { + digit = toupper(*(cp++)); + if (digit >= 'A') + digit = digit - ('A' - 10); + else if (digit <= '9') /* ':'..'@' must not map to 10..16 */ + digit -= '0'; + if (digit >= radix) + break; + psum = (ULLONG_MAX - digit) / radix; /* largest sum that can still take this digit */ + if (sum > psum) { /* overflow: saturate, keep consuming digits */ + errno = ERANGE; + sum = ULLONG_MAX; + } else + sum = sum * radix + digit; + } + + if (sign) { + // errno = ERANGE; + // sum = 0; + sum = -sum; + } + + if (endptr) { + --cp; + if (base == 0) { + while (*cp == 'h' || *cp == 'u' || + *cp == 'l' || *cp == 'L') + ++cp; + } + *endptr = (char*)cp; + } + return(sum); +} diff --git a/src/std/stdlib.h b/src/std/stdlib.h index 15d8880bf..9c1c38ab0 100644 --- a/src/std/stdlib.h +++ b/src/std/stdlib.h @@ -62,10 +62,18 @@ double strtod(const char *__restrict nptr, long strtol(const char *__restrict nptr, char **__restrict endptr, int base) __attribute__((nonnull(1))); +long long strtoll(const char *__restrict nptr, + char **__restrict endptr, + int base) __attribute__((nonnull(1))); + unsigned long strtoul(const char *__restrict nptr, char **__restrict endptr, int base) __attribute__((nonnull(1))); +unsigned long long strtoull(const char *__restrict nptr, + char **__restrict endptr, int base) + __attribute__((nonnull(1))); + void srand(unsigned int seed); int rand(void); From b33ebac620db610c84c44877462e2a8bc40b92e5 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Mar 2022 21:39:59 -0500 Subject: [PATCH 115/142] `atos` isn't a thing --- src/std/shared/atoi.src | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/std/shared/atoi.src b/src/std/shared/atoi.src index dd51a3560..4e1a948ad 100644 --- a/src/std/shared/atoi.src +++ b/src/std/shared/atoi.src @@ -1,7 +1,6 @@ assume adl=1 - public _atoi, _atol, _atos -_atos := _atoi + public _atoi, _atol _atol := _atoi _atoi: pop bc From 3bd439bd467504ef317b2177a7e065e30ec7ba74 Mon Sep 17 00:00:00 2001 From: Matthew Waltz Date: Fri, 4 Mar 2022 21:10:26 -0700 Subject: [PATCH
116/142] add source files to a section Signed-off-by: Matthew Waltz --- src/std/shared/abs.src | 3 +-- src/std/shared/bdivs.src | 2 ++ src/std/shared/bdivu.src | 2 ++ src/std/shared/bdvrmu.src | 3 +++ src/std/shared/bmuls.src | 17 +---------------- src/std/shared/bmulu.src | 17 +---------------- src/std/shared/brems.src | 2 ++ src/std/shared/bremu.src | 2 ++ src/std/shared/bshrs.src | 3 +++ src/std/shared/clock.src | 2 +- src/std/shared/div.src | 2 +- src/std/shared/frimuls.src | 16 ---------------- src/std/shared/frimulu.src | 16 ---------------- src/std/shared/frsmuls.src | 16 ---------------- src/std/shared/getchar.src | 2 +- src/std/shared/iand_fast.src | 3 +++ src/std/shared/ibitrev_fast.src | 1 + src/std/shared/icmpzero_fast.src | 3 +++ src/std/shared/imulu_fast.src | 4 ++-- src/std/shared/inchar.src | 2 +- src/std/shared/ineg_fast.src | 1 + src/std/shared/inot_fast.src | 1 + src/std/shared/internal_bitrev_byte.src | 3 +++ src/std/shared/ior_fast.src | 3 +++ src/std/shared/isalnum.src | 7 ------- src/std/shared/isalpha.src | 5 ----- src/std/shared/isascii.src | 4 ---- src/std/shared/iscntrl.src | 5 ----- src/std/shared/isdigit.src | 5 ----- src/std/shared/isgraph.src | 6 ------ src/std/shared/ishrs_1_fast.src | 1 + src/std/shared/ishru_1_fast.src | 1 + src/std/shared/islower.src | 6 ------ src/std/shared/isprint.src | 6 ------ src/std/shared/ispunct.src | 6 ------ src/std/shared/isspace.src | 6 ------ src/std/shared/isupper.src | 6 ------ src/std/shared/isxdigit.src | 6 ------ src/std/shared/ixor_fast.src | 3 +++ src/std/shared/labs.src | 1 - src/std/shared/ladd_1.src | 1 + src/std/shared/ladd_b_fast.src | 3 +++ src/std/shared/ladd_fast.src | 3 +++ src/std/shared/land_fast.src | 3 +++ src/std/shared/lcmps_fast.src | 3 +++ src/std/shared/lcmpu_fast.src | 3 +++ src/std/shared/lcmpzero_fast.src | 3 +++ src/std/shared/llabs.src | 4 ++-- src/std/shared/lladd.src | 3 +++ src/std/shared/lladd_1.src | 1 + src/std/shared/lladd_b.src | 3 +++ 
src/std/shared/lladd_b_fast.src | 3 +++ src/std/shared/lladd_fast.src | 3 +++ src/std/shared/lland.src | 3 +++ src/std/shared/lland_fast.src | 3 +++ src/std/shared/llbitrev.src | 1 + src/std/shared/llcmpu.src | 3 +++ src/std/shared/llcmpu_fast.src | 3 +++ src/std/shared/llcmpzero.src | 3 +++ src/std/shared/llcmpzero_fast.src | 3 +++ src/std/shared/lldiv.src | 6 +++--- src/std/shared/lldivs.src | 1 + src/std/shared/lldivu.src | 1 + src/std/shared/lldivu_b.src | 3 +++ src/std/shared/lldvrmu.src | 1 + src/std/shared/llmulu.src | 4 ++-- src/std/shared/llmulu_b.src | 3 +++ src/std/shared/llneg.src | 1 + src/std/shared/llneg_fast.src | 1 + src/std/shared/llnot.src | 1 + src/std/shared/llnot_fast.src | 1 + src/std/shared/llor.src | 3 +++ src/std/shared/llor_fast.src | 3 +++ src/std/shared/llpopcnt.src | 1 + src/std/shared/llremu.src | 1 + src/std/shared/llshl.src | 3 +++ src/std/shared/llshl_1_fast.src | 1 + src/std/shared/llshrs.src | 3 +++ src/std/shared/llshrs_1_fast.src | 1 + src/std/shared/llshrs_fast.src | 3 +++ src/std/shared/llshru.src | 3 +++ src/std/shared/llshru_1_fast.src | 1 + src/std/shared/llsub.src | 3 +++ src/std/shared/llsub_1.src | 1 + src/std/shared/llsub_fast.src | 3 +++ src/std/shared/llxor.src | 3 +++ src/std/shared/llxor_fast.src | 3 +++ src/std/shared/lmulu_fast.src | 4 ++-- src/std/shared/lneg_fast.src | 1 + src/std/shared/lnot_fast.src | 1 + src/std/shared/lor_fast.src | 3 +++ src/std/shared/lrems.src | 1 + src/std/shared/lshrs_1_fast.src | 1 + src/std/shared/lshru_1_fast.src | 1 + src/std/shared/lsub_1.src | 1 + src/std/shared/lsub_fast.src | 3 +++ src/std/shared/lxor_fast.src | 3 +++ src/std/shared/memset.src | 1 - src/std/shared/outchar.src | 2 +- src/std/shared/putchar.src | 2 +- src/std/shared/puts.src | 2 +- src/std/shared/sand_fast.src | 3 +++ src/std/shared/scmpzero_fast.src | 3 +++ src/std/shared/smulu_fast.src | 6 ++++-- src/std/shared/sneg_fast.src | 3 +++ src/std/shared/snot_fast.src | 3 +++ src/std/shared/sor_fast.src | 3 +++ 
src/std/shared/spopcnt_fast.src | 1 + src/std/shared/srand.src | 1 - src/std/shared/strncasecmp.src | 1 - src/std/shared/sxor_fast.src | 3 +++ src/std/static/bshl.src | 3 +++ src/std/static/bshru.src | 3 +++ src/std/static/cos.src | 1 - src/std/static/iand.src | 3 +++ src/std/static/icmpzero.src | 3 +++ src/std/static/idivs.src | 1 + src/std/static/idivu.src | 1 + src/std/static/idvrms.src | 1 + src/std/static/idvrmu.src | 1 + src/std/static/imulu.src | 4 ++-- src/std/static/ineg.src | 3 +++ src/std/static/inot.src | 3 +++ src/std/static/ior.src | 3 +++ src/std/static/iremu.src | 1 + src/std/static/ishl.src | 3 +++ src/std/static/ishrs.src | 1 + src/std/static/ishru.src | 1 + src/std/static/ixor.src | 3 +++ src/std/static/ladd.src | 3 +++ src/std/static/ladd_b.src | 3 +++ src/std/static/land.src | 3 +++ src/std/static/lcmps.src | 3 +++ src/std/static/lcmpu.src | 3 +++ src/std/static/lcmpzero.src | 3 +++ src/std/static/ldivs.src | 1 + src/std/static/ldivs_lrems_common.src | 1 + src/std/static/ldivu.src | 1 + src/std/static/ldvrmu.src | 1 + src/std/static/lmulu.src | 4 ++-- src/std/static/lneg.src | 1 + src/std/static/lnot.src | 1 + src/std/static/lor.src | 3 +++ src/std/static/lremu.src | 1 + src/std/static/lshl.src | 3 +++ src/std/static/lshrs.src | 1 + src/std/static/lshru.src | 1 + src/std/static/lsub.src | 3 +++ src/std/static/lxor.src | 3 +++ src/std/static/sand.src | 3 +++ src/std/static/scmpzero.src | 3 +++ src/std/static/smulu.src | 6 ++++-- src/std/static/sneg.src | 3 +++ src/std/static/snot.src | 3 +++ src/std/static/sor.src | 3 +++ src/std/static/sshl.src | 3 +++ src/std/static/sshrs.src | 3 +++ src/std/static/sshru.src | 3 +++ src/std/static/sxor.src | 3 +++ src/std/static/tolower.src | 3 +++ src/std/static/toupper.src | 3 +++ 161 files changed, 307 insertions(+), 181 deletions(-) diff --git a/src/std/shared/abs.src b/src/std/shared/abs.src index 95b7bfc59..43d8826d4 100644 --- a/src/std/shared/abs.src +++ b/src/std/shared/abs.src @@ -1,5 +1,4 @@ -; int 
abs(int); - assume adl=1 + assume adl=1 section .text public _abs diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index 4f076c378..b0ddc39c2 100644 --- a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -1,3 +1,5 @@ + assume adl=1 + section .text public __bdivs __bdivs: diff --git a/src/std/shared/bdivu.src b/src/std/shared/bdivu.src index f729b8f4f..f9bc91657 100644 --- a/src/std/shared/bdivu.src +++ b/src/std/shared/bdivu.src @@ -1,3 +1,5 @@ + assume adl=1 + section .text public __bdivu __bdivu: diff --git a/src/std/shared/bdvrmu.src b/src/std/shared/bdvrmu.src index b93231478..36963ec2c 100644 --- a/src/std/shared/bdvrmu.src +++ b/src/std/shared/bdvrmu.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __bdvrmu __bdvrmu: ; I: B=dividend, C=divisor diff --git a/src/std/shared/bmuls.src b/src/std/shared/bmuls.src index b84de035b..2f94fa278 100644 --- a/src/std/shared/bmuls.src +++ b/src/std/shared/bmuls.src @@ -1,19 +1,4 @@ -; (c) Copyright 2007-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; Signed Byte Multiply -; Input: -; Operand1: -; A : 8 bit -; -; Operand2: -; B : 8 bit -; -; Output: -; Result: A : 8 bit product -; Registers Used: -; none -;------------------------------------------------------------------------- - assume adl=1 + assume adl=1 section .text public __bmuls diff --git a/src/std/shared/bmulu.src b/src/std/shared/bmulu.src index 55dfa90a9..fa1b2ce66 100644 --- a/src/std/shared/bmulu.src +++ b/src/std/shared/bmulu.src @@ -1,19 +1,4 @@ -; (c) Copyright 2007-2008 Zilog, Inc. 
-;------------------------------------------------------------------------- -; Unsigned Byte Multiply -; Input: -; Operand1: -; B : 8 bit -; -; Operand2: -; C : 8 bit -; -; Output: -; Result: A : 8 bit product -; Registers Used: -; none -;------------------------------------------------------------------------- - assume adl=1 + assume adl=1 section .text public __bmulu diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 620c5672f..d34f8de04 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -1,3 +1,5 @@ + assume adl=1 + section .text public __brems __brems: diff --git a/src/std/shared/bremu.src b/src/std/shared/bremu.src index c8f93aae2..1ad30970f 100644 --- a/src/std/shared/bremu.src +++ b/src/std/shared/bremu.src @@ -1,3 +1,5 @@ + assume adl=1 + section .text public __bremu __bremu: diff --git a/src/std/shared/bshrs.src b/src/std/shared/bshrs.src index 25bf0bf13..8461f9bb8 100644 --- a/src/std/shared/bshrs.src +++ b/src/std/shared/bshrs.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __bshrs __bshrs: ; CC: if B!=0: B*(5*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 diff --git a/src/std/shared/clock.src b/src/std/shared/clock.src index 988d7e0ca..064600998 100644 --- a/src/std/shared/clock.src +++ b/src/std/shared/clock.src @@ -1,7 +1,7 @@ mpTmr2Counter := 0F20010h - assume adl = 1 + assume adl=1 section .text public _clock diff --git a/src/std/shared/div.src b/src/std/shared/div.src index 4872bd5f7..b9a9f4a48 100644 --- a/src/std/shared/div.src +++ b/src/std/shared/div.src @@ -1,4 +1,4 @@ - assume adl=1 + assume adl=1 section .text public _div diff --git a/src/std/shared/frimuls.src b/src/std/shared/frimuls.src index 15cd56ed9..800c38df3 100644 --- a/src/std/shared/frimuls.src +++ b/src/std/shared/frimuls.src @@ -1,19 +1,3 @@ -; (c) Copyright 2004-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; fract interger Multiplication Unsigned. 
-; -; Input: -; Operand1: -; uHL : 24 bits -; -; Operand2: -; uBC : 24 bits -; -; Output: -; Result: uHL : 24 bit -; Registers Used: -; none -;------------------------------------------------------------------------- assume adl=1 section .text diff --git a/src/std/shared/frimulu.src b/src/std/shared/frimulu.src index e79827382..021730040 100644 --- a/src/std/shared/frimulu.src +++ b/src/std/shared/frimulu.src @@ -1,19 +1,3 @@ -; (c) Copyright 2004-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; fract interger Multiplication Unsigned. -; -; Input: -; Operand1: -; uHL : 24 bits -; -; Operand2: -; uBC : 24 bits -; -; Output: -; Result: uHL : 24 bits -; Registers Used: -; none -;------------------------------------------------------------------------- assume adl=1 section .text diff --git a/src/std/shared/frsmuls.src b/src/std/shared/frsmuls.src index 71198cafc..22da523df 100644 --- a/src/std/shared/frsmuls.src +++ b/src/std/shared/frsmuls.src @@ -1,19 +1,3 @@ -; (c) Copyright 2004-2008 Zilog, Inc. -;------------------------------------------------------------------------- -; fract short Multiplication signed. 
-; -; Input: -; Operand1: -; HL : 16 bits -; -; Operand2: -; BC : 16 bits -; -; Output: -; Result: HL : 16 bits -; Registers Used: -; none -;------------------------------------------------------------------------- assume adl=1 section .text diff --git a/src/std/shared/getchar.src b/src/std/shared/getchar.src index 11e44fd71..ff9ba0d2a 100644 --- a/src/std/shared/getchar.src +++ b/src/std/shared/getchar.src @@ -1,4 +1,4 @@ - assume adl = 1 + assume adl=1 section .text public _getchar diff --git a/src/std/shared/iand_fast.src b/src/std/shared/iand_fast.src index 0cae57084..b4dbc4fea 100644 --- a/src/std/shared/iand_fast.src +++ b/src/std/shared/iand_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __iand_fast __iand_fast: push bc diff --git a/src/std/shared/ibitrev_fast.src b/src/std/shared/ibitrev_fast.src index 0c64c4529..e1b508bf3 100644 --- a/src/std/shared/ibitrev_fast.src +++ b/src/std/shared/ibitrev_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ibitrev_fast __ibitrev_fast: add hl, hl diff --git a/src/std/shared/icmpzero_fast.src b/src/std/shared/icmpzero_fast.src index a40f8ee80..2e044e743 100644 --- a/src/std/shared/icmpzero_fast.src +++ b/src/std/shared/icmpzero_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __icmpzero_fast __icmpzero_fast: xor a, a diff --git a/src/std/shared/imulu_fast.src b/src/std/shared/imulu_fast.src index 99ee2750f..cd015c232 100644 --- a/src/std/shared/imulu_fast.src +++ b/src/std/shared/imulu_fast.src @@ -1,8 +1,8 @@ assume adl=1 - public __imuls_fast + section .text + public __imuls_fast, __imulu_fast __imuls_fast: - public __imulu_fast __imulu_fast: ; Multiplies UHL by UBC and returns the 24-bit product uhl. 
diff --git a/src/std/shared/inchar.src b/src/std/shared/inchar.src index cc2f51b1f..fda121fca 100644 --- a/src/std/shared/inchar.src +++ b/src/std/shared/inchar.src @@ -1,4 +1,4 @@ - assume adl = 1 + assume adl=1 section .text weak _inchar diff --git a/src/std/shared/ineg_fast.src b/src/std/shared/ineg_fast.src index 8caaa34b1..0527ff957 100644 --- a/src/std/shared/ineg_fast.src +++ b/src/std/shared/ineg_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ineg_fast __ineg_fast: add hl, de ; uhl=UHL+UDE diff --git a/src/std/shared/inot_fast.src b/src/std/shared/inot_fast.src index b8b743557..3c32bf605 100644 --- a/src/std/shared/inot_fast.src +++ b/src/std/shared/inot_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __inot_fast __inot_fast: add hl, de ; uhl=UHL+UDE diff --git a/src/std/shared/internal_bitrev_byte.src b/src/std/shared/internal_bitrev_byte.src index d7139c231..3677ddb9b 100644 --- a/src/std/shared/internal_bitrev_byte.src +++ b/src/std/shared/internal_bitrev_byte.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __internal_bitrev_byte __internal_bitrev_byte: add hl, hl diff --git a/src/std/shared/ior_fast.src b/src/std/shared/ior_fast.src index cd115bdb8..65288f73e 100644 --- a/src/std/shared/ior_fast.src +++ b/src/std/shared/ior_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ior_fast __ior_fast: push bc diff --git a/src/std/shared/isalnum.src b/src/std/shared/isalnum.src index b75c79dfa..1048b0347 100644 --- a/src/std/shared/isalnum.src +++ b/src/std/shared/isalnum.src @@ -1,10 +1,3 @@ -; original source -; int isalnum(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(UC|LC|DIG)); -; } - assume adl=1 section .text diff --git a/src/std/shared/isalpha.src b/src/std/shared/isalpha.src index 6901bf5f6..071a98d38 100644 --- a/src/std/shared/isalpha.src +++ b/src/std/shared/isalpha.src @@ -1,8 +1,3 @@ -; original source -; int isalnum(int c) { 
-; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(UC|LC)); assume adl=1 section .text diff --git a/src/std/shared/isascii.src b/src/std/shared/isascii.src index 6c3d9a5f3..4211f43b5 100644 --- a/src/std/shared/isascii.src +++ b/src/std/shared/isascii.src @@ -1,7 +1,3 @@ -; original source -; int isascii(int c) { -; return c >= 0 && c < 128; -; } assume adl=1 section .text diff --git a/src/std/shared/iscntrl.src b/src/std/shared/iscntrl.src index a4dc9c0dd..3d027c9f9 100644 --- a/src/std/shared/iscntrl.src +++ b/src/std/shared/iscntrl.src @@ -1,8 +1,3 @@ -;int iscntrl(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(CTL)); -;} assume adl=1 section .text diff --git a/src/std/shared/isdigit.src b/src/std/shared/isdigit.src index ca5cdc79b..9d37b03aa 100644 --- a/src/std/shared/isdigit.src +++ b/src/std/shared/isdigit.src @@ -1,8 +1,3 @@ -;int isdigit(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(DIG)); -;} assume adl=1 section .text diff --git a/src/std/shared/isgraph.src b/src/std/shared/isgraph.src index 32b9dcfb1..839783ecf 100644 --- a/src/std/shared/isgraph.src +++ b/src/std/shared/isgraph.src @@ -1,9 +1,3 @@ -; original source -; int isgraph(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(UC|LC|DIG|PUN)); -; } assume adl=1 section .text diff --git a/src/std/shared/ishrs_1_fast.src b/src/std/shared/ishrs_1_fast.src index 1f6e4a15f..b5c676d8a 100644 --- a/src/std/shared/ishrs_1_fast.src +++ b/src/std/shared/ishrs_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ishru_1_fast __ishru_1_fast: push hl diff --git a/src/std/shared/ishru_1_fast.src b/src/std/shared/ishru_1_fast.src index 5fac4f94c..c68dbc8b3 100644 --- a/src/std/shared/ishru_1_fast.src +++ b/src/std/shared/ishru_1_fast.src @@ -1,5 +1,6 @@ assume 
adl=1 + section .text public __ishru_1_fast __ishru_1_fast: push hl diff --git a/src/std/shared/islower.src b/src/std/shared/islower.src index 92afe2cb8..864dec6d9 100644 --- a/src/std/shared/islower.src +++ b/src/std/shared/islower.src @@ -1,9 +1,3 @@ -; original source -; int islower(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(LC)); -; } assume adl=1 section .text diff --git a/src/std/shared/isprint.src b/src/std/shared/isprint.src index 748c7666a..5a0aec5a0 100644 --- a/src/std/shared/isprint.src +++ b/src/std/shared/isprint.src @@ -1,9 +1,3 @@ -; original source -; int isprint(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(UC|LC|DIG|PUN|SP)); -; } assume adl=1 section .text diff --git a/src/std/shared/ispunct.src b/src/std/shared/ispunct.src index 34174a8e4..ccc542fa5 100644 --- a/src/std/shared/ispunct.src +++ b/src/std/shared/ispunct.src @@ -1,9 +1,3 @@ -; original source -; int ispunct(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(PUN)); -; } assume adl=1 section .text diff --git a/src/std/shared/isspace.src b/src/std/shared/isspace.src index ee8282d30..b8face4db 100644 --- a/src/std/shared/isspace.src +++ b/src/std/shared/isspace.src @@ -1,9 +1,3 @@ -; original source -; int isspace(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(WS)); -; } assume adl=1 section .text diff --git a/src/std/shared/isupper.src b/src/std/shared/isupper.src index 8ce4d0815..2de883b3c 100644 --- a/src/std/shared/isupper.src +++ b/src/std/shared/isupper.src @@ -1,9 +1,3 @@ -; original source -; int isupper(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(UC)); -; } assume adl=1 section .text diff --git a/src/std/shared/isxdigit.src b/src/std/shared/isxdigit.src 
index 884bc9f42..d5485ce15 100644 --- a/src/std/shared/isxdigit.src +++ b/src/std/shared/isxdigit.src @@ -1,9 +1,3 @@ -; original source -; int isxdigit(int c) { -; if( (unsigned char)c & (unsigned char)0x80 ) -; return (0); -; return(__maptab[c] & (unsigned char)(DIG|HEX)); -; } assume adl=1 section .text diff --git a/src/std/shared/ixor_fast.src b/src/std/shared/ixor_fast.src index b906049de..36a90a3e9 100644 --- a/src/std/shared/ixor_fast.src +++ b/src/std/shared/ixor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ixor_fast __ixor_fast: push bc diff --git a/src/std/shared/labs.src b/src/std/shared/labs.src index 1a7285883..c12cd8ebd 100644 --- a/src/std/shared/labs.src +++ b/src/std/shared/labs.src @@ -1,4 +1,3 @@ -; long labs(long); assume adl=1 section .text diff --git a/src/std/shared/ladd_1.src b/src/std/shared/ladd_1.src index 597ecee1e..01833819a 100644 --- a/src/std/shared/ladd_1.src +++ b/src/std/shared/ladd_1.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ladd_1 __ladd_1: inc hl diff --git a/src/std/shared/ladd_b_fast.src b/src/std/shared/ladd_b_fast.src index 27f30108c..17f93486b 100644 --- a/src/std/shared/ladd_b_fast.src +++ b/src/std/shared/ladd_b_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ladd_b_fast __ladd_b_fast: add a, l diff --git a/src/std/shared/ladd_fast.src b/src/std/shared/ladd_fast.src index 4974ef002..247fedc0a 100644 --- a/src/std/shared/ladd_fast.src +++ b/src/std/shared/ladd_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ladd_fast __ladd_fast: add hl, bc diff --git a/src/std/shared/land_fast.src b/src/std/shared/land_fast.src index 4e92c6e8e..c26514514 100644 --- a/src/std/shared/land_fast.src +++ b/src/std/shared/land_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __land_fast __land_fast: and a, e diff --git a/src/std/shared/lcmps_fast.src b/src/std/shared/lcmps_fast.src index 3875dc17c..2ec9531e5 100644 --- a/src/std/shared/lcmps_fast.src 
+++ b/src/std/shared/lcmps_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmps_fast __lcmps_fast: ld d, a diff --git a/src/std/shared/lcmpu_fast.src b/src/std/shared/lcmpu_fast.src index 9158a1d37..6ef89132e 100644 --- a/src/std/shared/lcmpu_fast.src +++ b/src/std/shared/lcmpu_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmpu_fast __lcmpu_fast: sub a, e diff --git a/src/std/shared/lcmpzero_fast.src b/src/std/shared/lcmpzero_fast.src index 0efee8eb2..75d2eb24f 100644 --- a/src/std/shared/lcmpzero_fast.src +++ b/src/std/shared/lcmpzero_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmpzero_fast __lcmpzero_fast: xor a, a diff --git a/src/std/shared/llabs.src b/src/std/shared/llabs.src index c7c1f0ba7..4a48520d7 100644 --- a/src/std/shared/llabs.src +++ b/src/std/shared/llabs.src @@ -1,8 +1,8 @@ assume adl=1 - public _imaxabs + section .text + public _imaxabs, _llabs _imaxabs: - public _llabs _llabs: pop iy pop hl diff --git a/src/std/shared/lladd.src b/src/std/shared/lladd.src index 11891a6d9..71e215ed5 100644 --- a/src/std/shared/lladd.src +++ b/src/std/shared/lladd.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lladd __lladd: push iy diff --git a/src/std/shared/lladd_1.src b/src/std/shared/lladd_1.src index 4a9cfcba1..402f5bbb7 100644 --- a/src/std/shared/lladd_1.src +++ b/src/std/shared/lladd_1.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lladd_1 __lladd_1: inc hl diff --git a/src/std/shared/lladd_b.src b/src/std/shared/lladd_b.src index 742699bab..2003f5d71 100644 --- a/src/std/shared/lladd_b.src +++ b/src/std/shared/lladd_b.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lladd_b __lladd_b: push bc diff --git a/src/std/shared/lladd_b_fast.src b/src/std/shared/lladd_b_fast.src index f73597653..864a0132f 100644 --- a/src/std/shared/lladd_b_fast.src +++ b/src/std/shared/lladd_b_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lladd_b_fast 
__lladd_b_fast: add a, l ; a=L+A diff --git a/src/std/shared/lladd_fast.src b/src/std/shared/lladd_fast.src index 0b15e0b82..a73610681 100644 --- a/src/std/shared/lladd_fast.src +++ b/src/std/shared/lladd_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lladd_fast __lladd_fast: ld iy, 0 diff --git a/src/std/shared/lland.src b/src/std/shared/lland.src index ce6ff5a75..608f848af 100644 --- a/src/std/shared/lland.src +++ b/src/std/shared/lland.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lland __lland: ; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 diff --git a/src/std/shared/lland_fast.src b/src/std/shared/lland_fast.src index bad06317f..dec0f7560 100644 --- a/src/std/shared/lland_fast.src +++ b/src/std/shared/lland_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lland_fast __lland_fast: ; CC: 56*r(PC)+19*r(SPL)+8*w(SPL)+1 diff --git a/src/std/shared/llbitrev.src b/src/std/shared/llbitrev.src index b4a7010a2..16696b1a5 100644 --- a/src/std/shared/llbitrev.src +++ b/src/std/shared/llbitrev.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llbitrev __llbitrev: push af ; stack: 0AF diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src index 0a1a0500b..b2fb9884a 100644 --- a/src/std/shared/llcmpu.src +++ b/src/std/shared/llcmpu.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llcmpu __llcmpu: push iy diff --git a/src/std/shared/llcmpu_fast.src b/src/std/shared/llcmpu_fast.src index 5d8f97c44..7223329ed 100644 --- a/src/std/shared/llcmpu_fast.src +++ b/src/std/shared/llcmpu_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llcmpu_fast __llcmpu_fast: ld iy, 0 diff --git a/src/std/shared/llcmpzero.src b/src/std/shared/llcmpzero.src index 77a81340d..6c3b369d1 100644 --- a/src/std/shared/llcmpzero.src +++ b/src/std/shared/llcmpzero.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llcmpzero __llcmpzero: inc b diff --git a/src/std/shared/llcmpzero_fast.src 
b/src/std/shared/llcmpzero_fast.src index 165b3119a..44ed2d1ae 100644 --- a/src/std/shared/llcmpzero_fast.src +++ b/src/std/shared/llcmpzero_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llcmpzero_fast __llcmpzero_fast: xor a, a diff --git a/src/std/shared/lldiv.src b/src/std/shared/lldiv.src index 6e69d7da8..9a49bb01c 100644 --- a/src/std/shared/lldiv.src +++ b/src/std/shared/lldiv.src @@ -1,8 +1,8 @@ - assume adl=1 + assume adl=1 - public _imaxdiv + section .text + public _imaxdiv, _lldiv _imaxdiv: - public _lldiv _lldiv: pop hl diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index 23f44855b..06d35456e 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llrems __llrems: diff --git a/src/std/shared/lldivu.src b/src/std/shared/lldivu.src index d3e1ad2ed..53d1e0043 100644 --- a/src/std/shared/lldivu.src +++ b/src/std/shared/lldivu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lldivu __lldivu: diff --git a/src/std/shared/lldivu_b.src b/src/std/shared/lldivu_b.src index 9650e355b..9dcd5780d 100644 --- a/src/std/shared/lldivu_b.src +++ b/src/std/shared/lldivu_b.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lldivu_b __lldivu_b: ; Very slow diff --git a/src/std/shared/lldvrmu.src b/src/std/shared/lldvrmu.src index 032a96dd2..0e31c87ac 100644 --- a/src/std/shared/lldvrmu.src +++ b/src/std/shared/lldvrmu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lldvrmu __lldvrmu: ; Atrociously slow. 
diff --git a/src/std/shared/llmulu.src b/src/std/shared/llmulu.src index 298cb7a04..f48d80887 100644 --- a/src/std/shared/llmulu.src +++ b/src/std/shared/llmulu.src @@ -1,8 +1,8 @@ assume adl=1 - public __llmuls + section .text + public __llmuls, __llmulu __llmuls: - public __llmulu __llmulu: ; Really slow diff --git a/src/std/shared/llmulu_b.src b/src/std/shared/llmulu_b.src index 1dcf64224..a2c0b9fd3 100644 --- a/src/std/shared/llmulu_b.src +++ b/src/std/shared/llmulu_b.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llmulu_b __llmulu_b: push af diff --git a/src/std/shared/llneg.src b/src/std/shared/llneg.src index 8ba3a5894..bf13f5dfa 100644 --- a/src/std/shared/llneg.src +++ b/src/std/shared/llneg.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llneg __llneg: or a, a diff --git a/src/std/shared/llneg_fast.src b/src/std/shared/llneg_fast.src index c754d5825..bd4373850 100644 --- a/src/std/shared/llneg_fast.src +++ b/src/std/shared/llneg_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llneg_fast __llneg_fast: push de ; *spl=UDE diff --git a/src/std/shared/llnot.src b/src/std/shared/llnot.src index d90200ae4..d2151e07f 100644 --- a/src/std/shared/llnot.src +++ b/src/std/shared/llnot.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llnot __llnot: push hl ; *spl=UHL diff --git a/src/std/shared/llnot_fast.src b/src/std/shared/llnot_fast.src index 410411ce6..9b7ce9465 100644 --- a/src/std/shared/llnot_fast.src +++ b/src/std/shared/llnot_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llnot_fast __llnot_fast: push hl ; *spl=UHL diff --git a/src/std/shared/llor.src b/src/std/shared/llor.src index 369c5d1ca..ba649c740 100644 --- a/src/std/shared/llor.src +++ b/src/std/shared/llor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llor __llor: ; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 diff --git a/src/std/shared/llor_fast.src b/src/std/shared/llor_fast.src index 0b15d7d30..ad2bbc3b8 100644 --- 
a/src/std/shared/llor_fast.src +++ b/src/std/shared/llor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llor_fast __llor_fast: ; CC: 56*r(PC)+19*r(SPL)+8*w(SPL)+1 diff --git a/src/std/shared/llpopcnt.src b/src/std/shared/llpopcnt.src index 363db5754..cc6425748 100644 --- a/src/std/shared/llpopcnt.src +++ b/src/std/shared/llpopcnt.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llpopcnt __llpopcnt: call __lpopcnt diff --git a/src/std/shared/llremu.src b/src/std/shared/llremu.src index 9e4e8e38d..431799162 100644 --- a/src/std/shared/llremu.src +++ b/src/std/shared/llremu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llremu __llremu: diff --git a/src/std/shared/llshl.src b/src/std/shared/llshl.src index 2940aea4b..1e7f91ef5 100644 --- a/src/std/shared/llshl.src +++ b/src/std/shared/llshl.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llshl __llshl: ; Suboptimal for large shift amounts diff --git a/src/std/shared/llshl_1_fast.src b/src/std/shared/llshl_1_fast.src index 15bc25601..6cb8a0fa3 100644 --- a/src/std/shared/llshl_1_fast.src +++ b/src/std/shared/llshl_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llshl_1_fast __llshl_1_fast: add hl, hl diff --git a/src/std/shared/llshrs.src b/src/std/shared/llshrs.src index bc7cea7a0..da6cfb832 100644 --- a/src/std/shared/llshrs.src +++ b/src/std/shared/llshrs.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llshrs __llshrs: ; Suboptimal for large shift amounts diff --git a/src/std/shared/llshrs_1_fast.src b/src/std/shared/llshrs_1_fast.src index cfc2d8785..76452de02 100644 --- a/src/std/shared/llshrs_1_fast.src +++ b/src/std/shared/llshrs_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llshrs_1_fast __llshrs_1_fast: push hl diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index 7cb4134a7..c2edb05e7 100644 --- a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -1,3 +1,6 @@ 
+ assume adl=1 + + section .text public __llshrs_fast __llshrs_fast: ; Not well-optimized, but it probably works diff --git a/src/std/shared/llshru.src b/src/std/shared/llshru.src index 15fdb323b..94cc373bb 100644 --- a/src/std/shared/llshru.src +++ b/src/std/shared/llshru.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llshru __llshru: ; Suboptimal for large shift amounts diff --git a/src/std/shared/llshru_1_fast.src b/src/std/shared/llshru_1_fast.src index f175fed56..6af6e39fd 100644 --- a/src/std/shared/llshru_1_fast.src +++ b/src/std/shared/llshru_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llshru_1_fast __llshru_1_fast: push hl diff --git a/src/std/shared/llsub.src b/src/std/shared/llsub.src index 7005ae965..bbdca06a2 100644 --- a/src/std/shared/llsub.src +++ b/src/std/shared/llsub.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llsub __llsub: push iy diff --git a/src/std/shared/llsub_1.src b/src/std/shared/llsub_1.src index 27b6a73b3..d21d768fd 100644 --- a/src/std/shared/llsub_1.src +++ b/src/std/shared/llsub_1.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __llsub_1 __llsub_1: add hl, de diff --git a/src/std/shared/llsub_fast.src b/src/std/shared/llsub_fast.src index 1d83cd036..374a5a3b7 100644 --- a/src/std/shared/llsub_fast.src +++ b/src/std/shared/llsub_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llsub_fast __llsub_fast: ld iy, 0 diff --git a/src/std/shared/llxor.src b/src/std/shared/llxor.src index 7d1f48432..c574e6747 100644 --- a/src/std/shared/llxor.src +++ b/src/std/shared/llxor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llxor __llxor: ; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 diff --git a/src/std/shared/llxor_fast.src b/src/std/shared/llxor_fast.src index cdae789b2..26ce685a4 100644 --- a/src/std/shared/llxor_fast.src +++ b/src/std/shared/llxor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __llxor_fast __llxor_fast: ; CC: 
56*r(PC)+19*r(SPL)+8*w(SPL)+1 diff --git a/src/std/shared/lmulu_fast.src b/src/std/shared/lmulu_fast.src index 4d827a4da..59b1e40a4 100644 --- a/src/std/shared/lmulu_fast.src +++ b/src/std/shared/lmulu_fast.src @@ -1,8 +1,8 @@ assume adl=1 - public __lmuls_fast + section .text + public __lmuls_fast, __lmulu_fast __lmuls_fast: - public __lmulu_fast __lmulu_fast: ; Multiplies EUHL by AUBC and returns the 32-bit product euhl. diff --git a/src/std/shared/lneg_fast.src b/src/std/shared/lneg_fast.src index a9dc7c315..7c5d9dad9 100644 --- a/src/std/shared/lneg_fast.src +++ b/src/std/shared/lneg_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lneg_fast __lneg_fast: ld c, e ; c=E diff --git a/src/std/shared/lnot_fast.src b/src/std/shared/lnot_fast.src index 4f91b9508..cf3d20d08 100644 --- a/src/std/shared/lnot_fast.src +++ b/src/std/shared/lnot_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lnot_fast __lnot_fast: ld a, e ; a=E diff --git a/src/std/shared/lor_fast.src b/src/std/shared/lor_fast.src index a824bfe32..f8d8d8db4 100644 --- a/src/std/shared/lor_fast.src +++ b/src/std/shared/lor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lor_fast __lor_fast: or a, e diff --git a/src/std/shared/lrems.src b/src/std/shared/lrems.src index e8656a05d..5b8f8f386 100644 --- a/src/std/shared/lrems.src +++ b/src/std/shared/lrems.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lrems __lrems: ; I: EUHL=dividend, AUBC=divisor diff --git a/src/std/shared/lshrs_1_fast.src b/src/std/shared/lshrs_1_fast.src index 1105217ca..f5a3d85a6 100644 --- a/src/std/shared/lshrs_1_fast.src +++ b/src/std/shared/lshrs_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lshru_1_fast __lshru_1_fast: push hl diff --git a/src/std/shared/lshru_1_fast.src b/src/std/shared/lshru_1_fast.src index edb931a22..86c81891e 100644 --- a/src/std/shared/lshru_1_fast.src +++ b/src/std/shared/lshru_1_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text 
public __lshru_1_fast __lshru_1_fast: push hl diff --git a/src/std/shared/lsub_1.src b/src/std/shared/lsub_1.src index e4db59b63..61a0d8ee0 100644 --- a/src/std/shared/lsub_1.src +++ b/src/std/shared/lsub_1.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lsub_1 __lsub_1: add hl, bc diff --git a/src/std/shared/lsub_fast.src b/src/std/shared/lsub_fast.src index 12e5824a8..2e8776118 100644 --- a/src/std/shared/lsub_fast.src +++ b/src/std/shared/lsub_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lsub_fast __lsub_fast: or a, a diff --git a/src/std/shared/lxor_fast.src b/src/std/shared/lxor_fast.src index f47c219fe..6dae8c1d8 100644 --- a/src/std/shared/lxor_fast.src +++ b/src/std/shared/lxor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lxor_fast __lxor_fast: xor a, e diff --git a/src/std/shared/memset.src b/src/std/shared/memset.src index f6ff66fa1..6e2dff1d0 100644 --- a/src/std/shared/memset.src +++ b/src/std/shared/memset.src @@ -1,4 +1,3 @@ -; void *memset(void *dest, int c, size_t count); assume adl=1 section .text diff --git a/src/std/shared/outchar.src b/src/std/shared/outchar.src index 62b6efd6b..81ef54839 100644 --- a/src/std/shared/outchar.src +++ b/src/std/shared/outchar.src @@ -1,4 +1,4 @@ - assume adl = 1 + assume adl=1 section .text weak _outchar diff --git a/src/std/shared/putchar.src b/src/std/shared/putchar.src index ec1e0ec6c..8cbb72c58 100644 --- a/src/std/shared/putchar.src +++ b/src/std/shared/putchar.src @@ -1,4 +1,4 @@ - assume adl = 1 + assume adl=1 section .text public _putchar diff --git a/src/std/shared/puts.src b/src/std/shared/puts.src index b4691396e..0dc70d5c1 100644 --- a/src/std/shared/puts.src +++ b/src/std/shared/puts.src @@ -1,4 +1,4 @@ - assume adl = 1 + assume adl=1 section .text public _puts diff --git a/src/std/shared/sand_fast.src b/src/std/shared/sand_fast.src index bf4cf05a1..78d91ee15 100644 --- a/src/std/shared/sand_fast.src +++ b/src/std/shared/sand_fast.src @@ -1,3 +1,6 
@@ + assume adl=1 + + section .text public __sand_fast __sand_fast: ld a, h diff --git a/src/std/shared/scmpzero_fast.src b/src/std/shared/scmpzero_fast.src index 8a46474b6..635c42a24 100644 --- a/src/std/shared/scmpzero_fast.src +++ b/src/std/shared/scmpzero_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __scmpzero_fast __scmpzero_fast: xor a, a diff --git a/src/std/shared/smulu_fast.src b/src/std/shared/smulu_fast.src index 4a1c286a4..434997af9 100644 --- a/src/std/shared/smulu_fast.src +++ b/src/std/shared/smulu_fast.src @@ -1,6 +1,8 @@ - public __smuls_fast + assume adl=1 + + section .text + public __smuls_fast, __smulu_fast __smuls_fast: - public __smulu_fast __smulu_fast: ; Multiplies HL by BC and returns the 16-bit product hl. diff --git a/src/std/shared/sneg_fast.src b/src/std/shared/sneg_fast.src index 09f9d05d1..dac70fdd7 100644 --- a/src/std/shared/sneg_fast.src +++ b/src/std/shared/sneg_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sneg_fast __sneg_fast := __ineg_fast diff --git a/src/std/shared/snot_fast.src b/src/std/shared/snot_fast.src index 8856b658b..6ebdb78cf 100644 --- a/src/std/shared/snot_fast.src +++ b/src/std/shared/snot_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __snot_fast __snot_fast := __inot_fast diff --git a/src/std/shared/sor_fast.src b/src/std/shared/sor_fast.src index d10f20acf..0ccb26a81 100644 --- a/src/std/shared/sor_fast.src +++ b/src/std/shared/sor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sor_fast __sor_fast: ld a, h diff --git a/src/std/shared/spopcnt_fast.src b/src/std/shared/spopcnt_fast.src index 57d75e86e..ad7685838 100644 --- a/src/std/shared/spopcnt_fast.src +++ b/src/std/shared/spopcnt_fast.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __spopcnt_fast __spopcnt_fast: push hl diff --git a/src/std/shared/srand.src b/src/std/shared/srand.src index e0a95ebb4..ba7313bfc 100644 --- a/src/std/shared/srand.src +++ 
b/src/std/shared/srand.src @@ -1,4 +1,3 @@ -; void srand(unsigned int seed); assume adl=1 section .text diff --git a/src/std/shared/strncasecmp.src b/src/std/shared/strncasecmp.src index 1c05b27c3..74d0880f1 100644 --- a/src/std/shared/strncasecmp.src +++ b/src/std/shared/strncasecmp.src @@ -1,4 +1,3 @@ -; int strncasecmp ( const char *str1 , const char *str2 , int len ); assume adl=1 section .text diff --git a/src/std/shared/sxor_fast.src b/src/std/shared/sxor_fast.src index 09551cdec..beeb04b0c 100644 --- a/src/std/shared/sxor_fast.src +++ b/src/std/shared/sxor_fast.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sxor_fast __sxor_fast: ld a, h diff --git a/src/std/static/bshl.src b/src/std/static/bshl.src index 7bae8e816..851efb589 100644 --- a/src/std/static/bshl.src +++ b/src/std/static/bshl.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __bshl __bshl: ; CC: if B!=0: B*(4*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 diff --git a/src/std/static/bshru.src b/src/std/static/bshru.src index 817d5ddcc..e7976e6b9 100644 --- a/src/std/static/bshru.src +++ b/src/std/static/bshru.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __bshru __bshru: ; CC: if B!=0: B*(5*r(PC)+1)+6*r(PC)+(ADL?6*r(SPL)+3*w(SPL):4*r(SPS)+2*w(SPS))+1 diff --git a/src/std/static/cos.src b/src/std/static/cos.src index 84b2579a6..b46b685c0 100644 --- a/src/std/static/cos.src +++ b/src/std/static/cos.src @@ -1,4 +1,3 @@ -; double cos(double arg); assume adl=1 section .text diff --git a/src/std/static/iand.src b/src/std/static/iand.src index 863eee6cc..294895f2c 100644 --- a/src/std/static/iand.src +++ b/src/std/static/iand.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __iand __iand: push af diff --git a/src/std/static/icmpzero.src b/src/std/static/icmpzero.src index eba3a276e..6d01b2bb0 100644 --- a/src/std/static/icmpzero.src +++ b/src/std/static/icmpzero.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __icmpzero 
__icmpzero: push bc diff --git a/src/std/static/idivs.src b/src/std/static/idivs.src index 2213867b0..b6b97d407 100644 --- a/src/std/static/idivs.src +++ b/src/std/static/idivs.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __idivs __idivs: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/idivu.src b/src/std/static/idivu.src index a077372e3..8a6e9d9e6 100644 --- a/src/std/static/idivu.src +++ b/src/std/static/idivu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __idivu __idivu: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/idvrms.src b/src/std/static/idvrms.src index ebd251fc1..c135d5b50 100644 --- a/src/std/static/idvrms.src +++ b/src/std/static/idvrms.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __idvrms __idvrms: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src index a5d6b96b5..9b3d45694 100644 --- a/src/std/static/idvrmu.src +++ b/src/std/static/idvrmu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __idvrmu __idvrmu: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/imulu.src b/src/std/static/imulu.src index 764304e53..f52ec8843 100644 --- a/src/std/static/imulu.src +++ b/src/std/static/imulu.src @@ -1,8 +1,8 @@ assume adl=1 - public __imuls + section .text + public __imuls, __imulu __imuls: - public __imulu __imulu: ; Multiplies UHL by UBC and returns the 24-bit product uhl. 
diff --git a/src/std/static/ineg.src b/src/std/static/ineg.src index 2f78ba0be..dec4d057a 100644 --- a/src/std/static/ineg.src +++ b/src/std/static/ineg.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ineg __ineg: if 0 diff --git a/src/std/static/inot.src b/src/std/static/inot.src index eb8b5dd4f..e133ee514 100644 --- a/src/std/static/inot.src +++ b/src/std/static/inot.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __inot __inot: if 0 diff --git a/src/std/static/ior.src b/src/std/static/ior.src index 8f94e8b3c..9fc3e3d40 100644 --- a/src/std/static/ior.src +++ b/src/std/static/ior.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ior __ior: push af diff --git a/src/std/static/iremu.src b/src/std/static/iremu.src index 2851493f8..46c091e8f 100644 --- a/src/std/static/iremu.src +++ b/src/std/static/iremu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __iremu __iremu: ; I: UHL=dividend, UBC=divisor diff --git a/src/std/static/ishl.src b/src/std/static/ishl.src index 91822d8a1..2dc3749f5 100644 --- a/src/std/static/ishl.src +++ b/src/std/static/ishl.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ishl __ishl: ; Suboptimal for large shift amounts diff --git a/src/std/static/ishrs.src b/src/std/static/ishrs.src index a242e0a26..9e623e560 100644 --- a/src/std/static/ishrs.src +++ b/src/std/static/ishrs.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ishrs __ishrs: ; Suboptimal for large shift amounts diff --git a/src/std/static/ishru.src b/src/std/static/ishru.src index e0b1c8d14..bc92ad1a5 100644 --- a/src/std/static/ishru.src +++ b/src/std/static/ishru.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ishru __ishru: ; Suboptimal for large shift amounts diff --git a/src/std/static/ixor.src b/src/std/static/ixor.src index 58c76548f..637d5f021 100644 --- a/src/std/static/ixor.src +++ b/src/std/static/ixor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ixor __ixor: push af diff 
--git a/src/std/static/ladd.src b/src/std/static/ladd.src index 79cfaae73..5cfb14e4f 100644 --- a/src/std/static/ladd.src +++ b/src/std/static/ladd.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ladd __ladd: push af diff --git a/src/std/static/ladd_b.src b/src/std/static/ladd_b.src index 232f13bfc..51f34a8cb 100644 --- a/src/std/static/ladd_b.src +++ b/src/std/static/ladd_b.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __ladd_b __ladd_b: push bc diff --git a/src/std/static/land.src b/src/std/static/land.src index ccd1ef8d4..b75886bb5 100644 --- a/src/std/static/land.src +++ b/src/std/static/land.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __land __land: push af diff --git a/src/std/static/lcmps.src b/src/std/static/lcmps.src index 3983a59a5..925a049b7 100644 --- a/src/std/static/lcmps.src +++ b/src/std/static/lcmps.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmps __lcmps: if 1 diff --git a/src/std/static/lcmpu.src b/src/std/static/lcmpu.src index 5abcc3a44..af4ca875a 100644 --- a/src/std/static/lcmpu.src +++ b/src/std/static/lcmpu.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmpu __lcmpu: cp a, e diff --git a/src/std/static/lcmpzero.src b/src/std/static/lcmpzero.src index 31b4ad26b..372ce9269 100644 --- a/src/std/static/lcmpzero.src +++ b/src/std/static/lcmpzero.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lcmpzero __lcmpzero: inc e diff --git a/src/std/static/ldivs.src b/src/std/static/ldivs.src index 3e9add9ad..28312d862 100644 --- a/src/std/static/ldivs.src +++ b/src/std/static/ldivs.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ldivs __ldivs: ; I: EUHL=dividend, AUBC=divisor diff --git a/src/std/static/ldivs_lrems_common.src b/src/std/static/ldivs_lrems_common.src index fdbca5a2f..771f9903e 100644 --- a/src/std/static/ldivs_lrems_common.src +++ b/src/std/static/ldivs_lrems_common.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public 
__ldivs_lrems_common __ldivs_lrems_common: ; I: ZF=EUHL>=0 diff --git a/src/std/static/ldivu.src b/src/std/static/ldivu.src index 5c35b7bb4..9c9446028 100644 --- a/src/std/static/ldivu.src +++ b/src/std/static/ldivu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __ldivu __ldivu: ; I: EUHL=dividend, AUBC=divisor diff --git a/src/std/static/ldvrmu.src b/src/std/static/ldvrmu.src index 6a2639310..a798757b3 100644 --- a/src/std/static/ldvrmu.src +++ b/src/std/static/ldvrmu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text ;;; struct u32div_t { ;;; uint32_t rem; ;;; uint32_t quot; diff --git a/src/std/static/lmulu.src b/src/std/static/lmulu.src index fb199fd60..371343b36 100644 --- a/src/std/static/lmulu.src +++ b/src/std/static/lmulu.src @@ -1,8 +1,8 @@ assume adl=1 - public __lmuls + section .text + public __lmuls, __lmulu __lmuls: - public __lmulu __lmulu: ; Multiplies EUHL by AUBC and returns the 32-bit product euhl. diff --git a/src/std/static/lneg.src b/src/std/static/lneg.src index 5d903ca3f..de7718ce3 100644 --- a/src/std/static/lneg.src +++ b/src/std/static/lneg.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lneg __lneg: push af diff --git a/src/std/static/lnot.src b/src/std/static/lnot.src index 91797f4bb..04341a3fd 100644 --- a/src/std/static/lnot.src +++ b/src/std/static/lnot.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lnot __lnot: push af diff --git a/src/std/static/lor.src b/src/std/static/lor.src index abbcf929e..314b7f818 100644 --- a/src/std/static/lor.src +++ b/src/std/static/lor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lor __lor: push af diff --git a/src/std/static/lremu.src b/src/std/static/lremu.src index a2993f5d0..b866be210 100644 --- a/src/std/static/lremu.src +++ b/src/std/static/lremu.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lremu __lremu: ; I: EUHL=dividend, AUBC=divisor diff --git a/src/std/static/lshl.src b/src/std/static/lshl.src index c17767998..fef7f3734 100644 --- 
a/src/std/static/lshl.src +++ b/src/std/static/lshl.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lshl __lshl: ; Suboptimal for large shift amounts diff --git a/src/std/static/lshrs.src b/src/std/static/lshrs.src index 4e2b56819..4c8d5adff 100644 --- a/src/std/static/lshrs.src +++ b/src/std/static/lshrs.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lshrs __lshrs: ; Suboptimal for large shift amounts diff --git a/src/std/static/lshru.src b/src/std/static/lshru.src index 39c3826df..8fd765b9e 100644 --- a/src/std/static/lshru.src +++ b/src/std/static/lshru.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __lshru __lshru: ; Suboptimal for large shift amounts diff --git a/src/std/static/lsub.src b/src/std/static/lsub.src index 6f8f2abfa..b18b06e6f 100644 --- a/src/std/static/lsub.src +++ b/src/std/static/lsub.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lsub __lsub: push af diff --git a/src/std/static/lxor.src b/src/std/static/lxor.src index 6b0cfbb8d..2e6555735 100644 --- a/src/std/static/lxor.src +++ b/src/std/static/lxor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __lxor __lxor: push af diff --git a/src/std/static/sand.src b/src/std/static/sand.src index 89d91fa04..643a9e706 100644 --- a/src/std/static/sand.src +++ b/src/std/static/sand.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sand __sand: push af diff --git a/src/std/static/scmpzero.src b/src/std/static/scmpzero.src index 2adf837d4..d72098b69 100644 --- a/src/std/static/scmpzero.src +++ b/src/std/static/scmpzero.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __scmpzero __scmpzero: add hl, bc diff --git a/src/std/static/smulu.src b/src/std/static/smulu.src index 9f604a156..0b3767889 100644 --- a/src/std/static/smulu.src +++ b/src/std/static/smulu.src @@ -1,6 +1,8 @@ - public __smuls + assume adl=1 + + section .text + public __smuls, __smulu __smuls: - public __smulu __smulu: ; Multiplies HL by BC and returns the 
16-bit product hl. diff --git a/src/std/static/sneg.src b/src/std/static/sneg.src index 75dcd9d4e..3d63da803 100644 --- a/src/std/static/sneg.src +++ b/src/std/static/sneg.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sneg __sneg := __ineg diff --git a/src/std/static/snot.src b/src/std/static/snot.src index 7aa8935a3..f46a13485 100644 --- a/src/std/static/snot.src +++ b/src/std/static/snot.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __snot __snot := __inot diff --git a/src/std/static/sor.src b/src/std/static/sor.src index 4c8087e0b..354125f0d 100644 --- a/src/std/static/sor.src +++ b/src/std/static/sor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sor __sor: push af diff --git a/src/std/static/sshl.src b/src/std/static/sshl.src index 7ae40984f..4300b4d22 100644 --- a/src/std/static/sshl.src +++ b/src/std/static/sshl.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sshl __sshl := __ishl diff --git a/src/std/static/sshrs.src b/src/std/static/sshrs.src index 8f8696845..cafd166da 100644 --- a/src/std/static/sshrs.src +++ b/src/std/static/sshrs.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sshrs __sshrs: ; Suboptimal for large shift amounts diff --git a/src/std/static/sshru.src b/src/std/static/sshru.src index a27e5440d..379e56c8a 100644 --- a/src/std/static/sshru.src +++ b/src/std/static/sshru.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sshru __sshru: ; Suboptimal for large shift amounts diff --git a/src/std/static/sxor.src b/src/std/static/sxor.src index cd1acb351..21a9d4041 100644 --- a/src/std/static/sxor.src +++ b/src/std/static/sxor.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __sxor __sxor: push af diff --git a/src/std/static/tolower.src b/src/std/static/tolower.src index 99a280054..ff8fc5d49 100644 --- a/src/std/static/tolower.src +++ b/src/std/static/tolower.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public _tolower _tolower: pop de diff --git 
a/src/std/static/toupper.src b/src/std/static/toupper.src index b71288115..ddfba1900 100644 --- a/src/std/static/toupper.src +++ b/src/std/static/toupper.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public _toupper _toupper: pop de From d721e3402bd9ee15795529b7d9b7ade384dc31fb Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 4 Mar 2022 23:48:41 -0500 Subject: [PATCH 117/142] Add a missing section --- src/std/static/irems.src | 1 + 1 file changed, 1 insertion(+) diff --git a/src/std/static/irems.src b/src/std/static/irems.src index dfed236ae..fed933a7c 100644 --- a/src/std/static/irems.src +++ b/src/std/static/irems.src @@ -1,5 +1,6 @@ assume adl=1 + section .text public __irems __irems: ; I: UHL=dividend, UBC=divisor From 3e5848538ffaef1c16dff02b01c7fc682f97302d Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Mar 2022 22:01:05 -0500 Subject: [PATCH 118/142] Optimize lcmps --- src/std/shared/lcmps_fast.src | 4 ---- src/std/static/lcmps.src | 23 ----------------------- 2 files changed, 27 deletions(-) diff --git a/src/std/shared/lcmps_fast.src b/src/std/shared/lcmps_fast.src index 2ec9531e5..aa3fcb1d5 100644 --- a/src/std/shared/lcmps_fast.src +++ b/src/std/shared/lcmps_fast.src @@ -8,8 +8,4 @@ __lcmps_fast: sub a, d ret nz sbc hl, bc - ret z - sbc a, a - ret nz - inc a ret diff --git a/src/std/static/lcmps.src b/src/std/static/lcmps.src index 925a049b7..b842d57a3 100644 --- a/src/std/static/lcmps.src +++ b/src/std/static/lcmps.src @@ -3,7 +3,6 @@ section .text public __lcmps __lcmps: -if 1 push de ld d, a ld a, e @@ -11,28 +10,6 @@ if 1 jr nz, .finish sbc hl, bc add hl, bc - jr z, .finish - sbc a, a - scf - adc a, a -.finish: ld a, d pop de ret -else - push hl - or a, a - sbc hl, bc - ld l, a - ld a, e - jr z, .eq24 - sbc a, l - jr nz, .finish - inc a -.eq24: - sbc a, l -.finish: - ld a, l - pop hl - ret -end if From c2ae8af0460ae2bd0f3cf7b8f64af4562fdad628 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Mar 2022 23:21:24 
-0500 Subject: [PATCH 119/142] Optimize scmpzero, icmpzero, lcmpzero, and llcmpzero Requires llvm-project commit ecd4dd5e54173991aa1ca3737f92b7d5324a6e09. --- src/std/linked/linked.src | 6 ----- src/std/{static => shared}/icmpzero.src | 6 ++--- src/std/shared/icmpzero_fast.src | 10 --------- src/std/{static => shared}/lcmpzero.src | 10 +-------- src/std/shared/lcmpzero_fast.src | 13 ----------- src/std/shared/llcmpzero.src | 30 +++++++++---------------- src/std/shared/llcmpzero_fast.src | 6 ++--- src/std/{static => shared}/scmpzero.src | 10 ++------- src/std/shared/scmpzero_fast.src | 10 --------- 9 files changed, 19 insertions(+), 82 deletions(-) rename src/std/{static => shared}/icmpzero.src (65%) delete mode 100644 src/std/shared/icmpzero_fast.src rename src/std/{static => shared}/lcmpzero.src (61%) delete mode 100644 src/std/shared/lcmpzero_fast.src rename src/std/{static => shared}/scmpzero.src (51%) delete mode 100644 src/std/shared/scmpzero_fast.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index d1a490045..e64352919 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -62,8 +62,6 @@ __bshru := 000104h ; short (16-bit) math public __sand __sand := 000200h - public __scmpzero -__scmpzero := 000204h public __smuls __smuls := 000224h public __smulu @@ -86,8 +84,6 @@ __sxor := 000268h ; int (24-bit) math public __iand __iand := 000134h - public __icmpzero -__icmpzero := 000138h public __idivs __idivs := 00013Ch public __idivu @@ -128,8 +124,6 @@ __land := 0001A4h __lcmps := 0001A8h public __lcmpu __lcmpu := 0001ACh - public __lcmpzero -__lcmpzero := 0001B0h public __ldivs __ldivs := 0001B4h public __ldivu diff --git a/src/std/static/icmpzero.src b/src/std/shared/icmpzero.src similarity index 65% rename from src/std/static/icmpzero.src rename to src/std/shared/icmpzero.src index 6d01b2bb0..286355bf0 100644 --- a/src/std/static/icmpzero.src +++ b/src/std/shared/icmpzero.src @@ -3,9 +3,7 @@ section .text public 
__icmpzero __icmpzero: - push bc - ld bc, 0 + add hl, de or a, a - sbc hl, bc - pop bc + sbc hl, de ret diff --git a/src/std/shared/icmpzero_fast.src b/src/std/shared/icmpzero_fast.src deleted file mode 100644 index 2e044e743..000000000 --- a/src/std/shared/icmpzero_fast.src +++ /dev/null @@ -1,10 +0,0 @@ - assume adl=1 - - section .text - public __icmpzero_fast -__icmpzero_fast: - xor a, a - ld c, a - mlt bc - sbc hl, bc - ret diff --git a/src/std/static/lcmpzero.src b/src/std/shared/lcmpzero.src similarity index 61% rename from src/std/static/lcmpzero.src rename to src/std/shared/lcmpzero.src index 372ce9269..11d15d16a 100644 --- a/src/std/static/lcmpzero.src +++ b/src/std/shared/lcmpzero.src @@ -5,16 +5,8 @@ __lcmpzero: inc e dec e - jr nz, .e_nz + ret nz add hl, bc or a, a sbc hl, bc - ret z - inc e - dec de - ret -.e_nz: - ret po - dec e - inc e ret diff --git a/src/std/shared/lcmpzero_fast.src b/src/std/shared/lcmpzero_fast.src deleted file mode 100644 index 75d2eb24f..000000000 --- a/src/std/shared/lcmpzero_fast.src +++ /dev/null @@ -1,13 +0,0 @@ - assume adl=1 - - section .text - public __lcmpzero_fast -__lcmpzero_fast: - xor a, a - add a, e - ret nz - mlt de - sbc hl, de - ret p - inc e - ret diff --git a/src/std/shared/llcmpzero.src b/src/std/shared/llcmpzero.src index 6c3b369d1..5b2b19b44 100644 --- a/src/std/shared/llcmpzero.src +++ b/src/std/shared/llcmpzero.src @@ -4,26 +4,18 @@ public __llcmpzero __llcmpzero: inc b - djnz .b_nz + dec b + ret nz + inc.s bc dec c - inc c - jr nz, .c_nz - mlt bc - adc hl, bc - jr nz, .udeuhl_nz - adc hl, de + jr nz, .p_nz + or a, a + sbc hl, bc + jr nz, .p_nz + sbc hl, de ret z -.udeuhl_nz: - inc c - dec bc - ret -.b_nz: - ret po - dec b + add hl, de +.p_nz: inc b - ret -.c_nz: - ret p - inc c - dec c + ld b, 0 ret diff --git a/src/std/shared/llcmpzero_fast.src b/src/std/shared/llcmpzero_fast.src index 44ed2d1ae..d68547aac 100644 --- a/src/std/shared/llcmpzero_fast.src +++ b/src/std/shared/llcmpzero_fast.src @@ -7,12 
+7,12 @@ __llcmpzero_fast: add a, b ret nz cp a, c - jr nz, .nz + jr nz, .p_nz mlt bc adc hl, bc - jr nz, .nz + jr nz, .p_nz adc hl, de ret z -.nz: +.p_nz: inc a ret diff --git a/src/std/static/scmpzero.src b/src/std/shared/scmpzero.src similarity index 51% rename from src/std/static/scmpzero.src rename to src/std/shared/scmpzero.src index d72098b69..4fc7510d8 100644 --- a/src/std/static/scmpzero.src +++ b/src/std/shared/scmpzero.src @@ -3,13 +3,7 @@ section .text public __scmpzero __scmpzero: - add hl, bc + add hl, de or a, a - sbc.s hl, bc - ret po - inc h - dec h - ret po - dec h - inc h + sbc.s hl, de ret diff --git a/src/std/shared/scmpzero_fast.src b/src/std/shared/scmpzero_fast.src deleted file mode 100644 index 635c42a24..000000000 --- a/src/std/shared/scmpzero_fast.src +++ /dev/null @@ -1,10 +0,0 @@ - assume adl=1 - - section .text - public __scmpzero_fast -__scmpzero_fast: - xor a, a - ld c, a - ld b, a - sbc.s hl, bc - ret From a3f9ef22b59368a18ae622b8a0add3f6267b07dd Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 6 Mar 2022 23:52:01 -0500 Subject: [PATCH 120/142] Fix llcmpu --- src/std/shared/llcmpu.src | 1 + 1 file changed, 1 insertion(+) diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src index b2fb9884a..d98aff645 100644 --- a/src/std/shared/llcmpu.src +++ b/src/std/shared/llcmpu.src @@ -17,6 +17,7 @@ __llcmpu: push bc ld bc, (iy + 6) sbc hl, bc + add hl, bc pop bc pop iy ret From f8f54c2e99d15901059dca539236d2a405691bc1 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Mon, 7 Mar 2022 00:00:35 -0500 Subject: [PATCH 121/142] Implement llcmps --- src/std/shared/llcmps.src | 29 +++++++++++++++++++++++++++++ src/std/shared/llcmpu.src | 8 ++++---- src/std/shared/llcmpu_fast.src | 2 ++ 3 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 src/std/shared/llcmps.src diff --git a/src/std/shared/llcmps.src b/src/std/shared/llcmps.src new file mode 100644 index 000000000..42fbf4687 --- /dev/null +++ 
b/src/std/shared/llcmps.src @@ -0,0 +1,29 @@ + assume adl=1 + + section .text + public __llcmps +__llcmps: + push iy + ld iy, 0 + add iy, sp + push bc + push hl + ld l, c + ld h, b + ld bc, (iy + 12) + sbc.s hl, bc + pop hl + jr nz, .ne + ld bc, (iy + 9) + ex de, hl + sbc hl, de + add hl, de + ex de, hl + jr nz, .ne + ld bc, (iy + 6) + sbc hl, bc + add hl, bc +.ne: + pop bc + pop iy + ret diff --git a/src/std/shared/llcmpu.src b/src/std/shared/llcmpu.src index d98aff645..16b9f3131 100644 --- a/src/std/shared/llcmpu.src +++ b/src/std/shared/llcmpu.src @@ -9,10 +9,10 @@ __llcmpu: push hl ld hl, (iy + 12) sbc.s hl, bc - jr nz, .bc_ne + jr nz, .ne ld hl, (iy + 9) sbc hl, de - jr nz, .ude_ne + jr nz, .ne pop hl push bc ld bc, (iy + 6) @@ -21,8 +21,8 @@ __llcmpu: pop bc pop iy ret -.bc_ne: -.ude_ne: + +.ne: ccf pop hl pop iy diff --git a/src/std/shared/llcmpu_fast.src b/src/std/shared/llcmpu_fast.src index 7223329ed..e8e8f0ea3 100644 --- a/src/std/shared/llcmpu_fast.src +++ b/src/std/shared/llcmpu_fast.src @@ -1,6 +1,8 @@ assume adl=1 section .text + public __llcmps_fast +__llcmps_fast: public __llcmpu_fast __llcmpu_fast: ld iy, 0 From 1356316dae9668aa5a0108b2f667a019c1a85ea1 Mon Sep 17 00:00:00 2001 From: mateoconlechuga Date: Mon, 7 Mar 2022 07:45:08 -0700 Subject: [PATCH 122/142] add missing lcmps label Signed-off-by: mateoconlechuga --- src/std/static/lcmps.src | 1 + 1 file changed, 1 insertion(+) diff --git a/src/std/static/lcmps.src b/src/std/static/lcmps.src index b842d57a3..f2caf241d 100644 --- a/src/std/static/lcmps.src +++ b/src/std/static/lcmps.src @@ -10,6 +10,7 @@ __lcmps: jr nz, .finish sbc hl, bc add hl, bc +.finish: ld a, d pop de ret From 94052f63ec12294ee2c07360ed08a4cd01d3f1fc Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Mon, 7 Mar 2022 23:56:53 -0500 Subject: [PATCH 123/142] Preserve AF in lland, llor, and llxor --- src/std/shared/lland.src | 10 ++++++---- src/std/shared/llor.src | 10 ++++++---- src/std/shared/llxor.src | 10 ++++++---- 3 files 
changed, 18 insertions(+), 12 deletions(-) diff --git a/src/std/shared/lland.src b/src/std/shared/lland.src index 608f848af..79ab59823 100644 --- a/src/std/shared/lland.src +++ b/src/std/shared/lland.src @@ -3,13 +3,14 @@ section .text public __lland __lland: -; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 +; CC: 62*r(PC)+25*r(SPL)+14*w(SPL)+1 + push af push iy ld iy, 0 add iy, sp push hl push de - lea hl, iy + 13 + lea hl, iy + 16 ld a, b and a, (hl) ld b, a @@ -37,10 +38,11 @@ __lland: ld (iy - 1), a pop hl ld a, h - and a, (iy + 7) + and a, (iy + 10) ld h, a ld a, l - and a, (iy + 6) + and a, (iy + 9) ld l, a pop iy + pop af ret diff --git a/src/std/shared/llor.src b/src/std/shared/llor.src index ba649c740..03ba491ed 100644 --- a/src/std/shared/llor.src +++ b/src/std/shared/llor.src @@ -3,13 +3,14 @@ section .text public __llor __llor: -; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 +; CC: 62*r(PC)+25*r(SPL)+14*w(SPL)+1 + push af push iy ld iy, 0 add iy, sp push hl push de - lea hl, iy + 13 + lea hl, iy + 16 ld a, b or a, (hl) ld b, a @@ -37,10 +38,11 @@ __llor: ld (iy - 1), a pop hl ld a, h - or a, (iy + 7) + or a, (iy + 10) ld h, a ld a, l - or a, (iy + 6) + or a, (iy + 9) ld l, a pop iy + pop af ret diff --git a/src/std/shared/llxor.src b/src/std/shared/llxor.src index c574e6747..8a3d3cefa 100644 --- a/src/std/shared/llxor.src +++ b/src/std/shared/llxor.src @@ -3,13 +3,14 @@ section .text public __llxor __llxor: -; CC: 60*r(PC)+22*r(SPL)+11*w(SPL)+1 +; CC: 62*r(PC)+25*r(SPL)+14*w(SPL)+1 + push af push iy ld iy, 0 add iy, sp push hl push de - lea hl, iy + 13 + lea hl, iy + 16 ld a, b xor a, (hl) ld b, a @@ -37,10 +38,11 @@ __llxor: ld (iy - 1), a pop hl ld a, h - xor a, (iy + 7) + xor a, (iy + 10) ld h, a ld a, l - xor a, (iy + 6) + xor a, (iy + 9) ld l, a pop iy + pop af ret From 210b033b1dcb80374db5cfe7bdc9dec253947a96 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 8 Mar 2022 00:33:13 -0500 Subject: [PATCH 124/142] Fix lldivs and llrems --- src/std/shared/lldivs.src 
| 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index 06d35456e..0d3d62c63 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -35,10 +35,12 @@ __lldivs: push hl lea hl, iy + 7 + xor a, a + ld c, a ld b, 8 .neg_divisor_loop: - ld a, 0 + ld a, c sbc a, (hl) ld (hl), a inc hl @@ -48,8 +50,9 @@ __lldivs: pop bc .neg_divisor_skip: - bit 7, b - call nz, __llneg + ld a, b + or a, a + call m, __llneg call __lldvrmu.hijack From 6f3a13c6fb9a2efa40d45a3dd4f118fffe900141 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 8 Mar 2022 01:06:43 -0500 Subject: [PATCH 125/142] Implement atoll --- examples/standalone/math_test/src/main.c | 31 ++---------------------- src/std/shared/{atoi.src => atol.src} | 10 ++++---- src/std/shared/atoll.src | 20 +++++++++++++++ src/std/stdlib.h | 2 ++ 4 files changed, 29 insertions(+), 34 deletions(-) rename src/std/shared/{atoi.src => atol.src} (77%) create mode 100644 src/std/shared/atoll.src diff --git a/examples/standalone/math_test/src/main.c b/examples/standalone/math_test/src/main.c index 9abf40d69..f7ccf219b 100644 --- a/examples/standalone/math_test/src/main.c +++ b/examples/standalone/math_test/src/main.c @@ -82,33 +82,6 @@ static void finishOutput() } -static uint64_t atoull_(const char *str) -{ - uint64_t result = 0; - while (isdigit(*str)) - { - result = result * 10 + (unsigned)(*str++ - '0'); - } - - return result; -} - -static int64_t atoll_(const char *str) -{ - bool negative = false; - if (*str == '-') - { - negative = true; - str++; - } - - uint64_t x = atoull_(str); - int64_t result = (int64_t)(negative ? -x : x); - - return result; -} - - #define DEFINE_UNOP_PREFIX_FUNC(type, name, op) \ static type name##_(type x) \ { \ @@ -399,8 +372,8 @@ static const BinOp *binops[] = { int main(int argc, char *argv[]) { - int64_t x = argc > 1 ? atoll_(argv[1]) : (int64_t)0xDFA5FBC197EDB389LL; - int64_t y = argc > 2 ? 
atoll_(argv[2]) : (int64_t)0x08010A030C050E07LL; + int64_t x = argc > 1 ? atoll(argv[1]) : (int64_t)0xDFA5FBC197EDB389LL; + int64_t y = argc > 2 ? atoll(argv[2]) : (int64_t)0x08010A030C050E07LL; separateOutput(); diff --git a/src/std/shared/atoi.src b/src/std/shared/atol.src similarity index 77% rename from src/std/shared/atoi.src rename to src/std/shared/atol.src index 93981e070..ee1bc9435 100644 --- a/src/std/shared/atoi.src +++ b/src/std/shared/atol.src @@ -1,9 +1,9 @@ assume adl=1 section .text - public _atol, _atoi -_atol: + public _atoi, _atol _atoi: +_atol: pop bc ex (sp),hl push bc @@ -13,9 +13,9 @@ _atoi: push bc push hl call _strtol - pop bc - pop bc - pop bc + pop af + pop af + pop af ret extern _strtol diff --git a/src/std/shared/atoll.src b/src/std/shared/atoll.src new file mode 100644 index 000000000..d9f5f4903 --- /dev/null +++ b/src/std/shared/atoll.src @@ -0,0 +1,20 @@ + assume adl=1 + + section .text + public _atoll +_atoll: + pop bc + ex (sp),hl + push bc + ld bc,10 + push bc + ld c,b + push bc + push hl + call _strtoll + pop af + pop af + pop af + ret + + extern _strtoll diff --git a/src/std/stdlib.h b/src/std/stdlib.h index 9c1c38ab0..a94702f5a 100644 --- a/src/std/stdlib.h +++ b/src/std/stdlib.h @@ -53,6 +53,8 @@ int atoi(const char *nptr) __attribute__((nonnull(1))); long atol(const char *nptr) __attribute__((nonnull(1))); +long long atoll(const char *nptr) __attribute__((nonnull(1))); + float strtof(const char *__restrict nptr, char **__restrict endptr) __attribute__((nonnull(1))); From 2df25d845f3d27f8f6bf2bbe4e7d33a5dc1f3398 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Tue, 8 Mar 2022 02:16:46 -0500 Subject: [PATCH 126/142] Fix lldivs and llrems more --- src/std/shared/lldivs.src | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/std/shared/lldivs.src b/src/std/shared/lldivs.src index 0d3d62c63..75c166e40 100644 --- a/src/std/shared/lldivs.src +++ b/src/std/shared/lldivs.src @@ -50,10 +50,10 @@ __lldivs: 
pop bc .neg_divisor_skip: - ld a, b - or a, a - call m, __llneg + bit 7, b + call nz, __llneg + or a, a call __lldvrmu.hijack pop af From 6beb48e85d1d3c13c110f873b1ac064ae12cec34 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 18:22:12 -0400 Subject: [PATCH 127/142] Remove duplicate data --- examples/standalone/math_test/autotest.json | 8 -------- 1 file changed, 8 deletions(-) diff --git a/examples/standalone/math_test/autotest.json b/examples/standalone/math_test/autotest.json index 2b8707ddf..2c23b5336 100644 --- a/examples/standalone/math_test/autotest.json +++ b/examples/standalone/math_test/autotest.json @@ -115,14 +115,6 @@ "257F7002" ] }, - "or": { - "description": "or", - "start": "vram_start", - "size": "vram_16_size", - "expected_CRCs": [ - "4E247920" - ] - }, "add": { "description": "add", "start": "vram_start", From 11123a1b8888314708b9fe2985a3c30a7c1a9e8b Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 18:34:18 -0400 Subject: [PATCH 128/142] Clear any args before launching autotest --- examples/standalone/math_test/autotest.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/standalone/math_test/autotest.json b/examples/standalone/math_test/autotest.json index 2c23b5336..ac6436982 100644 --- a/examples/standalone/math_test/autotest.json +++ b/examples/standalone/math_test/autotest.json @@ -7,6 +7,10 @@ "isASM": true }, "sequence": [ + "key|clear", + "delay|1000", + "key|0", + "key|enter", "action|launch", "hashWait|not", "key|enter", From 581736680c6b30e2373e6eb09bc89ac484850adf Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 18:38:23 -0400 Subject: [PATCH 129/142] Fix bremu and brems --- src/std/shared/bdvrms.src | 2 +- src/std/shared/bdvrmu.src | 2 +- src/std/shared/brems.src | 1 + src/std/shared/bremu.src | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms.src index d0d92a804..c5723bb0d 100644 --- 
a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms.src @@ -1,7 +1,7 @@ public __bdvrms __bdvrms: ; I: B=dividend, C=divisor -; O: a=abs(B)%abs(C), h=?, l=abs(B)/abs(C) +; O: a=?, h=abs(B)%abs(C), l=abs(B)/abs(C) push bc diff --git a/src/std/shared/bdvrmu.src b/src/std/shared/bdvrmu.src index 36963ec2c..59083a72b 100644 --- a/src/std/shared/bdvrmu.src +++ b/src/std/shared/bdvrmu.src @@ -4,7 +4,7 @@ public __bdvrmu __bdvrmu: ; I: B=dividend, C=divisor -; O: a=B%C, b=0, h=?, l=B/C +; O: a=?, b=0, h=B%C, l=B/C ld l, b ld h, 0 diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index d34f8de04..7c693c571 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -10,6 +10,7 @@ __brems: ld b, a call __bdvrms + ld a, h bit 7, b diff --git a/src/std/shared/bremu.src b/src/std/shared/bremu.src index 1ad30970f..ca5701717 100644 --- a/src/std/shared/bremu.src +++ b/src/std/shared/bremu.src @@ -11,6 +11,7 @@ __bremu: ld b, a call __bdvrmu + ld a, h pop hl pop bc From 9564ec65877d522bb4896dd7e35bd9038a6eae46 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 19:49:29 -0400 Subject: [PATCH 130/142] Add a missing section --- src/std/shared/bdvrms.src | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms.src index c5723bb0d..f191a7a5c 100644 --- a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms.src @@ -1,3 +1,6 @@ + assume adl=1 + + section .text public __bdvrms __bdvrms: ; I: B=dividend, C=divisor From 0546bcf63a9893900c783729de63aa275e62e28a Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 21:44:14 -0400 Subject: [PATCH 131/142] Fix brems more --- src/std/shared/bdivs.src | 1 + src/std/shared/bdvrms.src | 20 +++++++++++--------- src/std/shared/bdvrmu.src | 2 ++ src/std/shared/brems.src | 6 +++--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index b0ddc39c2..6dd9e93a4 100644 --- 
a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -8,6 +8,7 @@ __bdivs: push hl + ld l, b call __bdvrms ld a, c diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms.src index f191a7a5c..acd1b569c 100644 --- a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms.src @@ -3,19 +3,21 @@ section .text public __bdvrms __bdvrms: -; I: B=dividend, C=divisor -; O: a=?, h=abs(B)%abs(C), l=abs(B)/abs(C) +; I: C=divisor, L=dividend +; O: a=?, h=abs(L)%abs(C), l=abs(L)/abs(C) - push bc - - ld a, c + ld a, l + public __bdvrms.hijack_a_l_dividend +.hijack_a_l_dividend: rla jr nc, .neg_divisor_skip xor a, a - sub a, c - ld c, a + sub a, l + ld l, a .neg_divisor_skip: + push bc + ld a, b rla jr nc, .neg_dividend_skip @@ -24,10 +26,10 @@ __bdvrms: ld b, a .neg_dividend_skip: - call __bdvrmu + call __bdvrmu.hijack_l_dividend pop bc ret - extern __bdvrmu + extern __bdvrmu.hijack_l_dividend diff --git a/src/std/shared/bdvrmu.src b/src/std/shared/bdvrmu.src index 59083a72b..8f7af268d 100644 --- a/src/std/shared/bdvrmu.src +++ b/src/std/shared/bdvrmu.src @@ -7,6 +7,8 @@ __bdvrmu: ; O: a=?, b=0, h=B%C, l=B/C ld l, b + public __bdvrmu.hijack_l_dividend +.hijack_l_dividend: ld h, 0 ld b, 8 diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 7c693c571..a9c03e906 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -8,8 +8,8 @@ __brems: push hl - ld b, a - call __bdvrms + ld l, a + call __bdvrms.hijack_a_l_dividend ld a, h bit 7, b @@ -21,4 +21,4 @@ __brems: ret - extern __bdvrms + extern __bdvrms.hijack_a_l_dividend From d156c4dc2989bf63dec0ad6d2d880adabbf39eac Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 22:00:09 -0400 Subject: [PATCH 132/142] Probably fix llshrs_fast --- src/std/shared/llshrs_fast.src | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index c2edb05e7..5ef91f579 100644 --- 
a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -3,17 +3,20 @@ section .text public __llshrs_fast __llshrs_fast: -; Not well-optimized, but it probably works +; Could use optimization for shifting whole bytes at a time. ld iy, 0 add iy, sp + ld a, c ld c, b ld b, (iy + 3) inc b dec b ret z + push de push hl + .loop: sra c rra @@ -24,8 +27,21 @@ __llshrs_fast: rr h rr l djnz .loop + ld b, c ld c, a + + ld a, l + ld (iy - 5), h + + pop hl + ld l, e + ld h, d + ex de, hl + + pop hl + ld l, a + ret From 1fd3f95948e22a367054801574983acefe2c6bfc Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 22:04:42 -0400 Subject: [PATCH 133/142] Implement llshru_fast --- src/std/shared/llshrs_fast.src | 2 ++ src/std/shared/llshru.src | 1 - src/std/shared/llshru_fast.src | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 src/std/shared/llshru_fast.src diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index 5ef91f579..9e125f4b2 100644 --- a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -19,6 +19,8 @@ __llshrs_fast: .loop: sra c + public __llshrs_fast.hijack_llshru +.hijack_llshru: rra rr (iy - 1) rr d diff --git a/src/std/shared/llshru.src b/src/std/shared/llshru.src index 94cc373bb..d2f30982d 100644 --- a/src/std/shared/llshru.src +++ b/src/std/shared/llshru.src @@ -13,7 +13,6 @@ __llshru: jp z, __llshrs.finish push de push hl -.loop: srl b jp __llshrs.hijack_llshru diff --git a/src/std/shared/llshru_fast.src b/src/std/shared/llshru_fast.src new file mode 100644 index 000000000..b15869a98 --- /dev/null +++ b/src/std/shared/llshru_fast.src @@ -0,0 +1,23 @@ + assume adl=1 + + section .text + public __llshru_fast +__llshru_fast: + ld iy, 0 + add iy, sp + + ld a, c + ld c, b + ld b, (iy + 3) + inc b + dec b + ret z + + push de + push hl + + srl c + jp __llshrs_fast.hijack_llshru + + + extern __llshrs_fast.hijack_llshru From 
862fc4f9307338bad08717b3126d848ed33928da Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 22:08:20 -0400 Subject: [PATCH 134/142] Fix lnot_fast --- src/std/shared/lnot_fast.src | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/shared/lnot_fast.src b/src/std/shared/lnot_fast.src index cf3d20d08..474cd6527 100644 --- a/src/std/shared/lnot_fast.src +++ b/src/std/shared/lnot_fast.src @@ -4,8 +4,6 @@ public __lnot_fast __lnot_fast: ld a, e ; a=E - cpl ; a=~E - ld e, a ; e=~E add hl, de ; uhl=UHL+x ex de, hl ; ude=UHL+x, uhl=x scf @@ -13,4 +11,6 @@ __lnot_fast: ; =x-UHL-x-1 ; =-UHL-1 ; =~UHL + cpl ; a=~E + ld e, a ; e=~E ret From be8f4e401d834d2ce3cf9be7f2d856af11f4ff73 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 23:20:16 -0400 Subject: [PATCH 135/142] Fix brems again (and bdivs) --- src/std/shared/bdvrms.src | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms.src index acd1b569c..3de9d52ed 100644 --- a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms.src @@ -10,21 +10,22 @@ __bdvrms: public __bdvrms.hijack_a_l_dividend .hijack_a_l_dividend: rla - jr nc, .neg_divisor_skip + jr nc, .neg_dividend_skip xor a, a sub a, l ld l, a -.neg_divisor_skip: +.neg_dividend_skip: push bc - ld a, b + ld a, c rla - jr nc, .neg_dividend_skip + jr nc, .neg_divisor_skip xor a, a - sub a, b - ld b, a -.neg_dividend_skip: + sub a, c + ld c, a + jr nc, .neg_dividend_skip +.neg_divisor_skip: call __bdvrmu.hijack_l_dividend From 66259f4a479c7d062f408568f92d3e61fbde9213 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Fri, 18 Mar 2022 23:30:01 -0400 Subject: [PATCH 136/142] Fix brems, hopefully for good --- src/std/shared/brems.src | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index a9c03e906..85ff57832 100644 --- a/src/std/shared/brems.src +++ 
b/src/std/shared/brems.src @@ -9,13 +9,14 @@ __brems: push hl ld l, a + push hl call __bdvrms.hijack_a_l_dividend - ld a, h - bit 7, b + pop af + ld a, h pop hl - ret z + ret p neg ret From dc5443be0d33556a7e9707cbe3bc40169495e3ca Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Mar 2022 10:08:30 -0400 Subject: [PATCH 137/142] Fix llshrs_fast and llshru_fast --- src/std/shared/llshrs_fast.src | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index 9e125f4b2..6a2f92bf1 100644 --- a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -33,16 +33,16 @@ __llshrs_fast: ld b, c ld c, a - ld a, l - ld (iy - 5), h + ld a, e + ld (iy - 2), d + ex de, hl pop hl ld l, e ld h, d - ex de, hl - pop hl - ld l, a + pop de + ld e, a ret From 0fdd2daec2a99b944cff26e0e2861b2e65e4db65 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Mar 2022 10:26:58 -0400 Subject: [PATCH 138/142] Optimize llshl slightly --- src/std/shared/llshl.src | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/std/shared/llshl.src b/src/std/shared/llshl.src index 1e7f91ef5..b2e700bc9 100644 --- a/src/std/shared/llshl.src +++ b/src/std/shared/llshl.src @@ -5,14 +5,17 @@ __llshl: ; Suboptimal for large shift amounts push af - push iy - ld iy, 0 - add iy, sp ld a, b - ld b, (iy + 9) + + push hl + ld hl, 9 + add hl, sp + ld b, (hl) + pop hl + inc b - dec b - jr z, .finish + jr .begin + .loop: add hl, hl ex de, hl @@ -20,9 +23,9 @@ __llshl: ex de, hl rl c rla +.begin: djnz .loop -.finish: + ld b, a - pop iy pop af ret From 1d9865bcfd0d295fc959df476a0b313bff8d9765 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Mar 2022 12:07:06 -0400 Subject: [PATCH 139/142] Implement optimized sdivu, sdivs, sremu, and srems --- src/std/linked/linked.src | 8 ++++ src/std/shared/bdivs.src | 4 +- src/std/shared/{bdvrms.src => bdvrms_abs.src} | 6 +-- 
src/std/shared/brems.src | 4 +- src/std/shared/shared.src | 8 ---- src/std/static/idvrmu.src | 7 ++- src/std/static/sdivs.src | 21 +++++++++ src/std/static/sdivu.src | 18 ++++++++ src/std/static/sdvrms_abs.src | 44 +++++++++++++++++++ src/std/static/sdvrmu.src | 23 ++++++++++ src/std/static/srems.src | 20 +++++++++ src/std/static/sremu.src | 17 +++++++ 12 files changed, 163 insertions(+), 17 deletions(-) rename src/std/shared/{bdvrms.src => bdvrms_abs.src} (84%) create mode 100644 src/std/static/sdivs.src create mode 100644 src/std/static/sdivu.src create mode 100644 src/std/static/sdvrms_abs.src create mode 100644 src/std/static/sdvrmu.src create mode 100644 src/std/static/srems.src create mode 100644 src/std/static/sremu.src diff --git a/src/std/linked/linked.src b/src/std/linked/linked.src index e64352919..06113dbd2 100644 --- a/src/std/linked/linked.src +++ b/src/std/linked/linked.src @@ -62,6 +62,10 @@ __bshru := 000104h ; short (16-bit) math public __sand __sand := 000200h + public __sdivs +__sdivs := 000208h + public __sdivu +__sdivu := 00020Ch public __smuls __smuls := 000224h public __smulu @@ -72,6 +76,10 @@ __sneg := 00022Ch __snot := 000230h public __sor __sor := 000234h + public __srems +__srems := 000238h + public __sremu +__sremu := 00023Ch public __sshl __sshl := 000240h public __sshrs diff --git a/src/std/shared/bdivs.src b/src/std/shared/bdivs.src index 6dd9e93a4..dad74b4b0 100644 --- a/src/std/shared/bdivs.src +++ b/src/std/shared/bdivs.src @@ -9,7 +9,7 @@ __bdivs: push hl ld l, b - call __bdvrms + call __bdvrms_abs ld a, c xor a, b @@ -22,4 +22,4 @@ __bdivs: ret - extern __bdvrms + extern __bdvrms_abs diff --git a/src/std/shared/bdvrms.src b/src/std/shared/bdvrms_abs.src similarity index 84% rename from src/std/shared/bdvrms.src rename to src/std/shared/bdvrms_abs.src index 3de9d52ed..3c0fc9c1b 100644 --- a/src/std/shared/bdvrms.src +++ b/src/std/shared/bdvrms_abs.src @@ -1,13 +1,13 @@ assume adl=1 section .text - public __bdvrms -__bdvrms: + 
public __bdvrms_abs +__bdvrms_abs: ; I: C=divisor, L=dividend ; O: a=?, h=abs(L)%abs(C), l=abs(L)/abs(C) ld a, l - public __bdvrms.hijack_a_l_dividend + public __bdvrms_abs.hijack_a_l_dividend .hijack_a_l_dividend: rla jr nc, .neg_dividend_skip diff --git a/src/std/shared/brems.src b/src/std/shared/brems.src index 85ff57832..ca1c80c8f 100644 --- a/src/std/shared/brems.src +++ b/src/std/shared/brems.src @@ -10,7 +10,7 @@ __brems: ld l, a push hl - call __bdvrms.hijack_a_l_dividend + call __bdvrms_abs.hijack_a_l_dividend pop af @@ -22,4 +22,4 @@ __brems: ret - extern __bdvrms.hijack_a_l_dividend + extern __bdvrms_abs.hijack_a_l_dividend diff --git a/src/std/shared/shared.src b/src/std/shared/shared.src index d7c71c9c9..d82a7f87d 100644 --- a/src/std/shared/shared.src +++ b/src/std/shared/shared.src @@ -64,20 +64,12 @@ __ishru_b := 000188h __itol := 000194h public __ltof __ltof := 000284h - public __sdivs -__sdivs := 000208h - public __sdivu -__sdivu := 00020Ch public __seqcase __seqcase := 000210h public __seqcaseD __seqcaseD := 000214h public __setflag __setflag := 000218h - public __srems -__srems := 000238h - public __sremu -__sremu := 00023Ch public __sshl_b __sshl_b := 000244h public __sshrs_b diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src index 9b3d45694..227123c9b 100644 --- a/src/std/static/idvrmu.src +++ b/src/std/static/idvrmu.src @@ -7,10 +7,13 @@ __idvrmu: ; O: a=0, ude=UHL/UBC, uhl=UHL%UBC ex de, hl - or a, a - sbc hl, hl ld a, 24 + public __idvrmu.hijack_a_iters_ude_dividend +.hijack_a_iters_ude_dividend: + + or a, a + sbc hl, hl .loop: ex de, hl diff --git a/src/std/static/sdivs.src b/src/std/static/sdivs.src new file mode 100644 index 000000000..9a741a1ab --- /dev/null +++ b/src/std/static/sdivs.src @@ -0,0 +1,21 @@ + assume adl=1 + + section .text + public __sdivs +__sdivs: +; I: HL=dividend, BC=divisor +; O: uhl=HL/BC + + push de + + call __sdvrms_abs + ex de, hl + + pop de + ret p + + jp __ineg + + + extern __ineg + extern 
__sdvrms_abs diff --git a/src/std/static/sdivu.src b/src/std/static/sdivu.src new file mode 100644 index 000000000..f255645bb --- /dev/null +++ b/src/std/static/sdivu.src @@ -0,0 +1,18 @@ + assume adl=1 + + section .text + public __sdivu +__sdivu: +; I: HL=dividend, BC=divisor +; O: uhl=HL/BC + + push de + + call __sdvrmu + ex de, hl + + pop de + ret + + + extern __sdvrmu diff --git a/src/std/static/sdvrms_abs.src b/src/std/static/sdvrms_abs.src new file mode 100644 index 000000000..357199194 --- /dev/null +++ b/src/std/static/sdvrms_abs.src @@ -0,0 +1,44 @@ + assume adl=1 + + section .text + public __sdvrms_abs +__sdvrms_abs: +; I: HL=dividend, BC=divisor +; O: ude=abs(HL)/abs(BC), uhl=abs(HL)%abs(BC), sf=(HL<0)^(BC<0), cf=HL<0 + + push bc + + ld e, a + ld a, h + xor a, b + ld a, h + rla + ld a, e + push af + + jr nc, .neg_dividend_skip + add hl, de + ex de, hl + or a, a + sbc hl, de +.neg_dividend_skip: + + ld a, b + rla + jr nc, .neg_divisor_skip + xor a, a + sub a, c + ld c, a + sbc a, a + sub a, b + ld b, a +.neg_divisor_skip: + + call __sdvrmu + + pop af + pop bc + ret + + + extern __sdvrmu diff --git a/src/std/static/sdvrmu.src b/src/std/static/sdvrmu.src new file mode 100644 index 000000000..00839e8a7 --- /dev/null +++ b/src/std/static/sdvrmu.src @@ -0,0 +1,23 @@ + assume adl=1 + + section .text + public __sdvrmu +__sdvrmu: +; I: HL=dividend, BC=divisor +; O: a=0, ude=HL/BC, uhl=HL%BC + + push hl + dec sp + pop de + inc sp + ld e, 0 + + inc bc + dec.s bc + + ld a, 16 + + jp __idvrmu.hijack_a_iters_ude_dividend + + + extern __idvrmu.hijack_a_iters_ude_dividend diff --git a/src/std/static/srems.src b/src/std/static/srems.src new file mode 100644 index 000000000..c608922b5 --- /dev/null +++ b/src/std/static/srems.src @@ -0,0 +1,20 @@ + assume adl=1 + + section .text + public __srems +__srems: +; I: HL=dividend, BC=divisor +; O: uhl=HL%BC + + push de + + call __sdvrms_abs + + pop de + ret nc + + jp __ineg + + + extern __ineg + extern __sdvrms_abs diff --git 
a/src/std/static/sremu.src b/src/std/static/sremu.src new file mode 100644 index 000000000..4da4be230 --- /dev/null +++ b/src/std/static/sremu.src @@ -0,0 +1,17 @@ + assume adl=1 + + section .text + public __sremu +__sremu: +; I: HL=dividend, BC=divisor +; O: uhl=HL%BC + + push de + + call __sdvrmu + + pop de + ret + + + extern __sdvrmu From 77f9eeb36d79b76faa873ba4012c8c3c3bed4cf3 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Mar 2022 17:38:58 -0400 Subject: [PATCH 140/142] Fix sdivu and sdivs to preserve A --- src/std/static/idivu.src | 2 -- src/std/static/idvrms.src | 3 --- src/std/static/idvrmu.src | 4 +++- src/std/static/iremu.src | 2 -- src/std/static/sdvrms_abs.src | 2 +- src/std/static/sdvrmu.src | 1 + 6 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/std/static/idivu.src b/src/std/static/idivu.src index 8a6e9d9e6..17039c500 100644 --- a/src/std/static/idivu.src +++ b/src/std/static/idivu.src @@ -6,14 +6,12 @@ __idivu: ; I: UHL=dividend, UBC=divisor ; O: uhl=UHL/UBC - push af push de call __idvrmu ex de, hl pop de - pop af ret diff --git a/src/std/static/idvrms.src b/src/std/static/idvrms.src index c135d5b50..81ab79a37 100644 --- a/src/std/static/idvrms.src +++ b/src/std/static/idvrms.src @@ -30,11 +30,8 @@ __idvrms: ex de, hl .pos_dividend_skip: - push af - call __idvrmu - pop af call p, __ineg ld b, a diff --git a/src/std/static/idvrmu.src b/src/std/static/idvrmu.src index 227123c9b..967669dd6 100644 --- a/src/std/static/idvrmu.src +++ b/src/std/static/idvrmu.src @@ -4,10 +4,11 @@ public __idvrmu __idvrmu: ; I: UHL=dividend, UBC=divisor -; O: a=0, ude=UHL/UBC, uhl=UHL%UBC +; O: ude=UHL/UBC, uhl=UHL%UBC ex de, hl + push af ld a, 24 public __idvrmu.hijack_a_iters_ude_dividend .hijack_a_iters_ude_dividend: @@ -32,4 +33,5 @@ __idvrmu: dec a jr nz, .loop + pop af ret diff --git a/src/std/static/iremu.src b/src/std/static/iremu.src index 46c091e8f..105428371 100644 --- a/src/std/static/iremu.src +++ b/src/std/static/iremu.src @@ 
-6,13 +6,11 @@ __iremu: ; I: UHL=dividend, UBC=divisor ; O: uhl=UHL%UBC - push af push de call __idvrmu pop de - pop af ret diff --git a/src/std/static/sdvrms_abs.src b/src/std/static/sdvrms_abs.src index 357199194..9bbb1496b 100644 --- a/src/std/static/sdvrms_abs.src +++ b/src/std/static/sdvrms_abs.src @@ -34,9 +34,9 @@ __sdvrms_abs: ld b, a .neg_divisor_skip: + pop af call __sdvrmu - pop af pop bc ret diff --git a/src/std/static/sdvrmu.src b/src/std/static/sdvrmu.src index 00839e8a7..27fe2a40d 100644 --- a/src/std/static/sdvrmu.src +++ b/src/std/static/sdvrmu.src @@ -15,6 +15,7 @@ __sdvrmu: inc bc dec.s bc + push af ld a, 16 jp __idvrmu.hijack_a_iters_ude_dividend From ffbe5bb862721ef57d7dc0fcdcdfc1036bcbfb3f Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sat, 19 Mar 2022 18:27:58 -0400 Subject: [PATCH 141/142] Fix llshrs_fast and llshru_fast for shift by zero --- src/std/shared/llshrs_fast.src | 13 +++++++------ src/std/shared/llshru_fast.src | 7 ++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/std/shared/llshrs_fast.src b/src/std/shared/llshrs_fast.src index 6a2f92bf1..01e603c45 100644 --- a/src/std/shared/llshrs_fast.src +++ b/src/std/shared/llshrs_fast.src @@ -12,7 +12,7 @@ __llshrs_fast: ld b, (iy + 3) inc b dec b - ret z + jr z, .skip push de push hl @@ -30,10 +30,7 @@ __llshrs_fast: rr l djnz .loop - ld b, c - ld c, a - - ld a, e + ld b, e ld (iy - 2), d ex de, hl @@ -42,7 +39,11 @@ __llshrs_fast: ld h, d pop de - ld e, a + ld e, b + +.skip: + ld b, c + ld c, a ret diff --git a/src/std/shared/llshru_fast.src b/src/std/shared/llshru_fast.src index b15869a98..09d162c84 100644 --- a/src/std/shared/llshru_fast.src +++ b/src/std/shared/llshru_fast.src @@ -6,12 +6,13 @@ __llshru_fast: ld iy, 0 add iy, sp + ld a, (iy + 3) + or a, a + ret z + ld a, c ld c, b ld b, (iy + 3) - inc b - dec b - ret z push de push hl From 03e8e0add92af5856bf398d88b97b95bfd0cfd91 Mon Sep 17 00:00:00 2001 From: Zachary Wassall Date: Sun, 20 Mar 2022 
09:37:57 -0400 Subject: [PATCH 142/142] Implement size-optimized bctlz, sctlz, ictlz, lctlz, and llctlz --- src/std/shared/bctlz.src | 18 ++++++++++++++++++ src/std/shared/ictlz.src | 17 +++++++++++++++++ src/std/shared/lctlz.src | 20 ++++++++++++++++++++ src/std/shared/llctlz.src | 26 ++++++++++++++++++++++++++ src/std/shared/sctlz.src | 17 +++++++++++++++++ 5 files changed, 98 insertions(+) create mode 100644 src/std/shared/bctlz.src create mode 100644 src/std/shared/ictlz.src create mode 100644 src/std/shared/lctlz.src create mode 100644 src/std/shared/llctlz.src create mode 100644 src/std/shared/sctlz.src diff --git a/src/std/shared/bctlz.src b/src/std/shared/bctlz.src new file mode 100644 index 000000000..8a9579b69 --- /dev/null +++ b/src/std/shared/bctlz.src @@ -0,0 +1,18 @@ + assume adl=1 + + section .text + public __bctlz +__bctlz: + push hl + + scf + sbc hl, hl + +.loop: + rla + inc hl + jr nc, .loop + + ld a, l + pop hl + ret diff --git a/src/std/shared/ictlz.src b/src/std/shared/ictlz.src new file mode 100644 index 000000000..34dcb3ed1 --- /dev/null +++ b/src/std/shared/ictlz.src @@ -0,0 +1,17 @@ + assume adl=1 + + section .text + public __ictlz +__ictlz: + push hl + + scf + sbc a, a + +.loop: + adc hl, hl + inc a + jr nc, .loop + + pop hl + ret diff --git a/src/std/shared/lctlz.src b/src/std/shared/lctlz.src new file mode 100644 index 000000000..9943bad91 --- /dev/null +++ b/src/std/shared/lctlz.src @@ -0,0 +1,20 @@ + assume adl=1 + + section .text + public __lctlz +__lctlz: + push de + push hl + + scf + sbc a, a + +.loop: + adc hl, hl + rl e + inc a + jr nc, .loop + + pop hl + pop de + ret diff --git a/src/std/shared/llctlz.src b/src/std/shared/llctlz.src new file mode 100644 index 000000000..ad43d18ae --- /dev/null +++ b/src/std/shared/llctlz.src @@ -0,0 +1,26 @@ + assume adl=1 + + section .text + public __llctlz +__llctlz: + push bc + push de + push hl + + scf + sbc a, a + +.loop: + adc hl, hl + ex de, hl + adc hl, hl + ex de, hl + rl c + rl b + inc 
a + jr nc, .loop + + pop hl + pop de + pop bc + ret diff --git a/src/std/shared/sctlz.src b/src/std/shared/sctlz.src new file mode 100644 index 000000000..96815e2c9 --- /dev/null +++ b/src/std/shared/sctlz.src @@ -0,0 +1,17 @@ + assume adl=1 + + section .text + public __sctlz +__sctlz: + push hl + + scf + sbc a, a + +.loop: + adc.s hl, hl + inc a + jr nc, .loop + + pop hl + ret