I tried using macros to simulate loops in assembler and finally ended up with a higher language compiler into Z80 assembler. I chose Forth as a language because it is simpler than C and in a sense it is somewhere halfway between C and Asm.
When I started I knew almost nothing about Forth, now I know a little more, but not much, because I still haven't used one and I'm looking at it more from the point of view of how to translate an interpreter into asm. Things like "DOES>" keep surprising me.
But the result is quite good. It's the clearest asm code I've ever seen. When each word or group of words is clearly recognizable.
Forth code:
Code: Select all
: fib2 ( n1 -- n2 )
0 1 rot 0 ?do
over + swap
loop
drop
;
: fib2_bench ( -- )
1000 0 do
20 0 do
I fib2 drop
loop
loop
;
fib2_bench
macro M4 FORTH code (../fth2m4.sh fib2.fth > fib2.m4):
Code: Select all
include(`../M4/FIRST.M4')dnl
ifdef __ORG
org __ORG
else
org 32768
endif
INIT(60000)
CALL(_fib2_bench)
STOP
COLON(_fib2,({{{{ n1 -- n2 }}}}))
PUSH(0) PUSH(1) ROT PUSH(0) QUESTIONDO
OVER ADD SWAP
LOOP
DROP
SEMICOLON
COLON(_fib2_bench,({{{{ -- }}}}))
PUSH(1000) PUSH(0) DO
PUSH(20) PUSH(0) DO
I CALL(_fib2) DROP
LOOP
LOOP
SEMICOLON
Code: Select all
ifdef __ORG
org __ORG
else
org 32768
endif
; === b e g i n ===
ld [Stop+1], SP ; 4:20 init storing the original SP value when the "bye" word is used
ld L, 0x1A ; 2:7 init Upper screen
call 0x1605 ; 3:17 init Open channel
ld HL, 0xEA60 ; 3:10 init Return address stack = 60000
exx ; 1:4 init
call _fib2_bench ; 3:17 call ( -- )
Stop: ; stop
ld SP, 0x0000 ; 3:10 stop restoring the original SP value when the "bye" word is used
ld HL, 0x2758 ; 3:10 stop
exx ; 1:4 stop
ret ; 1:10 stop
; ===== e n d =====
; --- the beginning of a non-recursive function ---
_fib2: ; ( n1 -- n2 )
pop BC ; 1:10 : ret
ld [_fib2_end+1],BC; 4:20 : ( ret -- )
;[6:36] 0 1 rot ( x -- 0 1 x )
push DE ; 1:11 0 1 rot
ld DE, 0x0000 ; 3:10 0 1 rot
push DE ; 1:11 0 1 rot
inc E ; 1:4 0 1 rot
ld A, L ; 1:4 0 ?do_101(m) ( stop 0 -- )
ld [stp_lo101], A ; 3:13 0 ?do_101(m) lo stop
ld A, H ; 1:4 0 ?do_101(m)
ld [stp_hi101], A ; 3:13 0 ?do_101(m) hi stop
or L ; 1:4 0 ?do_101(m)
ex DE, HL ; 1:4 0 ?do_101(m)
pop DE ; 1:10 0 ?do_101(m)
jp z, exit101 ; 3:10 0 ?do_101(m)
ld BC, 0x0000 ; 3:10 0 ?do_101(m)
do101save: ; 0 ?do_101(m)
ld [idx101], BC ; 4:20 0 ?do_101(m) save index
do101: ; 0 ?do_101(m)
add HL, DE ; 1:11 over +
ex DE, HL ; 1:4 swap ( b a -- a b )
idx101 EQU $+1 ;[16:57/78] loop_101(m) idx always points to a 16-bit index
ld BC, 0x0000 ; 3:10 loop_101(m)
inc BC ; 1:6 loop_101(m) index++
ld A, C ; 1:4 loop_101(m) lo new index
stp_lo101 EQU $+1 ; loop_101(m)
xor 0x00 ; 2:7 loop_101(m) lo stop
jp nz, do101save ; 3:10 loop_101(m)
ld A, B ; 1:4 loop_101(m) hi new index
stp_hi101 EQU $+1 ; loop_101(m)
xor 0x00 ; 2:7 loop_101(m) hi stop
jp nz, do101save ; 3:10 loop_101(m)
leave101: ; loop_101(m)
exit101: ; loop_101(m)
ex DE, HL ; 1:4 drop
pop DE ; 1:10 drop ( a -- )
_fib2_end:
jp 0x0000 ; 3:10 ;
; --------- end of non-recursive function ---------
; --- the beginning of a non-recursive function ---
_fib2_bench: ; ( -- )
pop BC ; 1:10 : ret
ld [_fib2_bench_end+1],BC; 4:20 : ( ret -- )
ld BC, 0x0000 ; 3:10 1000 0 do_102(xm)
do102save: ; 1000 0 do_102(xm)
ld [idx102],BC ; 4:20 1000 0 do_102(xm)
xor A ; 1:4 20 0 do_103 i_103(m) 8-bit loop ( 20 0 -- i )
do103saveA: ; 20 0 do_103 i_103(m)
push DE ; 1:11 20 0 do_103 i_103(m)
ex DE, HL ; 1:4 20 0 do_103 i_103(m)
ld [idx103], A ; 3:13 20 0 do_103 i_103(m) save lo(index)
ld L, A ; 1:4 20 0 do_103 i_103(m)
ld H, 0x00 ; 2:7 20 0 do_103 i_103(m)
call _fib2 ; 3:17 call ( -- )
ex DE, HL ; 1:4 drop
pop DE ; 1:10 drop ( a -- )
;[9:32/32] loop_103(m) variant +1.ignore: 8-bit loop, run 20x
idx103 EQU $+1 ; loop_103(m) idx always points to a 16-bit index
ld A, 0 ; 2:7 loop_103(m) 0.. +1 ..(20), real_stop:0x0014
db 0x00 ; 1:4 loop_103(m) ignore opcode = hi(index) -> idx always points to a 16-bit index.
inc A ; 1:4 loop_103(m) index++
cp 0x14 ; 2:7 loop_103(m) lo(real_stop)
jp nz, do103saveA ; 3:10 loop_103(m) index<>real_stop?
;[16:57/58] loop_102(xm) variant +1.default: step one, run 1000x
idx102 EQU $+1 ; loop_102(xm) idx always points to a 16-bit index
ld BC, 0x0000 ; 3:10 loop_102(xm) 0.. +1 ..(1000), real_stop:0x03E8
inc BC ; 1:6 loop_102(xm) index++
ld A, C ; 1:4 loop_102(xm)
xor 0xE8 ; 2:7 loop_102(xm) lo(real_stop) first (232>3)
jp nz, do102save ; 3:10 loop_102(xm) 3x false positive
ld A, B ; 1:4 loop_102(xm)
xor 0x03 ; 2:7 loop_102(xm) hi(real_stop)
jp nz, do102save ; 3:10 loop_102(xm) 232x false positive if he was first
leave102: ; loop_102(xm)
exit102: ; loop_102(xm)
_fib2_bench_end:
jp 0x0000 ; 3:10 ;
; --------- end of non-recursive function ---------
Code: Select all
| Forth / C | Benchmark | Time (sec/round) | Bytes
| :---------------------: | :---------: | :--------------- | :------
| M4_FORTH | Fib2 | 0m5.65s | 133
| M4_FORTH use data stack | Fib2s | 0m5.03s | 112
| M4_FORTH use assembler | Fib2a | 0m2.55s | 96
| Boriel Basic zxbc 1.16.4| Fib2 a = a+c| 0m14.38s |
| zcc z88dk v16209 | Fib2 a = a+c| 0m49.19s |
| zcc z88dk v16209 | Fib2 a+= c | 0m43.97s |
| zcc z88dk v19766 -O2 | Fib2 a+= c | 0m36.09s | 2222