simpleForth/simpleForth.c

977 lines
20 KiB
C

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
#include <fcntl.h>
/*
* Constants and Helper Macros
*/
#define TRACE 0
#define TRACE2 0
#define CELLCOUNT 0x1000
#define CELLL (sizeof (uintptr_t))
#define FALSE 0
#define TRUE -1
#define IMM_MASK 0x80
#define BACKSPACE_IN 127
#define BACKSPACE_OUT 8
#define INC(lval) do { lval += sizeof (lval); } while (0)
#define DEC(lval) do { lval -= sizeof (lval); } while (0)
#define AT(addr) (*((Cell *)addr))
#define C_AT(addr) (*((char *)addr))
#define HEADER_AT(addr) ((DictEntry *)( addr ))
#define RPUSH(rval) do { \
AT(FS.RP) = rval; \
DEC(FS.RP); \
} while (0)
#define RPOP(lval) do { \
INC(FS.RP); \
lval = AT(FS.RP); \
} while (0)
#define SPUSH(rval) do { \
AT(FS.SP) = rval; \
DEC(FS.SP); \
} while (0)
#define SPOP(lval) do { \
INC(FS.SP); \
lval = AT(FS.SP); \
} while (0)
#define NEXT return
/*
* Core System Datatypes
*/
typedef uintptr_t Cell;
typedef void (*Code)(void);
typedef Code * XT;
#define HDRNAMESZ 7
typedef struct forthDictEntry DictEntry;
struct forthDictEntry {
union {
struct {
unsigned char nameLength;
unsigned char nameBegin[HDRNAMESZ];
};
Cell padding[2];
};
DictEntry *link;
Code codeAddress;
};
typedef struct forthState {
/* Interpreter Registers */
Cell IP; /* Interpreter Pointer */
Cell WP; /* Word Pointer */
Cell SP; /* Argument Stack Pointer */
Cell RP; /* Return Stack Pointer */
int running;
} ForthState;
/*
* Interpreter State and Main Memory
*/
ForthState FS;
Cell FM[CELLCOUNT];
char TIB[128];
DictEntry **VOCAB = (DictEntry **)&FM[0];
Cell *BASE = &FM[1];
Cell *NUM_TIB = &FM[2];
Cell *TO_IN = &FM[3];
Cell *STATE = &FM[4];
Cell *DP = &FM[5];
Cell *SP0 = &FM[6];
Cell *RP0 = &FM[7];
Cell *DP0 = &FM[8];
Cell *IP0 = &FM[9];
/*
* Utility and I/O Functions
*/
void dumpStack(void) {
Cell *BOS = (Cell *)(FM + CELLCOUNT - 1);
Cell *TOS = (Cell *)(FS.SP);
Cell val;
int depth = (BOS-TOS);
int i;
for (i = 0; i < depth; i++) {
val = AT(BOS-i);
fprintf(stderr, "%d: %lu\n", i, val);
}
}
DictEntry *findWord(int len, char *name, DictEntry *vocab) {
char length;
length = (len > HDRNAMESZ) ? HDRNAMESZ : len;
while (vocab != NULL) {
if ((vocab->nameLength & ~(IMM_MASK)) == len) {
if (strncmp(name, (char*)vocab->nameBegin, length) == 0) {
return vocab;
}
}
vocab = vocab->link;
}
return NULL;
}
char rxchar(void) {
int rv;
char b;
struct termios oldt, newt;
tcgetattr(STDIN_FILENO, &oldt);
newt = oldt;
newt.c_lflag &= ~( ICANON | ECHO );
tcsetattr(STDIN_FILENO, TCSANOW, &newt);
rv = read(STDIN_FILENO, &b, 1);
tcsetattr(STDIN_FILENO, TCSANOW, &oldt);
return b;
}
void txchar(char b) {
int rv;
struct termios oldt, newt;
tcgetattr(STDIN_FILENO, &oldt);
newt = oldt;
newt.c_lflag &= ~( ICANON | ECHO );
tcsetattr(STDIN_FILENO, TCSANOW, &newt);
rv = write(STDIN_FILENO, &b, 1);
tcsetattr(STDIN_FILENO, TCSANOW, &oldt);
}
/*
* Terminal IO Primitives
*/
// accept ( a l -- l2 ) Read a string of up to l characters, storing it at a.
// Return the length of the string actually read.
void ACCEPT(void) {
Cell addr, len;
char *inbuf, in;
int incount = 0;
SPOP(len);
SPOP(addr);
inbuf = (char*)addr;
for (;;) {
in = rxchar();
switch (in) {
case BACKSPACE_IN:
if (incount > 0) {
txchar(BACKSPACE_OUT);
txchar(' ');
txchar(BACKSPACE_OUT);
incount--;
}
break;
case '\n':
if (incount > 0) {
goto finished;
}
break;
default:
if (incount < (int)len) {
inbuf[incount] = in;
incount++;
txchar(in);
}
break;
}
}
finished:
txchar('\n');
SPUSH(incount);
NEXT;
}
// word ( c -- cstring ) Consume the next word in TIB as delimited by c,
// returning the address of the word as a counted string.
void WORD(void) {
Cell cell;
char delim, *ch, *last, *lenp, *strp;
SPOP(cell);
delim = cell;
ch = TIB + *TO_IN;
last = TIB + *NUM_TIB;
lenp = (char*)*DP;
strp = lenp + 1;
*lenp = 0;
/* skip leading delimiters */
while (*ch == delim) {
ch++;
}
/* copy to pad area until a delimiter is reached */
for (;;) {
if (ch == last) {
break;
}
if (*ch == delim) {
break;
}
*strp = *ch;
strp++; ch++;
*lenp += 1;
}
*strp = ' ';
*TO_IN = ch - TIB;
cell = (Cell)lenp;
SPUSH(cell);
NEXT;
}
// emit ( c -- ) display c on the terminal
void EMIT(void) {
Cell cell;
char c;
SPOP(cell);
c = cell;
txchar(c);
NEXT;
}
void NL(void) {
txchar('\n');
NEXT;
}
void DOT(void) {
Cell a;
char buf[64];
Cell i;
SPOP(a);
sprintf(buf, "%ld", (long)a);
for (i = 0; i < strlen(buf); i++) {
txchar(buf[i]);
}
txchar('\n');
NEXT;
}
/*
* Memory Access Primitives
*/
// @ ( addr -- x ) read x from addr
void FETCH(void) {
Cell addr, val;
SPOP(addr);
val = AT(addr);
SPUSH(val);
NEXT;
}
// ! ( x addr -- ) store x at addr
void STORE(void) {
Cell addr, val;
SPOP(addr);
SPOP(val);
AT(addr) = val;
NEXT;
}
// c@ ( addr -- c ) read character from addr
void CFETCH(void) {
Cell addr;
char c;
SPOP(addr);
c = AT(addr);
SPUSH(c);
NEXT;
}
/*
* Stack Manipulation Primitives
*/
// drop ( x -- ) drop x from stack
void DROP(void) {
INC(FS.SP);
NEXT;
}
// dup ( x -- x x ) push a copy of x to stack
void DUP(void) {
Cell addr, val;
addr = FS.SP;
INC(addr);
val = AT(addr);
SPUSH(val);
NEXT;
}
// swap ( x y -- y x ) swap top two values on stack
void SWAP(void) {
Cell vx, vy;
SPOP(vy);
SPOP(vx);
SPUSH(vy);
SPUSH(vx);
NEXT;
}
// rot ( x y z -- y z x ) rotate-left top 3 values on stack
void ROT(void) {
Cell vx, vy, vz;
SPOP(vz);
SPOP(vy);
SPOP(vx);
SPUSH(vy);
SPUSH(vz);
SPUSH(vx);
NEXT;
}
/*
* Flow Control Primitives
*/
// next ( -- ) Decrement index on return stack and exit loop if < 0
void DONEXT(void) {
AT(FS.RP) -= 1;
if ((int)AT(FS.RP) < 0) {
RPOP(FS.RP);
INC(FS.IP);
} else {
FS.IP = AT(FS.IP);
}
NEXT;
}
// ?branch ( f -- ) Branch if flag is 0
void DOQBRANCH(void) {
Cell flag;
SPOP(flag);
if (flag == FALSE) {
FS.IP = AT(FS.IP);
} else {
INC(FS.IP);
}
NEXT;
}
// branch ( -- ) Branch to an inline address.
void DOBRANCH(void) {
FS.IP = AT(FS.IP);
NEXT;
}
/*
* Compiling Primitives
*/
// , ( x -- ) Compile x to the current definition
void COMMA(void) {
Cell x;
SPOP(x);
AT(*DP) = x;
*DP += sizeof x;
NEXT;
}
// c, ( c -- ) Compile character c to the current definition
void CCOMMA(void) {
char c;
SPOP(c);
C_AT(*DP) = c;
*DP += 1;
NEXT;
}
/* DOVAR is the machine code for variable definitions. When it is executed, WP
will be pointing at the codeAddress field of its dictionary entry, so the
next cell will be the variable cell. DOVAR pushes the address of the variable
cell to the data stack. */
void DOVAR(void) {
Cell LP;
LP = FS.WP;
INC(LP);
SPUSH(LP);
NEXT;
}
// create ( -- ) build a header for a new word in the dictionary, taking the
// name as the next word in the input buffer
void CREATE(void) {
DictEntry *newHead = HEADER_AT(*DP);
Cell cell;
char *name;
int len;
SPUSH(' ');
WORD();
SPOP(cell);
name = (char *)(cell + 1);
len = *(char *)(cell);
newHead->link = HEADER_AT(*VOCAB);
newHead->codeAddress = DOVAR;
*VOCAB = newHead;
*DP += sizeof (DictEntry);
NEXT;
}
void DOCOL(void);
// : ( -- ) start defining a new word.
void COLON(void) {
*STATE = -1;
CREATE();
(*VOCAB)->codeAddress = DOCOL;
NEXT;
}
void compileCode(XT codeAddress);
void compileLiteral(Cell literal);
static XT DOEXIT_XT;
// ; ( -- ) finish defining a new word.
void SEMICOLON(void) {
compileCode(DOEXIT_XT);
*STATE = 0;
NEXT;
}
void DOCONST(void);
// constant ( x -- ) create a new constant of value x, with name from input buffer
void CONSTANT(void) {
Cell value;
SPOP(value);
CREATE();
(*VOCAB)->codeAddress = DOCONST;
AT(*DP) = value;
*DP += sizeof value;
NEXT;
}
/*
* Math Primitives
*/
// + ( x y -- z ) \ Push the result of x + y
void PLUS(void) {
Cell a, b;
SPOP(a);
SPOP(b);
a = a + b;
SPUSH(a);
NEXT;
}
// = ( x y -- f ) \ Push true if x = y, false otherwise
void EQUALS(void) {
Cell x, y, f;
SPOP(y);
SPOP(x);
f = (x == y) ? TRUE : FALSE;
SPUSH(f);
NEXT;
}
/*
* String Primitives
*/
// count ( a -- a2 len ) a is addr of counted string; a2 is addr of first char
void COUNT(void) {
char *a, *a2;
Cell len, addr;
SPOP(addr);
a = (char *)addr;
len = a[0];
a2 = a+1;
SPUSH((Cell)a2);
SPUSH(len);
}
/* >number ( n a l -- n2 a2 l2 ) Convert string to number; n is starting number,
a is addr of first char of string, l is number of chars in string. If
successful, n2 is the converted number, l2 is zero, and a2 points just
after the last char in the string. If unsuccessful, l2 is the non-zero
length of the remaining characters in the string, and a2 is the address
of the character that caused conversion to fail. */
void TONUMBER(void) {
Cell num, addr, len, num2;
char *str, *end;
char convbuf[128];
SPOP(len);
SPOP(addr);
SPOP(num);
memcpy(convbuf, (char*)addr, len);
convbuf[len] = '\0';
str = convbuf;
num2 = strtol(str, &end, *BASE);
if (*end == '\0') {
addr += len;
num += num2;
len = 0;
} else {
len = len - (end - str);
addr += len;
}
SPUSH(num);
SPUSH(addr);
SPUSH(len);
NEXT;
}
/*
* Dictionary Lookup Primitive
*/
// find ( cstring -- a2 flag ) look up a word in the dictionary.
// If found, a2 is XT of the word; flag is 1 for imm, -1 otherwise
// If not found, a2 is cstring and flag is 0
void FIND(void) {
Cell cstr;
int len;
char *name;
DictEntry *hdr;
SPOP(cstr);
len = ((char*)cstr)[0];
name = ((char*)cstr)+1;
hdr = findWord(len, name, *VOCAB);
if (hdr == NULL) {
SPUSH(cstr);
SPUSH(0);
} else if (hdr->nameLength & IMM_MASK) {
SPUSH((Cell)&hdr->codeAddress);
SPUSH(1);
} else {
SPUSH((Cell)&hdr->codeAddress);
SPUSH((Cell)-1);
}
NEXT;
}
/*
* Inner Interpreter Primitives
*/
/* DOCOL pushes IP onto the R stack, sets IP to the next cell after WP, and
jumps to NEXT */
void DOCOL(void) {
RPUSH(FS.IP);
FS.IP = FS.WP;
INC(FS.IP);
NEXT;
}
/* EXIT terminates a colon word, popping IP from R stack. */
void EXIT(void) {
RPOP(FS.IP);
NEXT;
}
/* DOLIT fetches the value pointed to by IP, pushes it to the data stack, and
increments IP; it is used for in-line literals in a secondary definition */
void DOLIT(void) {
Cell L;
L = AT(FS.IP);
SPUSH(L);
INC(FS.IP);
NEXT;
}
static Code DOLIT_codeAddress = DOLIT;
/* DOCONST is like DOVAR, but it pushes the value of the cell after the
codeAddress rather than the address of the cell */
void DOCONST(void) {
Cell LP, L;
LP = FS.WP;
INC(LP);
L = AT(LP);
SPUSH(L);
NEXT;
}
/* BYE instructs the inner interpreter to stop */
void BYE(void) {
FS.running = 0;
NEXT;
}
/* NEXT extracts the word address of the next instruction pointed to by IP,
stores it in WP, and increments IP to the next cell. It then retrieves the
address stored at the location indicated by WP and jumps to that address. */
void inner_interpreter(void) {
Code XP;
#if TRACE2
DictEntry *hdr;
char buf[HDRNAMESZ+1];
int sz;
#endif
while (FS.running) {
/* The body of this loop constitutes NEXT */
FS.WP = AT(FS.IP);
#if TRACE2
if (FS.WP == (Cell)&DOLIT_codeAddress) {
memcpy(buf, "LIT", 4);
} else {
hdr = HEADER_AT(FS.WP-(2* sizeof (Cell)));
sz = hdr->nameLength; sz = (sz > HDRNAMESZ) ? HDRNAMESZ : sz;
memcpy(buf, hdr->nameBegin, sz); buf[sz] = '\0';
}
fprintf(stderr, "-----------------\n");
fprintf(stderr, "\nExecuting word %s\n", buf);
fprintf(stderr, "Stack before:\n");
dumpStack();
#endif
INC(FS.IP);
XP = (Code)AT(FS.WP);
XP();
#if TRACE2
fprintf(stderr, "Stack after:\n");
dumpStack();
fprintf(stderr, "-----------------\n");
#endif
}
}
/* EXECUTE pops a code address from the data stack into the IP, and jumps to
NEXT */
void EXECUTE(void) {
Code XP;
#if TRACE
DictEntry *hdr;
char buf[HDRNAMESZ+1];
int sz;
#endif
SPOP(FS.WP);
#if TRACE
if (FS.WP == (Cell)&DOLIT_codeAddress) {
memcpy(buf, "LIT", 4);
} else {
hdr = HEADER_AT(FS.WP-(2* sizeof (Cell)));
sz = hdr->nameLength; sz = (sz > HDRNAMESZ) ? HDRNAMESZ : sz;
memcpy(buf, hdr->nameBegin, sz); buf[sz] = '\0';
}
fprintf(stderr, "-----------------\n");
fprintf(stderr, "EXECUTE: Executing word %s\n", buf);
fprintf(stderr, "Stack before:\n");
dumpStack();
#endif
XP = (Code)AT(FS.WP);
XP();
#if TRACE
fprintf(stderr, "Stack after:\n");
dumpStack();
fprintf(stderr, "-----------------\n");
#endif
NEXT;
}
/* ABORT resets the interpreter state and resumes execution at COLD */
void ABORT(void) {
FS.SP = *SP0;
FS.RP = *RP0;
FS.IP = *IP0;
*NUM_TIB = 0;
*TO_IN = 0;
*STATE = 0;
NEXT;
}
/*
* Bootstrap Helper Functions
*/
void compileHeader(const char *name, Code code) {
DictEntry *newHead = HEADER_AT(*DP);
int i, len;
len = strlen(name);
newHead->nameLength = len;
for (i = 0; i < len && i < HDRNAMESZ; i++) {
newHead->nameBegin[i] = name[i];
}
newHead->link = HEADER_AT(*VOCAB);
newHead->codeAddress = code;
*VOCAB = newHead;
*DP += sizeof (DictEntry);
}
void compileCode(XT codeAddress) {
AT(*DP) = (Cell)codeAddress;
*DP += sizeof codeAddress;
}
void compileLiteral(Cell literal) {
AT(*DP) = (Cell)&DOLIT_codeAddress;
*DP += sizeof literal;
AT(*DP) = literal;
*DP += sizeof literal;
}
XT lookupWord(const char *name, DictEntry *vocab) {
int len;
DictEntry *hdr;
len = strlen(name);
hdr = findWord(len, (char *)name, vocab);
if (hdr != NULL) {
return &hdr->codeAddress;
} else {
return NULL;
}
}
void addPrimitive(const char *name, Code code) {
compileHeader(name, code);
}
void addImmPrimitive(const char *name, Code code) {
compileHeader(name, code);
(*VOCAB)->nameLength |= IMM_MASK;
}
void addConstant(const char *name, Cell value) {
compileHeader(name, DOCONST);
AT(*DP) = value;
*DP += sizeof value;
}
Cell cW(const char *name) {
XT code;
Cell codeAddr = 0;
code = lookupWord(name, *VOCAB);
if (code != NULL) {
codeAddr = *DP;
compileCode(code);
} else {
fprintf(stderr, "Error: %s not in dictionary\n", name);
exit(1);
}
return codeAddr;
}
Cell *cB(const char *name) {
Cell labelAddr;
cW(name);
labelAddr = *DP;
*DP += sizeof (Cell);
return (Cell *)labelAddr;
}
Cell cL(Cell literal) {
Cell codeAddr = *DP;
compileLiteral(literal);
return codeAddr;
}
void buildOuterInterpreter(void) {
Cell interpt, intpar, intexc, intnf, intnc, intskip, intdone;
Cell *b_interpt, *b_intpar, *b_intexc, *b_intnf, *b_intnc,
*b_intskip, *b_intdone, *b_intdone2, *b_intdone3;
/*
: INTERPRET ( -- )
BEGIN #TIB @ >IN @ = IF
TIB 50 ACCEPT #TIB ! 0 >IN ! / Refill Input Buffer
THEN 32 WORD FIND DUP IF
STATE @ = IF , ELSE EXECUTE THEN / Compile or Execute Word
ELSE
SWAP COUNT >NUMBER IF / Not a number; cleanup and abort
STATE @ IF VOCAB @ DUP CELLL + @ VOCAB ! DP !
THEN ABORT
ELSE / A number; compile or retain on stack
DROP STATE @ IF ['] DOLIT , , THEN
THEN
THEN AGAIN / Loop forever!
*/
compileHeader("INTERPRET", DOCOL);
interpt = cW("#TIB");
cW("@");
cW(">IN");
cW("@");
cW("=");
b_intpar = cB("?BRANCH");
cW("TIB");
cL(50);
cW("ACCEPT");
cW("#TIB");
cW("!");
cL(0);
cW(">IN");
cW("!");
intpar = cL(32);
cW("WORD");
cW("FIND");
cW("DUP");
b_intnf = cB("?BRANCH");
cW("STATE");
cW("@");
cW("=");
b_intexc = cB("?BRANCH");
cW(",");
b_intdone = cB("BRANCH");
intexc = cW("EXECUTE");
b_intdone2 = cB("BRANCH");
intnf = cW("SWAP");
cW("COUNT");
cW(">NUMBER");
b_intskip = cB("?BRANCH");
cW("STATE");
cW("@");
b_intnc = cB("?BRANCH");
cW("VOCAB");
cW("@");
cW("DUP");
cL(sizeof (Cell));
cW("+");
cW("@");
cW("VOCAB");
cW("!");
cL((Cell)&(*DP));
cW("!");
intnc = cW("ABORT");
intskip = cW("DROP");
cW("STATE");
cW("@");
b_intdone3 = cB("?BRANCH");
cL((Cell)&DOLIT_codeAddress);
cW(",");
cW(","); intdone = *DP;
b_interpt = cB("BRANCH");
cW("EXIT");
compileHeader("COLD", DOCOL);
cW("INTERPRET");
cW("EXIT");
/* Fix up branch targets */
*b_interpt = interpt;
*b_intpar = intpar;
*b_intexc = intexc;
*b_intnf = intnf;
*b_intnc = intnc;
*b_intskip = intskip;
*b_intdone = intdone;
*b_intdone2 = intdone;
*b_intdone3 = intdone;
}
void bootstrapCore(void) {
FS.SP = *SP0 = (Cell)(FM + CELLCOUNT - 1);
FS.RP = *RP0 = (Cell)(FM + CELLCOUNT - 65);
*DP = *DP0 = (Cell)(FM + 128);
*VOCAB = NULL;
/* Inner Interpreter Primitives */
addPrimitive("EXECUTE", EXECUTE);
addPrimitive("BYE", BYE);
addPrimitive("EXIT", EXIT);
DOEXIT_XT = &(*VOCAB)->codeAddress;
/* Core address constants */
addConstant("TIB", (Cell)TIB);
addConstant("VOCAB", (Cell)VOCAB);
addConstant("BASE", (Cell)BASE);
addConstant("#TIB", (Cell)NUM_TIB);
addConstant(">IN", (Cell)TO_IN);
addConstant("STATE", (Cell)STATE);
addConstant("DP", (Cell)DP);
addConstant("SP0", (Cell)SP0);
addConstant("RP0", (Cell)RP0);
/* Terminal I/O Primitives */
addPrimitive("ACCEPT", ACCEPT);
addPrimitive("WORD", WORD);
addPrimitive("EMIT", EMIT);
addPrimitive("NL", NL);
addPrimitive(".", DOT);
/* Memory Access Primitives */
addPrimitive("@", FETCH);
addPrimitive("!", STORE);
addPrimitive("c@", CFETCH);
/* Stack Manipulation Primitives */
addPrimitive("DROP", DROP);
addPrimitive("DUP", DUP);
addPrimitive("SWAP", SWAP);
addPrimitive("ROT", ROT);
/* Flow Control Primitives */
addPrimitive("DONEXT", DONEXT);
addPrimitive("?BRANCH", DOQBRANCH);
addPrimitive("BRANCH", DOBRANCH);
/* Compiling Primitives */
addPrimitive(",", COMMA);
addPrimitive("c,", CCOMMA);
addPrimitive("CREATE", CREATE);
addPrimitive(":", COLON);
addImmPrimitive(";", SEMICOLON);
addPrimitive("CONSTANT", CONSTANT);
/* Math Primitives */
addPrimitive("+", PLUS);
addPrimitive("=", EQUALS);
/* String Primitives */
addPrimitive("COUNT", COUNT);
addPrimitive(">NUMBER", TONUMBER);
/* Dictionary Search Primitives */
addPrimitive("FIND", FIND);
/* Outer Interpreter */
addPrimitive("ABORT", ABORT);
buildOuterInterpreter();
*IP0 = (Cell)((Cell *)lookupWord("COLD", *VOCAB) + 1);
}
void initialize(void) {
/* Set the running state to a true value */
FS.running = 1;
ABORT();
}
int main(void) {
bootstrapCore();
initialize();
inner_interpreter();
return 0;
}