From 62066d614f196bda9d4889498b00333059240fd4 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 20:00:20 -0500 Subject: [PATCH 01/15] Add a structure for searching strings. The tvm_tree structure is a binary search tree. It will be used to hold preprocessor defines, and variable names for when defining bytes, words, and double words is implemented. Each node structure and its own string are stored contiguously (in that order) so the free's are easier to keep track of, and memory doesn't need to be a concern when adding a string to the tree. --- include/tvm/tvm_tree.h | 16 ++++++++ libtvm/tvm_tree.c | 93 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 include/tvm/tvm_tree.h create mode 100644 libtvm/tvm_tree.c diff --git a/include/tvm/tvm_tree.h b/include/tvm/tvm_tree.h new file mode 100644 index 0000000..f24c94a --- /dev/null +++ b/include/tvm/tvm_tree.h @@ -0,0 +1,16 @@ +#ifndef TVM_TREE_H_ +#define TVM_TREE_H_ + +typedef struct tvm_tree_t +{ + char *str; + struct tvm_tree_t *left; + struct tvm_tree_t *right; +} tvm_tree_t; + +tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str); +int tvm_tree_add(tvm_tree_t *node, const char *str); +int tvm_tree_find(tvm_tree_t *node, const char *str); +int tvm_tree_destroy(tvm_tree_t *node); + +#endif \ No newline at end of file diff --git a/libtvm/tvm_tree.c b/libtvm/tvm_tree.c new file mode 100644 index 0000000..e0d6ee4 --- /dev/null +++ b/libtvm/tvm_tree.c @@ -0,0 +1,93 @@ +#include +#include + +#include + +tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str) +{ + int len = strlen(str); + node = calloc(1, sizeof(tvm_tree_t) + len + 1); + node->str = (char *)node + sizeof(tvm_tree_t); + memcpy(node->str, str, len); + + return node; +} + +int tvm_tree_add(tvm_tree_t *node, const char *str) +{ + if (!str || !node) + return 1; + + int done = 0, diff; + tvm_tree_t *current = node; + while(!done) + { + diff = strcmp(current->str, str); + 
if(!diff) + done = 1; + else if(diff < 0) + { + if(current->left) + { + current = current->left; + } + else + { + current->left = tvm_tree_create(NULL, str); + done = 1; + } + } + else if(diff > 0) + { + if(current->right) + { + current = current->right; + } + else + { + current->right = tvm_tree_create(NULL, str); + done = 1; + } + } + } + + /* String already exists in the tree. */ + if(!diff) + return 2; + + return 0; +} + +int tvm_tree_find(tvm_tree_t *node, const char *str) +{ + if(!node || !str) + return 0; + + int diff, found = 0; + tvm_tree_t *current = node; + while(current && !found) + { + diff = strcmp(current->str, str); + if(!diff) + found = 1; + else if(diff < 0) + current = current->left; + else if(diff > 0) + current = current->right; + } + + return found; +} + +void tvm_destroy(tvm_tree_t *node) +{ + if(node) + { + if (node->right) + tvm_destroy(node->right); + if (node->left) + tvm_destroy(node->left); + + free(node); + } +} \ No newline at end of file From 58f11eba575dab1331974b9d28d6d0952276c276 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 20:30:04 -0500 Subject: [PATCH 02/15] Enable keeping track of associated values. The tvm_tree structure should optionally be able to keep track of values associated with the strings by which its nodes are sorted. In the case of defines, this is the replacement string. In the case of variables, this will be a pointer to the variable's location in memory. Searching should return the value, or NULL. To opt out of storing a value, pass NULL and 0 as the val and len arguments. 
--- include/tvm/tvm_tree.h | 9 +++++---- libtvm/tvm_tree.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/include/tvm/tvm_tree.h b/include/tvm/tvm_tree.h index f24c94a..f8ecf5a 100644 --- a/include/tvm/tvm_tree.h +++ b/include/tvm/tvm_tree.h @@ -3,14 +3,15 @@ typedef struct tvm_tree_t { - char *str; + char *keystr; + char *val; struct tvm_tree_t *left; struct tvm_tree_t *right; } tvm_tree_t; -tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str); -int tvm_tree_add(tvm_tree_t *node, const char *str); -int tvm_tree_find(tvm_tree_t *node, const char *str); +tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str, const void *val, int len); +int tvm_tree_add(tvm_tree_t *node, const char *str, const void *val, int len); +void *tvm_tree_find(tvm_tree_t *node, const char *str); int tvm_tree_destroy(tvm_tree_t *node); #endif \ No newline at end of file diff --git a/libtvm/tvm_tree.c b/libtvm/tvm_tree.c index e0d6ee4..70ba744 100644 --- a/libtvm/tvm_tree.c +++ b/libtvm/tvm_tree.c @@ -3,17 +3,24 @@ #include -tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str) +tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str, const void *val, int len) { - int len = strlen(str); - node = calloc(1, sizeof(tvm_tree_t) + len + 1); - node->str = (char *)node + sizeof(tvm_tree_t); - memcpy(node->str, str, len); + int keylen = strlen(str); + node = calloc(1, sizeof(tvm_tree_t) + len + keylen + 1); + + if(val && len) + { + node->val = (char *)node + sizeof(tvm_tree_t); + memcpy(node->val, val, len); + } + + node->keystr = (char *)node + sizeof(tvm_tree_t) + len; + memcpy(node->keystr, str, keylen); return node; } -int tvm_tree_add(tvm_tree_t *node, const char *str) +int tvm_tree_add(tvm_tree_t *node, const char *str, const void *val, int len) { if (!str || !node) return 1; @@ -22,7 +29,7 @@ int tvm_tree_add(tvm_tree_t *node, const char *str) tvm_tree_t *current = node; while(!done) { - diff = 
strcmp(current->str, str); + diff = strcmp(current->keystr, str); if(!diff) done = 1; else if(diff < 0) @@ -33,7 +40,7 @@ int tvm_tree_add(tvm_tree_t *node, const char *str) } else { - current->left = tvm_tree_create(NULL, str); + current->left = tvm_tree_create(NULL, str, val, len); done = 1; } } @@ -45,7 +52,7 @@ int tvm_tree_add(tvm_tree_t *node, const char *str) } else { - current->right = tvm_tree_create(NULL, str); + current->right = tvm_tree_create(NULL, str, val, len); done = 1; } } @@ -58,25 +65,26 @@ int tvm_tree_add(tvm_tree_t *node, const char *str) return 0; } -int tvm_tree_find(tvm_tree_t *node, const char *str) +void *tvm_tree_find(tvm_tree_t *node, const char *str) { if(!node || !str) return 0; - int diff, found = 0; + int diff; + void *value = NULL; tvm_tree_t *current = node; - while(current && !found) + while(current && !value) { - diff = strcmp(current->str, str); + diff = strcmp(current->keystr, str); if(!diff) - found = 1; + value = current->val; else if(diff < 0) current = current->left; else if(diff > 0) current = current->right; } - return found; + return value; } void tvm_destroy(tvm_tree_t *node) From 9c9eece4d9ae6b2850fc6e3e0d3fea1f678890dd Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:04:56 -0500 Subject: [PATCH 03/15] Fix naming conflict. 
--- include/tvm/tvm_tree.h | 2 +- libtvm/tvm_tree.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/tvm/tvm_tree.h b/include/tvm/tvm_tree.h index f8ecf5a..d450893 100644 --- a/include/tvm/tvm_tree.h +++ b/include/tvm/tvm_tree.h @@ -12,6 +12,6 @@ typedef struct tvm_tree_t tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str, const void *val, int len); int tvm_tree_add(tvm_tree_t *node, const char *str, const void *val, int len); void *tvm_tree_find(tvm_tree_t *node, const char *str); -int tvm_tree_destroy(tvm_tree_t *node); +void tvm_tree_destroy(tvm_tree_t *node); #endif \ No newline at end of file diff --git a/libtvm/tvm_tree.c b/libtvm/tvm_tree.c index 70ba744..77cb96f 100644 --- a/libtvm/tvm_tree.c +++ b/libtvm/tvm_tree.c @@ -87,14 +87,14 @@ void *tvm_tree_find(tvm_tree_t *node, const char *str) return value; } -void tvm_destroy(tvm_tree_t *node) +void tvm_tree_destroy(tvm_tree_t *node) { if(node) { if (node->right) - tvm_destroy(node->right); + tvm_tree_destroy(node->right); if (node->left) - tvm_destroy(node->left); + tvm_tree_destroy(node->left); free(node); } From 23e72a8ca7fc149008852707e658769dbfabfc93 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:11:31 -0500 Subject: [PATCH 04/15] Implement preprocessor defines. This commit adds behavior to the preprocessor which fills a tree with defines and their replacements. In future commits, the parser will substitute instances of the defines with their replacements in the source code. 
--- include/tvm/tvm_preprocessor.h | 4 ++- include/tvm/tvm_program.h | 3 ++ libtvm/tvm_preprocessor.c | 56 +++++++++++++++++++++++++++++++++- libtvm/tvm_program.c | 3 +- 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/include/tvm/tvm_preprocessor.h b/include/tvm/tvm_preprocessor.h index e999950..2b5c41a 100644 --- a/include/tvm/tvm_preprocessor.h +++ b/include/tvm/tvm_preprocessor.h @@ -1,6 +1,8 @@ #ifndef TVM_PREPROCESSOR_H_ #define TVM_PREPROCESSOR_H_ -int tvm_preprocess(char *src, int *src_len); +#include "tvm_tree.h" + +int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node); #endif diff --git a/include/tvm/tvm_program.h b/include/tvm/tvm_program.h index e5722f1..3201e51 100644 --- a/include/tvm/tvm_program.h +++ b/include/tvm/tvm_program.h @@ -7,6 +7,7 @@ #include "tvm_htab.h" #include "tvm_memory.h" +#include "tvm_tree.h" typedef struct tvm_program_s { @@ -20,6 +21,8 @@ typedef struct tvm_program_s int **values; int num_values; + tvm_tree_t *defines; + tvm_htab_t *label_htab; } tvm_program_t; diff --git a/libtvm/tvm_preprocessor.c b/libtvm/tvm_preprocessor.c index 6718fc4..ed57097 100644 --- a/libtvm/tvm_preprocessor.c +++ b/libtvm/tvm_preprocessor.c @@ -3,7 +3,7 @@ #include -int tvm_preprocess(char *src, int *src_len) +int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) { char* pp_directive_delimiter = NULL; if((pp_directive_delimiter = strstr(src, "%include"))) @@ -48,6 +48,60 @@ int tvm_preprocess(char *src, int *src_len) *src_len = strlen(src); return 1; } + else if((pp_directive_delimiter = strstr(src, "%define "))) + { + char *begin = pp_directive_delimiter; + char *end = strchr(begin, '\n'); + + if(!end) return 0; + + int offset = strlen("%define "); + + if(begin + offset >= end) + { + printf("Define missing arguments.\n"); + return 0; + } + + int length = (end - (begin + offset)); + char tempstr[length + 1]; + memset(tempstr, 0, length + 1); + memcpy(tempstr, begin + offset, length); + + char *keystr = tempstr; + char 
*valstr = strchr(tempstr, ' '); + + /* If there is a value, seperate the key and value + with a null character. */ + if(valstr) + { + *valstr = 0; + valstr += 1; + } + + if(!keystr || !valstr) + { + printf("Define missing arguments.\n"); + return 0; + } + + if(!*node) + *node = tvm_tree_create(*node, keystr, valstr, strlen(valstr)); + else + tvm_tree_add(*node, keystr, valstr, strlen(valstr)); + + /* Remove the define line so it is not processed again. */ + size_t new_length = *src_len - (end - begin); + size_t first_block_len = begin - src; + size_t second_block_len = (src + *src_len) - end; + + memmove(&src[first_block_len], end, second_block_len); + + src = realloc(src, sizeof(char) * new_length); + *src_len = new_length; + + return 1; + } return 0; diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index b8ee315..b9f8448 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -15,6 +15,7 @@ tvm_program_t *program_create() void program_destroy(tvm_program_t *p) { htab_destroy(p->label_htab); + tvm_tree_destroy(p->defines); if(p->values) { @@ -54,7 +55,7 @@ pi_interpret: tvm_fcopy(source, source_length, pFile); fclose(pFile); - while(tvm_preprocess(source, &source_length)); + while(tvm_preprocess(source, &source_length, &p->defines)); tvm_lexer_t *lexer_ctx = lexer_create(); lex(lexer_ctx, source); From c80d6974cbd53345b3fe4427073d9e0cf206e883 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:43:57 -0500 Subject: [PATCH 05/15] Substitute defines with their strings. Before allocating space for a token, the lexer will first check to see if that token is a defined name. If it is, it will allocate space for the defined string instead. A test file is included in programs/tinyvm/preprocessor to demonstrate the behavior. When functioning, the program will print the fibonacci sequence. 
--- include/tvm/tvm_lexer.h | 4 +++- libtvm/tvm_lexer.c | 10 +++++++--- libtvm/tvm_program.c | 2 +- programs/tinyvm/preprocessor/define.vm | 20 ++++++++++++++++++++ 4 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 programs/tinyvm/preprocessor/define.vm diff --git a/include/tvm/tvm_lexer.h b/include/tvm/tvm_lexer.h index 6b28a6b..d10cfbc 100644 --- a/include/tvm/tvm_lexer.h +++ b/include/tvm/tvm_lexer.h @@ -4,6 +4,8 @@ #define MAX_ARGS 2 #define MAX_TOKENS 4 +#include "tvm_tree.h" + typedef struct tvm_lexer_s { char **source_lines; @@ -14,6 +16,6 @@ tvm_lexer_t *lexer_create(); void lexer_destroy(tvm_lexer_t *l); /* Tokenize the character array "source" into lines and tokens */ -void lex(tvm_lexer_t *lexer, char *source); +void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node); #endif diff --git a/libtvm/tvm_lexer.c b/libtvm/tvm_lexer.c index 9b3aa5e..dec87b3 100644 --- a/libtvm/tvm_lexer.c +++ b/libtvm/tvm_lexer.c @@ -26,7 +26,7 @@ void lexer_destroy(tvm_lexer_t *lexer) free(lexer); } -void lex(tvm_lexer_t *lexer, char *source) +void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node) { int i, j; char *pToken, *pLine = strtok(source, "\n"); @@ -60,8 +60,12 @@ void lex(tvm_lexer_t *lexer, char *source) for(j = 0; (pToken && j < MAX_TOKENS); j++) { - lexer->tokens[i][j] = (char *)calloc(1, (strlen(pToken) + 1)); - strcpy(lexer->tokens[i][j], pToken); + /* Check if this token is a define. 
*/ + char *token = (char *)tvm_tree_find(*node, pToken); + if(!token) token = pToken; + + lexer->tokens[i][j] = (char *)calloc(1, (strlen(token) + 1)); + strcpy(lexer->tokens[i][j], token); pToken = strtok(NULL, " \t,"); } diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index b9f8448..42642ac 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -58,7 +58,7 @@ pi_interpret: while(tvm_preprocess(source, &source_length, &p->defines)); tvm_lexer_t *lexer_ctx = lexer_create(); - lex(lexer_ctx, source); + lex(lexer_ctx, source, &p->defines); free(source); if(parse_labels(p, (const char ***)lexer_ctx->tokens) != 0) return 1; diff --git a/programs/tinyvm/preprocessor/define.vm b/programs/tinyvm/preprocessor/define.vm new file mode 100644 index 0000000..2ade114 --- /dev/null +++ b/programs/tinyvm/preprocessor/define.vm @@ -0,0 +1,20 @@ +%define ONE 1 +%define ZERO 0 + +start: + mov eax, ONE + mov ebx, ZERO + +loop: add eax, ebx + add ebx, eax + + prn eax + prn ebx + + cmp eax, ZERO + jl end + + cmp ebx, ZERO + jg loop + +end: From d152751cf8fcec76bcfa1566c68b1fdf94e237cc Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:49:17 -0500 Subject: [PATCH 06/15] Free the defines memory earlier. There is no need to keep this memory for the life of the program. 
--- libtvm/tvm_lexer.c | 2 ++ libtvm/tvm_program.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libtvm/tvm_lexer.c b/libtvm/tvm_lexer.c index dec87b3..a5e5e70 100644 --- a/libtvm/tvm_lexer.c +++ b/libtvm/tvm_lexer.c @@ -72,4 +72,6 @@ void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node) } lexer->tokens[i] = NULL; + tvm_tree_destroy(*node); + *node = NULL; } diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index 42642ac..7003591 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -15,7 +15,6 @@ tvm_program_t *program_create() void program_destroy(tvm_program_t *p) { htab_destroy(p->label_htab); - tvm_tree_destroy(p->defines); if(p->values) { From 27cde3f429436336a74d90aedbc788b5537ed2cd Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:53:03 -0500 Subject: [PATCH 07/15] Warn on redefining. --- libtvm/tvm_preprocessor.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/libtvm/tvm_preprocessor.c b/libtvm/tvm_preprocessor.c index ed57097..347da51 100644 --- a/libtvm/tvm_preprocessor.c +++ b/libtvm/tvm_preprocessor.c @@ -85,10 +85,17 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) return 0; } + int err = 0; if(!*node) *node = tvm_tree_create(*node, keystr, valstr, strlen(valstr)); else - tvm_tree_add(*node, keystr, valstr, strlen(valstr)); + err = tvm_tree_add(*node, keystr, valstr, strlen(valstr)); + + if(err == 2) + { + printf("Multiple definitions for %s.\n", keystr); + return 0; + } /* Remove the define line so it is not processed again. */ size_t new_length = *src_len - (end - begin); From b208237960ed4c2cf8082136de3623d14254a40f Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Sun, 12 Jan 2014 21:56:54 -0500 Subject: [PATCH 08/15] Allow the preprocessor to abort. If the preprocessor encounters a problem, such as failing to load a source file or an improperly formatted define, it should be able to abort running the program. 
--- libtvm/tvm_preprocessor.c | 8 ++++---- libtvm/tvm_program.c | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libtvm/tvm_preprocessor.c b/libtvm/tvm_preprocessor.c index 347da51..82286ee 100644 --- a/libtvm/tvm_preprocessor.c +++ b/libtvm/tvm_preprocessor.c @@ -22,7 +22,7 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) if(!pFile) { printf("Unable to open file \"%s\"\n", filename); - return 0; + return -1; } free(temp_str); @@ -60,7 +60,7 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) if(begin + offset >= end) { printf("Define missing arguments.\n"); - return 0; + return -1; } int length = (end - (begin + offset)); @@ -82,7 +82,7 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) if(!keystr || !valstr) { printf("Define missing arguments.\n"); - return 0; + return -1; } int err = 0; @@ -94,7 +94,7 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) if(err == 2) { printf("Multiple definitions for %s.\n", keystr); - return 0; + return -1; } /* Remove the define line so it is not processed again. */ diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index 7003591..ca74421 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -54,7 +54,12 @@ pi_interpret: tvm_fcopy(source, source_length, pFile); fclose(pFile); - while(tvm_preprocess(source, &source_length, &p->defines)); + int err = 0; + while((err = tvm_preprocess(source, &source_length, &p->defines)) > 0); + + /* The preprocessor encountered a problem. */ + if (err < 0) + return 1; tvm_lexer_t *lexer_ctx = lexer_create(); lex(lexer_ctx, source, &p->defines); From ed12ce05dba6456d428dfd5b6d864fca3622079a Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 09:45:27 -0500 Subject: [PATCH 09/15] Remove all traces of the tvm_tree structure. It is to be replaced with a htab hash table. 
--- include/tvm/tvm_lexer.h | 4 +- include/tvm/tvm_preprocessor.h | 4 +- include/tvm/tvm_program.h | 3 - include/tvm/tvm_tree.h | 17 ------ libtvm/tvm_lexer.c | 8 +-- libtvm/tvm_preprocessor.c | 14 +---- libtvm/tvm_program.c | 4 +- libtvm/tvm_tree.c | 101 --------------------------------- 8 files changed, 7 insertions(+), 148 deletions(-) delete mode 100644 include/tvm/tvm_tree.h delete mode 100644 libtvm/tvm_tree.c diff --git a/include/tvm/tvm_lexer.h b/include/tvm/tvm_lexer.h index d10cfbc..6b28a6b 100644 --- a/include/tvm/tvm_lexer.h +++ b/include/tvm/tvm_lexer.h @@ -4,8 +4,6 @@ #define MAX_ARGS 2 #define MAX_TOKENS 4 -#include "tvm_tree.h" - typedef struct tvm_lexer_s { char **source_lines; @@ -16,6 +14,6 @@ tvm_lexer_t *lexer_create(); void lexer_destroy(tvm_lexer_t *l); /* Tokenize the character array "source" into lines and tokens */ -void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node); +void lex(tvm_lexer_t *lexer, char *source); #endif diff --git a/include/tvm/tvm_preprocessor.h b/include/tvm/tvm_preprocessor.h index 2b5c41a..e999950 100644 --- a/include/tvm/tvm_preprocessor.h +++ b/include/tvm/tvm_preprocessor.h @@ -1,8 +1,6 @@ #ifndef TVM_PREPROCESSOR_H_ #define TVM_PREPROCESSOR_H_ -#include "tvm_tree.h" - -int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node); +int tvm_preprocess(char *src, int *src_len); #endif diff --git a/include/tvm/tvm_program.h b/include/tvm/tvm_program.h index 3201e51..e5722f1 100644 --- a/include/tvm/tvm_program.h +++ b/include/tvm/tvm_program.h @@ -7,7 +7,6 @@ #include "tvm_htab.h" #include "tvm_memory.h" -#include "tvm_tree.h" typedef struct tvm_program_s { @@ -21,8 +20,6 @@ typedef struct tvm_program_s int **values; int num_values; - tvm_tree_t *defines; - tvm_htab_t *label_htab; } tvm_program_t; diff --git a/include/tvm/tvm_tree.h b/include/tvm/tvm_tree.h deleted file mode 100644 index d450893..0000000 --- a/include/tvm/tvm_tree.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TVM_TREE_H_ -#define TVM_TREE_H_ - 
-typedef struct tvm_tree_t -{ - char *keystr; - char *val; - struct tvm_tree_t *left; - struct tvm_tree_t *right; -} tvm_tree_t; - -tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str, const void *val, int len); -int tvm_tree_add(tvm_tree_t *node, const char *str, const void *val, int len); -void *tvm_tree_find(tvm_tree_t *node, const char *str); -void tvm_tree_destroy(tvm_tree_t *node); - -#endif \ No newline at end of file diff --git a/libtvm/tvm_lexer.c b/libtvm/tvm_lexer.c index a5e5e70..0db1d1d 100644 --- a/libtvm/tvm_lexer.c +++ b/libtvm/tvm_lexer.c @@ -26,7 +26,7 @@ void lexer_destroy(tvm_lexer_t *lexer) free(lexer); } -void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node) +void lex(tvm_lexer_t *lexer, char *source) { int i, j; char *pToken, *pLine = strtok(source, "\n"); @@ -60,9 +60,7 @@ void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node) for(j = 0; (pToken && j < MAX_TOKENS); j++) { - /* Check if this token is a define. */ - char *token = (char *)tvm_tree_find(*node, pToken); - if(!token) token = pToken; + char *token = pToken; lexer->tokens[i][j] = (char *)calloc(1, (strlen(token) + 1)); strcpy(lexer->tokens[i][j], token); @@ -72,6 +70,4 @@ void lex(tvm_lexer_t *lexer, char *source, tvm_tree_t **node) } lexer->tokens[i] = NULL; - tvm_tree_destroy(*node); - *node = NULL; } diff --git a/libtvm/tvm_preprocessor.c b/libtvm/tvm_preprocessor.c index 82286ee..1fce20a 100644 --- a/libtvm/tvm_preprocessor.c +++ b/libtvm/tvm_preprocessor.c @@ -3,7 +3,7 @@ #include -int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) +int tvm_preprocess(char *src, int *src_len) { char* pp_directive_delimiter = NULL; if((pp_directive_delimiter = strstr(src, "%include"))) @@ -85,18 +85,6 @@ int tvm_preprocess(char *src, int *src_len, tvm_tree_t **node) return -1; } - int err = 0; - if(!*node) - *node = tvm_tree_create(*node, keystr, valstr, strlen(valstr)); - else - err = tvm_tree_add(*node, keystr, valstr, strlen(valstr)); - - if(err == 2) - { 
- printf("Multiple definitions for %s.\n", keystr); - return -1; - } - /* Remove the define line so it is not processed again. */ size_t new_length = *src_len - (end - begin); size_t first_block_len = begin - src; diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index ca74421..c31ab7a 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -55,14 +55,14 @@ pi_interpret: fclose(pFile); int err = 0; - while((err = tvm_preprocess(source, &source_length, &p->defines)) > 0); + while((err = tvm_preprocess(source, &source_length)) > 0); /* The preprocessor encountered a problem. */ if (err < 0) return 1; tvm_lexer_t *lexer_ctx = lexer_create(); - lex(lexer_ctx, source, &p->defines); + lex(lexer_ctx, source); free(source); if(parse_labels(p, (const char ***)lexer_ctx->tokens) != 0) return 1; diff --git a/libtvm/tvm_tree.c b/libtvm/tvm_tree.c deleted file mode 100644 index 77cb96f..0000000 --- a/libtvm/tvm_tree.c +++ /dev/null @@ -1,101 +0,0 @@ -#include -#include - -#include - -tvm_tree_t *tvm_tree_create(tvm_tree_t *node, const char *str, const void *val, int len) -{ - int keylen = strlen(str); - node = calloc(1, sizeof(tvm_tree_t) + len + keylen + 1); - - if(val && len) - { - node->val = (char *)node + sizeof(tvm_tree_t); - memcpy(node->val, val, len); - } - - node->keystr = (char *)node + sizeof(tvm_tree_t) + len; - memcpy(node->keystr, str, keylen); - - return node; -} - -int tvm_tree_add(tvm_tree_t *node, const char *str, const void *val, int len) -{ - if (!str || !node) - return 1; - - int done = 0, diff; - tvm_tree_t *current = node; - while(!done) - { - diff = strcmp(current->keystr, str); - if(!diff) - done = 1; - else if(diff < 0) - { - if(current->left) - { - current = current->left; - } - else - { - current->left = tvm_tree_create(NULL, str, val, len); - done = 1; - } - } - else if(diff > 0) - { - if(current->right) - { - current = current->right; - } - else - { - current->right = tvm_tree_create(NULL, str, val, len); - done = 1; - } - } - } - - 
/* String already exists in the tree. */ - if(!diff) - return 2; - - return 0; -} - -void *tvm_tree_find(tvm_tree_t *node, const char *str) -{ - if(!node || !str) - return 0; - - int diff; - void *value = NULL; - tvm_tree_t *current = node; - while(current && !value) - { - diff = strcmp(current->keystr, str); - if(!diff) - value = current->val; - else if(diff < 0) - current = current->left; - else if(diff > 0) - current = current->right; - } - - return value; -} - -void tvm_tree_destroy(tvm_tree_t *node) -{ - if(node) - { - if (node->right) - tvm_tree_destroy(node->right); - if (node->left) - tvm_tree_destroy(node->left); - - free(node); - } -} \ No newline at end of file From e99a0428f394219baaa3f49f1d302c6c5c00b1b8 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 10:20:21 -0500 Subject: [PATCH 10/15] Implement defines using the htab structure. The htab_structure has been modified in two ways, which should not break compatibility with any of its uses. It includes a void pointer, which is in this commit used to point to a string for defines, and will in the future be used to point to the address space for words, bytes, and double words. It now includes a function htab_add_str specifically for storing strings. It calls htab_add so as not to be redundant, but makes the node's value their index for the lexer to fetch using htab_find, and assigns their void pointer. The lexer will now use htab_find on all tokens to see if they are a define string, and if so, substitute them with the appropriate token. The defines htab is destroyed after lexing, because that memory is done. 
--- include/tvm/tvm_htab.h | 2 ++ include/tvm/tvm_lexer.h | 4 +++- include/tvm/tvm_preprocessor.h | 4 +++- include/tvm/tvm_program.h | 2 ++ libtvm/tvm_htab.c | 15 ++++++++++++++- libtvm/tvm_lexer.c | 9 ++++++++- libtvm/tvm_preprocessor.c | 10 +++++++++- libtvm/tvm_program.c | 5 +++-- 8 files changed, 44 insertions(+), 7 deletions(-) diff --git a/include/tvm/tvm_htab.h b/include/tvm/tvm_htab.h index 0adc68e..d0aadaf 100644 --- a/include/tvm/tvm_htab.h +++ b/include/tvm/tvm_htab.h @@ -8,6 +8,7 @@ typedef struct tvm_htab_node_s { char *key; int value; + void *valptr; struct tvm_htab_node_s *next; } tvm_htab_node_t; @@ -22,6 +23,7 @@ tvm_htab_t* htab_create(); void htab_destroy(tvm_htab_t *htab); int htab_add(tvm_htab_t *htab, const char *key, int value); +int htab_add_str(tvm_htab_t *htab, const char *key, const void *valptr, int len); int htab_find(tvm_htab_t *htab, const char *key); #endif diff --git a/include/tvm/tvm_lexer.h b/include/tvm/tvm_lexer.h index 6b28a6b..6724b5a 100644 --- a/include/tvm/tvm_lexer.h +++ b/include/tvm/tvm_lexer.h @@ -4,6 +4,8 @@ #define MAX_ARGS 2 #define MAX_TOKENS 4 +#include "tvm_htab.h" + typedef struct tvm_lexer_s { char **source_lines; @@ -14,6 +16,6 @@ tvm_lexer_t *lexer_create(); void lexer_destroy(tvm_lexer_t *l); /* Tokenize the character array "source" into lines and tokens */ -void lex(tvm_lexer_t *lexer, char *source); +void lex(tvm_lexer_t *lexer, char *source, tvm_htab_t *defines); #endif diff --git a/include/tvm/tvm_preprocessor.h b/include/tvm/tvm_preprocessor.h index e999950..9832c62 100644 --- a/include/tvm/tvm_preprocessor.h +++ b/include/tvm/tvm_preprocessor.h @@ -1,6 +1,8 @@ #ifndef TVM_PREPROCESSOR_H_ #define TVM_PREPROCESSOR_H_ -int tvm_preprocess(char *src, int *src_len); +#include "tvm_htab.h" + +int tvm_preprocess(char *src, int *src_len, tvm_htab_t *defines); #endif diff --git a/include/tvm/tvm_program.h b/include/tvm/tvm_program.h index e5722f1..88a85c4 100644 --- a/include/tvm/tvm_program.h +++ 
b/include/tvm/tvm_program.h @@ -20,6 +20,8 @@ typedef struct tvm_program_s int **values; int num_values; + tvm_htab_t *defines; + tvm_htab_t *label_htab; } tvm_program_t; diff --git a/libtvm/tvm_htab.c b/libtvm/tvm_htab.c index 84f813b..73dce2a 100644 --- a/libtvm/tvm_htab.c +++ b/libtvm/tvm_htab.c @@ -25,6 +25,8 @@ void htab_destroy(tvm_htab_t *htab) while(node) { next = node->next; + if(node->valptr) + free(node->valptr); free(node->key); free(node); node = next; @@ -66,6 +68,8 @@ static void htab_rehash(tvm_htab_t *orig, unsigned int size) { next = node->next; htab_add(new, node->key, node->value); + if(node->valptr) + free(node->valptr); free(node->key); free(node); node = next; @@ -113,7 +117,16 @@ int htab_add(tvm_htab_t *htab, const char *k, int v) if((float)++htab->num_nodes / htab->size > HTAB_LOAD_FACTOR) htab_rehash(htab, htab->num_nodes * 2); - return 0; + return hash; +} + +int htab_add_str(tvm_htab_t *htab, const char *key, const void *valptr, int len) +{ + int hash = htab_add(htab, key, 0); + htab->nodes[hash]->value = hash; + htab->nodes[hash]->valptr = calloc(len, sizeof(char)); + memcpy(htab->nodes[hash]->valptr, valptr, len); + return hash; } int htab_find(tvm_htab_t *htab, const char *key) diff --git a/libtvm/tvm_lexer.c b/libtvm/tvm_lexer.c index 0db1d1d..5c4272a 100644 --- a/libtvm/tvm_lexer.c +++ b/libtvm/tvm_lexer.c @@ -26,7 +26,7 @@ void lexer_destroy(tvm_lexer_t *lexer) free(lexer); } -void lex(tvm_lexer_t *lexer, char *source) +void lex(tvm_lexer_t *lexer, char *source, tvm_htab_t *defines) { int i, j; char *pToken, *pLine = strtok(source, "\n"); @@ -60,8 +60,14 @@ void lex(tvm_lexer_t *lexer, char *source) for(j = 0; (pToken && j < MAX_TOKENS); j++) { + int hash; char *token = pToken; + if((hash = htab_find(defines, pToken)) >= 0) + { + token = (char *)defines->nodes[hash]->valptr; + } + lexer->tokens[i][j] = (char *)calloc(1, (strlen(token) + 1)); strcpy(lexer->tokens[i][j], token); @@ -70,4 +76,5 @@ void lex(tvm_lexer_t *lexer, char 
*source) } lexer->tokens[i] = NULL; + htab_destroy(defines); } diff --git a/libtvm/tvm_preprocessor.c b/libtvm/tvm_preprocessor.c index 1fce20a..09d58be 100644 --- a/libtvm/tvm_preprocessor.c +++ b/libtvm/tvm_preprocessor.c @@ -3,7 +3,7 @@ #include -int tvm_preprocess(char *src, int *src_len) +int tvm_preprocess(char *src, int *src_len, tvm_htab_t *defines) { char* pp_directive_delimiter = NULL; if((pp_directive_delimiter = strstr(src, "%include"))) @@ -85,6 +85,14 @@ int tvm_preprocess(char *src, int *src_len) return -1; } + if(htab_find(defines, keystr) < 0) + htab_add_str(defines, keystr, valstr, strlen(valstr) + 1); + else + { + printf("Multiple definitions for %s.\n", keystr); + return -1; + } + /* Remove the define line so it is not processed again. */ size_t new_length = *src_len - (end - begin); size_t first_block_len = begin - src; diff --git a/libtvm/tvm_program.c b/libtvm/tvm_program.c index c31ab7a..ed563be 100644 --- a/libtvm/tvm_program.c +++ b/libtvm/tvm_program.c @@ -8,6 +8,7 @@ tvm_program_t *program_create() { tvm_program_t *p = (tvm_program_t *)calloc(1, sizeof(tvm_program_t)); p->label_htab = htab_create(); + p->defines = htab_create(); return p; } @@ -55,14 +56,14 @@ pi_interpret: fclose(pFile); int err = 0; - while((err = tvm_preprocess(source, &source_length)) > 0); + while((err = tvm_preprocess(source, &source_length, p->defines)) > 0); /* The preprocessor encountered a problem. */ if (err < 0) return 1; tvm_lexer_t *lexer_ctx = lexer_create(); - lex(lexer_ctx, source); + lex(lexer_ctx, source, p->defines); free(source); if(parse_labels(p, (const char ***)lexer_ctx->tokens) != 0) return 1; From 7bea39a6b526ab37403674a1396427ddb85df8f7 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 11:07:16 -0500 Subject: [PATCH 11/15] Add preprocessor directives to the SYNTAX file. The preprocessor section of the SYNTAX file has been rewritten to match the other sections in style, and include both %include and %define directives. 
#27 --- SYNTAX | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/SYNTAX b/SYNTAX index e0d3299..5887d71 100644 --- a/SYNTAX +++ b/SYNTAX @@ -78,9 +78,17 @@ Labels must be specified at the beginning of a line or on their own line. // 4. PREPROCESSOR /////////////////////////////// ////////////////////////////////////////////////// -TVM has a preprocessor that allows other source files to be included with the directive "%include filename.vm" +TVM's preprocessor works similarly to many C compilers and uses the prefix "%". -During preprocessing, the entire included source file is inserted in place of the %include directive in memory. + // I. Include // + + %include filename + Pastes all of the contents of filename into the source code before interpreting it. + + // II. Define // + + %define identifier value + Define a constant so that all instances of the string "identifier" will be replaced by "value". ////////////////////////////////////////////////// // 5. INSTRUCTION LISTING //////////////////////// From aa57b007c7eac82f63f7162ad6844d140e420e2c Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 11:16:00 -0500 Subject: [PATCH 12/15] Make sure strings are moved into the rehash. 
--- libtvm/tvm_htab.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libtvm/tvm_htab.c b/libtvm/tvm_htab.c index 73dce2a..d083119 100644 --- a/libtvm/tvm_htab.c +++ b/libtvm/tvm_htab.c @@ -67,9 +67,13 @@ static void htab_rehash(tvm_htab_t *orig, unsigned int size) while(node) { next = node->next; - htab_add(new, node->key, node->value); - if(node->valptr) + if (node->valptr) + { + htab_add_str(new, node->key, node->valptr, strlen(node->valptr) + 1); free(node->valptr); + } + else + htab_add(new, node->key, node->value); free(node->key); free(node); node = next; From d945c9a3f6afd7280e146a5d7eb134aba98043ef Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 11:31:58 -0500 Subject: [PATCH 13/15] Add a new function to return strings from htab. The htab_find_str function eliminates the assumption that the string is in the first place in the bucket. --- include/tvm/tvm_htab.h | 1 + libtvm/tvm_htab.c | 14 ++++++++++++++ libtvm/tvm_lexer.c | 9 ++------- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/tvm/tvm_htab.h b/include/tvm/tvm_htab.h index d0aadaf..7838565 100644 --- a/include/tvm/tvm_htab.h +++ b/include/tvm/tvm_htab.h @@ -25,5 +25,6 @@ void htab_destroy(tvm_htab_t *htab); int htab_add(tvm_htab_t *htab, const char *key, int value); int htab_add_str(tvm_htab_t *htab, const char *key, const void *valptr, int len); int htab_find(tvm_htab_t *htab, const char *key); +char *htab_find_str(tvm_htab_t *htab, const char *key); #endif diff --git a/libtvm/tvm_htab.c b/libtvm/tvm_htab.c index d083119..e1bdb53 100644 --- a/libtvm/tvm_htab.c +++ b/libtvm/tvm_htab.c @@ -148,3 +148,17 @@ int htab_find(tvm_htab_t *htab, const char *key) return -1; } +char *htab_find_str(tvm_htab_t *htab, const char *key) +{ + int hash = htab_hash(key, htab->size); + tvm_htab_node_t *node = htab->nodes[hash]; + + while(node) + { + if(!strcmp(node->key, key)) + return node->valptr; + node = node->next; + } + + return NULL; +} \ No 
newline at end of file diff --git a/libtvm/tvm_lexer.c b/libtvm/tvm_lexer.c index 5c4272a..b03d49e 100644 --- a/libtvm/tvm_lexer.c +++ b/libtvm/tvm_lexer.c @@ -60,13 +60,8 @@ void lex(tvm_lexer_t *lexer, char *source, tvm_htab_t *defines) for(j = 0; (pToken && j < MAX_TOKENS); j++) { - int hash; - char *token = pToken; - - if((hash = htab_find(defines, pToken)) >= 0) - { - token = (char *)defines->nodes[hash]->valptr; - } + char *token = htab_find_str(defines, pToken); + token = token ? token : pToken; lexer->tokens[i][j] = (char *)calloc(1, (strlen(token) + 1)); strcpy(lexer->tokens[i][j], token); From e6d9d92d8cdceeac94b7fbe7d09140a15eea718e Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Mon, 13 Jan 2014 13:50:56 -0500 Subject: [PATCH 14/15] Do not write a value for strings in htab. The assignment is unnecessary, and was overwriting the first element in the bucket (not the added string's value) in the case of a collision. --- libtvm/tvm_htab.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libtvm/tvm_htab.c b/libtvm/tvm_htab.c index e1bdb53..470e41f 100644 --- a/libtvm/tvm_htab.c +++ b/libtvm/tvm_htab.c @@ -127,7 +127,6 @@ int htab_add(tvm_htab_t *htab, const char *k, int v) int htab_add_str(tvm_htab_t *htab, const char *key, const void *valptr, int len) { int hash = htab_add(htab, key, 0); - htab->nodes[hash]->value = hash; htab->nodes[hash]->valptr = calloc(len, sizeof(char)); memcpy(htab->nodes[hash]->valptr, valptr, len); return hash; @@ -161,4 +160,4 @@ char *htab_find_str(tvm_htab_t *htab, const char *key) } return NULL; -} \ No newline at end of file +} From 27a090e704eca2b8cbcf477bb589974718cbd703 Mon Sep 17 00:00:00 2001 From: Payton Turnage Date: Tue, 14 Jan 2014 00:51:42 -0500 Subject: [PATCH 15/15] Remove assumption that node is in the bottom of the bucket. This is the last remaining instance of this assumption. 
--- libtvm/tvm_htab.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/libtvm/tvm_htab.c b/libtvm/tvm_htab.c index 470e41f..635d3c3 100644 --- a/libtvm/tvm_htab.c +++ b/libtvm/tvm_htab.c @@ -127,8 +127,19 @@ int htab_add(tvm_htab_t *htab, const char *k, int v) int htab_add_str(tvm_htab_t *htab, const char *key, const void *valptr, int len) { int hash = htab_add(htab, key, 0); - htab->nodes[hash]->valptr = calloc(len, sizeof(char)); - memcpy(htab->nodes[hash]->valptr, valptr, len); + int found = 0; + tvm_htab_node_t *node = htab->nodes[hash]; + + while (node && !found) + { + if (!strcmp(node->key, key)) + found = 1; + else + node = node->next; + } + + node->valptr = calloc(len, sizeof(char)); + memcpy(node->valptr, valptr, len); return hash; }