tinyvm/libtvm/tvm_parser.c
Jeff Epler 7cec815530 Don't allocate args when instruction parsing fails
.. it looks like (opcode == -1) should maybe lead to an error return
rather than a continue, but in any case we need to avoid leaking
'args' in this situation.

.. the previous arrangement led to compiler diagnostics when
building with 'scan-build make', such as:

libtvm/tvm_parser.c:186:39: warning: Potential leak of memory pointed to by 'args'
        for (line_idx = 0; tokens[line_idx]; line_idx++) {
                                             ^~~~~~~~
1 warning generated.
2018-03-17 14:02:50 -05:00

286 lines
5.9 KiB
C

#include <tvm/tvm_parser.h>
#include <tvm/tvm_file.h>
#include <tvm/tvm_lexer.h>
#include <tvm/tvm_tokens.h>
/* Instruction mnemonics, terminated by a null pointer.
 * instr_to_opcode() returns a mnemonic's position in this table, so
 * reordering the entries changes the opcode numbering.
 */
const char *tvm_opcode_map[] = {
	/*  0 */ "nop",  "int",   "mov",  "push",
	/*  4 */ "pop",  "pushf", "popf", "inc",
	/*  8 */ "dec",  "add",   "sub",  "mul",
	/* 12 */ "div",  "mod",   "rem",  "not",
	/* 16 */ "xor",  "or",    "and",  "shl",
	/* 20 */ "shr",  "cmp",   "jmp",  "call",
	/* 24 */ "ret",  "je",    "jne",  "jg",
	/* 28 */ "jge",  "jl",    "jle",  "prn",
	/* 32 */ NULL
};
/* Register names, terminated by a null pointer.
 * token_to_register() uses a name's position in this table as the index
 * into the register array of the VM memory.
 */
const char *tvm_register_map[] = {
	/*  0 */ "eax", "ebx", "ecx", "edx",
	/*  4 */ "esi", "edi", "esp", "ebp",
	/*  8 */ "eip",
	/*  9 */ "r08", "r09", "r10", "r11",
	/* 13 */ "r12", "r13", "r14", "r15",
	/* 17 */ NULL
};
/* File-local lookup helpers; both are defined further down in this file. */
/* Returns the address of the named register's 32-bit value inside `mem`,
 * or NULL when `token` is not a register name.
 */
static int *token_to_register(const char *token, struct tvm_mem *mem);
/* Returns the index of `instr` in tvm_opcode_map, or -1 when it is not a
 * known mnemonic.
 */
static int instr_to_opcode(const char *instr);
/* First pass over the tokenised source: record every label.
 *
 * `tokens` is a null-terminated array of lines, each line holding
 * MAX_TOKENS token slots (unused slots are null). Any token containing a
 * ':' is treated as a label: the ':' and everything after it is cut off
 * IN PLACE, and the remaining name is stored in the program's label table
 * together with the number of instruction lines seen so far. A label
 * named "start" additionally becomes the program's entry point.
 *
 * Returns 0 on success, or 1 (after printing a message) when a label is
 * defined twice.
 */
int tvm_parse_labels(struct tvm_ctx *vm, const char ***tokens)
{
	struct tvm_prog *prog = vm->prog;
	int instr_count = 0;

	for (const char ***line = tokens; *line; line++) {
		/* Set to 1 once any token on this line is a known mnemonic. */
		int has_instr = 0;

		for (int t = 0; t < MAX_TOKENS; t++) {
			const char *tok = (*line)[t];

			if (tok == NULL)
				continue;

			if (instr_to_opcode(tok) != -1)
				has_instr = 1;

			char *colon = strchr(tok, ':');

			if (colon != NULL) {
				/* Truncate the token so only the label name remains. */
				*colon = '\0';

				if (!strcmp(tok, "start"))
					prog->start = instr_count;

				if (tvm_htab_find(prog->label_htab, tok) != -1) {
					printf("Label '%s' defined twice\n", tok);
					return 1;
				}

				tvm_htab_add(prog->label_htab, tok, instr_count);
			}
		}

		/* A label refers to the instruction on its own line, so the
		 * counter only advances after the whole line was scanned.
		 */
		instr_count += has_instr;
	}

	return 0;
}
/* This function takes the instruction tokens, and location of the
* instruction inside the line, parses the arguments, and returns a pointer
* to the heap where they're stored.
*/
static int **tvm_parse_args(
struct tvm_ctx *vm, const char **instr_tokens, int *instr_place)
{
int **args = calloc(sizeof(int *), MAX_ARGS);
for (int i = 0; i < MAX_ARGS; ++i) {
if (!instr_tokens[*instr_place+1 + i]
|| !strlen(instr_tokens[*instr_place+1 + i]))
continue;
char *newline = strchr(instr_tokens[*instr_place+1 + i], '\n');
if (newline)
*newline = 0;
/* Check to see if the token specifies a register */
int *regp = token_to_register(
instr_tokens[*instr_place+1 + i], vm->mem);
if (regp) {
args[i] = regp;
continue;
}
/* Check to see whether the token specifies an address */
if (instr_tokens[*instr_place+1 + i][0] == '[') {
char *end_symbol = strchr(
instr_tokens[*instr_place+1 + i], ']');
if (end_symbol) {
*end_symbol = 0;
args[i] = &((int *)vm->mem->mem_space)[
tvm_parse_value(instr_tokens[
*instr_place+1 + i] + 1)];
continue;
}
}
/* Check if the argument is a label */
int addr = tvm_htab_find(
vm->prog->label_htab, instr_tokens[*instr_place+1 + i]);
if (addr != -1) {
args[i] = tvm_add_value(vm, addr);
continue;
}
/* Fuck it, parse it as a value */
args[i] = tvm_add_value(
vm, tvm_parse_value(instr_tokens[*instr_place+1 + i]));
}
return args;
}
/* Release an argument array returned by tvm_parse_args().
 *
 * Only the array itself is freed. Its elements are borrowed pointers into
 * the VM's registers, its memory space, or the program's value list, so
 * they must not be passed to free() here. Passing NULL is allowed.
 */
static void tvm_free_args(int **args)
{
	free(args);
}
/* Locate the instruction on one tokenised source line.
 *
 * Scans the MAX_TOKENS slots of `instr_tokens` from the left and stops at
 * the first token that is a known mnemonic. On a hit the token's index is
 * stored through `instr_place` (when non-null), the program's instruction
 * counter is incremented, and the opcode is returned. Returns -1 when the
 * line contains no instruction; the counter is left untouched then.
 */
static int tvm_parse_instr(
	struct tvm_ctx *vm, const char **instr_tokens, int *instr_place)
{
	int pos = 0;

	while (pos < MAX_TOKENS) {
		const char *tok = instr_tokens[pos];
		/* Empty slots are treated the same as unknown tokens. */
		int op = tok ? instr_to_opcode(tok) : -1;

		if (op != -1) {
			if (instr_place != NULL)
				*instr_place = pos;

			vm->prog->num_instr++;
			return op;
		}

		pos++;
	}

	return -1;
}
int tvm_parse_program(
struct tvm_ctx *vm, const char ***tokens)
{
int line_idx;
for (line_idx = 0; tokens[line_idx]; line_idx++) {
int instr_place = 0;
int opcode = tvm_parse_instr(
vm, tokens[line_idx], &instr_place);
if (opcode == -1)
continue;
int **args = tvm_parse_args(
vm, tokens[line_idx], &instr_place);
if (!args)
continue;
void *newptr;
newptr = realloc(
vm->prog->instr, sizeof(int) * (vm->prog->num_instr+1));
if (newptr != NULL) {
vm->prog->instr = newptr;
vm->prog->instr[vm->prog->num_instr - 1] = opcode;
} else {
tvm_free_args(args);
return -1;
}
newptr = realloc(
vm->prog->args,
sizeof(int **) * (vm->prog->num_instr+1));
if (newptr != NULL)
vm->prog->args = (int ***)newptr;
else {
tvm_free_args(args);
return -1;
}
vm->prog->args[vm->prog->num_instr - 1] = args;
}
vm->prog->args[vm->prog->num_instr] = NULL;
vm->prog->instr[vm->prog->num_instr] = -0x1;
return 0;
}
/* Map a register name to the storage of that register.
 *
 * Walks tvm_register_map until its null terminator. Returns the address
 * of the matching register's 32-bit value inside `mem`, or NULL when
 * `token` is not a register name.
 */
int *token_to_register(const char *token, struct tvm_mem *mem)
{
	int idx = 0;

	while (tvm_register_map[idx] != NULL) {
		if (!strcmp(tvm_register_map[idx], token))
			return &mem->registers[idx].i32;
		idx++;
	}

	return NULL;
}
/* Map a mnemonic to its opcode.
 *
 * The opcode is the position of `instr` in tvm_opcode_map. Returns -1
 * when the string matches no entry before the table's null terminator.
 */
int instr_to_opcode(const char *instr)
{
	int opcode = 0;

	while (tvm_opcode_map[opcode] != NULL) {
		if (!strcmp(tvm_opcode_map[opcode], instr))
			return opcode;
		opcode++;
	}

	return -1;
}
/* Store a constant in the program's value list.
 *
 * Allocates a new int holding `val`, appends its address to the
 * program's `values` array (growing the array by one element) and
 * returns that address. The value list keeps ownership of the int.
 *
 * Returns NULL when either allocation fails; the existing value list is
 * left intact and unchanged in that case.
 */
int *tvm_add_value(struct tvm_ctx *vm, const int val)
{
	struct tvm_prog *p = vm->prog;
	int *slot = calloc(1, sizeof(*slot));

	if (!slot)
		return NULL;

	/* Grow through a temporary so the old array is not lost (and the
	 * list stays usable) if realloc() fails.
	 */
	void *grown = realloc(
		p->values, sizeof(*p->values) * (p->num_values + 1));

	if (!grown) {
		free(slot);
		return NULL;
	}

	*slot = val;
	p->values = grown;
	p->values[p->num_values++] = slot;

	return slot;
}
/* Convert a numeric token to an int.
 *
 * A token may carry a base suffix after a '|': "|h" selects base 16 and
 * "|b" base 2. Without a suffix, or with any other character after the
 * '|', the base is 0, which lets strtoul() detect it from the usual
 * prefix. Conversion starts at the beginning of `str` and stops at the
 * first character that is not a digit in the chosen base.
 */
int tvm_parse_value(const char *str)
{
	int radix = 0;
	const char *bar = strchr(str, '|');

	if (bar != NULL) {
		if (bar[1] == 'h')
			radix = 16;
		else if (bar[1] == 'b')
			radix = 2;
	}

	return strtoul(str, NULL, radix);
}