MBasic Interpreter

To find my C legs again I decided to program a  small interpreter for a BASIC type language in C, as the first programming language I ever learned (after Scratch) was BASIC.

 Realising the largest challenge was the expression parser I started there. I use a custom tokenising/parsing module to tokenise expressions, then use Edsger Dijkstra's "shunting yard algorithm" to convert the tokens to an intermediate form of RPN, then use a simple RPN solver to evaluate the expression.

MBasic ScreenshotAt the moment MBasic is just a REPL environment, although I am currently working on control flow statements for a more structured language.

"MBasic" supplies the usual operators, +, -, *, /, ^, % (for integer and floats), and all of those operators as compound assignment operators too (+=, -=...).

As you can see from the examples to the right, the three "primitive" data types are integer, floating point, and string. General promotion rules apply, where 10 / 3 is 3 (integer division), but 10 / 3.0 causes the 10 to be promoted to a float, causing the expression to evaluate to 3.33333 (although only printed out to five d.p. the internal storage of floats is C "float").

As my normal language of choice is C++ obviously I am a practicioner of RAII and object oriented programming- and I tried to replicate this in my C code, where almost every function "method" is prepended with the "class" type (e.g. "expression_evaluate_expression") and with the first parameter a pointer to a struct "object" of that class, and "objects" are created using class_create() and then destroyed using class_destroy(object* obj).

I used Visual Studio's _CRTDBG memory leak detector and as far as I'm aware the program has no direct memory leakage- which I would guess is due to the strict create/destroy pattern, which mimics constructors/destructors, see llist_create (and llist_destroy which is off the page). I made heavy use of my existing libraries LList and DArray.

Snippet from "program" method "evaluate_rpn_tokens"

error program_evaluate_rpn_tokens(program* prog, const llist* tokens, value_ptr* returned_value)
{
	/* stack is a llist of token*s, ALL OF WHICH are allocated in this function */
	llist stack = llist_create();
	llist_node* current_node = tokens->first;
	error err = error_make_error(0, ERROR_OK);

	while (current_node)
	{
		const token* current_token = (const token*)current_node->value;

		if (current_token->type == TOKEN_TYPE_INT_LITERAL ||
			current_token->type == TOKEN_TYPE_FLOAT_LITERAL ||
			current_token->type == TOKEN_TYPE_STRING_LITERAL ||
			current_token->type == TOKEN_TYPE_VARIABLE
			)
		{
			token* new_token = malloc(sizeof(token));
			*new_token = token_dup(current_token);

			llist_push_back(&stack, (llist_data)new_token);
		}
		else
		{
			unsigned int required_arg_count = 0;

			/* token is an operator or function */
			if (current_token->type == TOKEN_TYPE_FUNCTION)
			{
				function* found_function = program_symbols_find_function(&prog->symbols, current_token->word_value);

				required_arg_count = found_function->arg_count;
				if (llist_len(&stack) < required_arg_count)
				{
					err = error_make_error(0, ERROR_WRONG_NUMBER_OF_ARGUMENTS_FOR_FUNCTION);
					break;
				}
			}
			else
			{
				/* operator */
				required_arg_count = token_get_operator_arg_count(current_token);
				if ((int)llist_len(&stack) < required_arg_count)
				{
					err = error_make_error(0, ERROR_WRONG_NUMBER_OF_ARGUMENTS_FOR_OPERATOR);
					break;
				}
			}


			/* Pop the needed arguments off the stack, and evaluate the operator. */
			llist args = llist_create();
			for (unsigned int i = 0; i < required_arg_count; ++i)
			{
				token* popped_arg = (token*)llist_pop_back(&stack);
				llist_push_front(&args, (llist_data)popped_arg);
			}

			value_ptr operation_return = 0;

			/* Evaluate the function call, or the simple operation. */
			if (current_token->type == TOKEN_TYPE_FUNCTION)
			{
				function* found_function = program_symbols_find_function(&prog->symbols, current_token->word_value);
				err = program_evaluate_function(prog, found_function, &args, &operation_return);
			}
			else
			{
				err = program_evaluate_simple_operation(current_token, &args, &operation_return);
			}

			/* Now free the arguments we popped off the stack. */
			llist_node* current_arg_node = args.first;
			while (current_arg_node)
			{
				token* current_arg_node_token = (token*)current_arg_node->value;
				token_destroy(current_arg_node_token);
				free(current_arg_node->value);

				current_arg_node = current_arg_node->next;
			}

			llist_destroy(&args);

			/* Push the returned results, if any, back onto the stack. */
			if (operation_return != 0)
			{
				/* No need to duplicate the token here- new_value_token is created
				above so we can add it to the stack and then it will be deleted
				when the stack is destroyed. */
				token* new_value_token = malloc(sizeof(token));
				*new_value_token = token_create_rvalue(operation_return);

				llist_push_back(&stack, (llist_data)new_value_token);
			}
		}

		current_node = current_node->next;
	}

	if (llist_len(&stack) == 1 && err.type == ERROR_OK)
	{
		/* If there is only one value in the stack, that is the result
		of the calculation. */
		const token* last_token = ((token*)llist_at(&stack, 0)->value);

		/* Create a value from the token. */
		if (last_token->type == TOKEN_TYPE_INT_LITERAL)
		{
			*returned_value = value_make_int_value(last_token->int_literal_value);
		}
		else if (last_token->type == TOKEN_TYPE_FLOAT_LITERAL)
		{
			*returned_value = value_make_float_value(last_token->float_literal_value);
		}
		else if (last_token->type == TOKEN_TYPE_STRING_LITERAL)
		{
			*returned_value = value_make_string_value(last_token->string_literal_value);
		}
		else if (last_token->type == TOKEN_TYPE_VARIABLE)
		{
			*returned_value = last_token->variable_ptr->val;
		}
		else if (last_token->type == TOKEN_TYPE_RVALUE)
		{
			*returned_value = last_token->rvalue;
		}
		else
		{
			*returned_value = 0;
		}
	}
	else
	{
		/* The user input has too many values. */
		err = error_make_error(0, err.type == ERROR_OK ? ERROR_SYNTAX_ERROR : err.type);
	}

	/* Free all of the allocated tokens in "stack". */
	current_node = stack.first;
	while (current_node)
	{
		token* token_to_destroy = (token*)current_node->value;
		token_destroy(token_to_destroy);
		free(current_node->value);

		current_node = current_node->next;
	}
	llist_destroy(&stack);

	return err;
}