# include "hugin.h"
# include <errno.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>


/* This program generates cases from a given Bayesian belief network.

   The program expects the following arguments:
   - the name of the NET file containing the belief network;
   - the name of the output file (which will contain the generated cases);
   - the number of cases to be generated (a nonnegative integer);
   - [optional] the probability of missing data (omitting this implies
     a zero probability, which means complete data will be generated).

   This program is useful for generating artificial data that can be
   used for testing the various learning algorithms provided by the
   Hugin software.
*/

void generate_case_file_from_net
   (h_string_t, h_string_t, h_string_t, h_string_t);

int main (int argc, char *argv[])
{
    if (argc < 4 || argc > 5)
    {
	fprintf (stderr, "Usage: %s <NET_file_name> <data_file_name>"
		 " <number_of_cases> [<p_missing_data>]\n",
		 argv[0]);
	exit (EXIT_FAILURE);
    }

    if (argc == 4)
	generate_case_file_from_net (argv[1], argv[2], argv[3], "0.0");
    else
	generate_case_file_from_net (argv[1], argv[2], argv[3], argv[4]);

    return 0;
}


/* A simple parse error handler: it prints the line number and the error
   message on stderr.  It is the handler passed to h_net_parse_domain
   when the NET file is parsed.  <data> is not used.

   NOTE(review): h_location_t is not defined in this file (presumably it
   comes from hugin.h), so its exact integer type is not visible here.
   The line number is therefore cast to unsigned long, which guarantees
   that the conversion specifier and the argument type agree whatever
   the underlying type is.
*/

void error_handler (h_location_t line_no, h_string_t message, void *data)
{
    (void) data;  /* unused; silences "unused parameter" warnings */

    fprintf (stderr, "Error at line %lu: %s\n",
	     (unsigned long) line_no, message);
}

/* Report the current Hugin API error and terminate.

   The description of the error code returned by h_error_code is printed
   on stderr, after which the program exits with status EXIT_FAILURE;
   this function never returns to its caller.
*/

void print_error (void)
{
    fprintf (stderr, "Error: %s\n",
	     h_error_description (h_error_code ()));

    exit (EXIT_FAILURE);
}

/* This function parses a floating-point number from the string <s>.

   The whole string must be consumed by the conversion.  -1.0 is returned
   if <s> is empty, if it contains anything that is not part of the
   number, or if it denotes a NaN ("not a number").

   Note that -1.0 is also the result for the valid input "-1.0", so this
   function is only suitable where negative values are invalid anyway.
*/

double get_double_from_string (h_string_t s)
{
    char *rest;  /* set by strtod to the first unconverted character */
    double x = strtod (s, &rest);

    /* No conversion performed, or trailing garbage after the number. */
    if (rest == s || *rest != '\0')
	return -1.0;

    /* strtod accepts "nan"; a NaN is the only value that compares
       unequal to itself, and it would slip through any range check
       of the form (x < lo || x > hi) made by the caller. */
    if (x != x)
	return -1.0;

    return x;
}

/* This function parses a count (a nonnegative decimal integer) from the
   string <s>.

   The whole string must be consumed by the conversion.  -1 is returned
   if <s> is empty, if it contains anything that is not part of the
   number, if the number is negative, or if it is too large to be
   represented as a long.
*/

long get_count_from_string (h_string_t s)
{
    char *rest;  /* set by strtol to the first unconverted character */
    long x;

    /* strtol signals overflow only by setting errno to ERANGE (the
       return value is then clamped), so errno must be cleared first. */
    errno = 0;
    x = strtol (s, &rest, 10);

    /* No conversion performed, or trailing garbage after the number. */
    if (rest == s || *rest != '\0')
	return -1;

    /* Out of range for a long, or not a valid count. */
    if (errno == ERANGE || x < 0)
	return -1;

    return x;
}


/* This function loads the belief network from the given NET file (the
   ".net" extension is added to <net_file_name> unless it is already
   there).  Then, it generates <count> cases using that network such that
   the probability of missing data is <p_missing_data>.  Finally, the
   cases are saved as a Hugin data file named <data_file_name>.

   <count> and <p_missing_data> are passed as strings and are validated
   before the network is loaded: <count> must be a nonnegative integer,
   and <p_missing_data> must be a number in the interval [0,1].

   On any error, a message is printed on stderr and the program exits
   with status EXIT_FAILURE.
*/

void generate_cases (h_domain_t, size_t, h_double_t);
h_node_t *get_all_nodes (h_domain_t);

void generate_case_file_from_net
   (h_string_t net_file_name, h_string_t data_file_name,
    h_string_t count, h_string_t p_missing_data)
{
    size_t stem_length = strlen (net_file_name);
    char *file_name_buffer;
    h_domain_t domain;
    long number_of_cases;
    double p;  /* probability of missing data */
    h_node_t *nodes;

    number_of_cases = get_count_from_string (count);

    if (number_of_cases < 0)
    {
	fprintf (stderr, "Invalid number of cases: %s\n", count);
	exit (EXIT_FAILURE);
    }

    p = get_double_from_string (p_missing_data);

    /* The test is phrased as "not inside [0,1]" so that a NaN -- for
       which every ordered comparison is false -- is rejected as well. */
    if (!(p >= 0.0 && p <= 1.0))
    {
	fprintf (stderr, "Invalid probability: %s\n", p_missing_data);
	exit (EXIT_FAILURE);
    }

    /* Drop a trailing ".net" so that the extension can be appended
       unconditionally below. */
    if (stem_length >= 4
	&& strcmp (net_file_name + (stem_length - 4), ".net") == 0)
	stem_length -= 4;

    /* Room for the stem, the 4 characters of ".net", and the NUL. */
    if ((file_name_buffer = malloc (stem_length + 5)) == NULL)
    {
	fprintf (stderr, "Out of memory\n");
	exit (EXIT_FAILURE);
    }

    memcpy (file_name_buffer, net_file_name, stem_length);
    strcpy (file_name_buffer + stem_length, ".net");

    printf ("Parsing NET file \"%s\" ...\n", file_name_buffer);

    if ((domain = h_net_parse_domain (file_name_buffer, error_handler, NULL))
	== NULL)
	print_error ();

    printf ("Generating cases ...\n");

    /* number_of_cases is known to be nonnegative at this point. */
    generate_cases (domain, (size_t) number_of_cases, p);

    /* get_all_nodes returns NULL only if its allocation failed. */
    if ((nodes = get_all_nodes (domain)) == NULL)
    {
	fprintf (stderr, "Out of memory\n");
	exit (EXIT_FAILURE);
    }

    printf ("Saving cases as \"%s\" ...\n", data_file_name);

    /* NOTE(review): the trailing arguments presumably mean "all cases,
       no case counts, ',' as separator, '*' for missing data" --
       confirm against the Hugin API documentation. */
    if (h_domain_save_cases (domain, data_file_name, nodes, NULL, 0, ",", "*")
	!= 0)
	print_error ();

    printf ("DONE\n");

    free (nodes);

    h_domain_delete (domain);

    free (file_name_buffer);
}


/* Generate <number_of_cases> cases from <domain> and store them as case
   data associated with <domain>, where they can be used for learning or
   saved in the form of a Hugin data file.

   For each case, the domain is simulated once.  Then, for every chance
   and decision node, the value returned by h_domain_get_uniform_deviate
   is compared to <p_missing_data>: only if it is at least that large is
   the sampled state (discrete nodes) or sampled value (other nodes)
   recorded for the node in the case.

   If a Hugin API call fails, print_error is called (which terminates
   the program).
*/

void generate_cases
   (h_domain_t domain, size_t number_of_cases, h_double_t p_missing_data)
{
    size_t case_index = 0;

    if (h_domain_set_number_of_cases (domain, number_of_cases) != 0)
	print_error ();

    while (case_index < number_of_cases)
    {
	h_node_t n;

	if (h_domain_simulate (domain) != 0)
	    print_error ();

	for (n = h_domain_get_first_node (domain); n != NULL;
	     n = h_node_get_next (n))
	{
	    h_node_category_t category = h_node_get_category (n);

	    /* Only chance and decision nodes receive case data. */
	    if (category != h_category_chance
		&& category != h_category_decision)
		continue;

	    /* Leave the datum out unless the deviate reaches the
	       threshold. */
	    if (!(h_domain_get_uniform_deviate (domain) >= p_missing_data))
		continue;

	    if (h_node_get_kind (n) != h_kind_discrete)
	    {
		if (h_node_set_case_value
		    (n, case_index, h_node_get_sampled_value (n)) != 0)
		    print_error ();
	    }
	    else
	    {
		if (h_node_set_case_state
		    (n, case_index, h_node_get_sampled_state (n)) != 0)
		    print_error ();
	    }
	}

	case_index++;
    }
}


/* Build a NULL-terminated array holding every node of <domain>, in the
   order in which h_domain_get_first_node / h_node_get_next visit them.

   The array is obtained from malloc; the caller owns it and must free
   it.  NULL is returned if the allocation fails.
*/

h_node_t *get_all_nodes (h_domain_t domain)
{
    h_node_t current;
    h_node_t *list;
    size_t total = 0;
    size_t next_slot = 0;

    /* First pass: count the nodes. */
    current = h_domain_get_first_node (domain);
    while (current != NULL)
    {
	total++;
	current = h_node_get_next (current);
    }

    /* One extra slot for the terminating NULL. */
    list = malloc ((total + 1) * sizeof *list);

    if (list == NULL)
	return NULL;

    /* Second pass: fill in the array. */
    current = h_domain_get_first_node (domain);
    while (current != NULL)
    {
	list[next_slot++] = current;
	current = h_node_get_next (current);
    }

    list[total] = NULL;

    return list;
}
