/*

numa-preplace - Get NUMA pre-placement advice
Derived from numad code, Copyright (C) 2012 Bill Gray (bgray@redhat.com), Red Hat Inc

numad is free software; you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
Software Foundation; version 2.1.

numad is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should find a copy of v2.1 of the GNU Lesser General Public License
somewhere on your Linux system; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/

// Compile with: gcc -std=gnu99 -g -Wall -pthread -o numa-preplace numa-preplace.c -lrt -lm

#define _GNU_SOURCE
#include <sys/syslog.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <stdint.h>
#include <sched.h>
#include <stdarg.h>
#include <time.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <values.h>
#include <math.h>
#include <errno.h>

// Version identifier string for this tool.
#define VERSION_STRING "20150602-preplace"

// Path of the configuration file (not referenced in the visible part of this file).
#define CONFIG_FILE "/etc/numa-preplace.conf"
// Size of small scratch buffers (e.g. hex mask files, formatted ID lists).
#define BUF_SIZE 1024
// Size of buffers holding sysfs / cgroup path names.
#define FNAME_SIZE 192
// Size of buffers holding the contents of sysfs / procfs / cgroup files.
#define BIG_BUF_SIZE 4096
// The ONE_HUNDRED factor is used to scale time and CPU usage units.
// Several CPU quantities are measured in percents of a CPU; and
// several time values are counted in hundredths of a second.
#define ONE_HUNDRED 100
// Divisor used to convert the KB values read from meminfo files into MBs.
#define KILOBYTE (1024)

/* in the original numad code this was set to 20. Here we do not make a special case for HT */
#define DEFAULT_HTT_PERCENT 100

/*
 * this is the percentage of a full cpu that a vcpu is assumed to load.
 * It expresses the target overcommit ratio that the user expects on the server.
 * By default, we assume a target overcommit ratio of 4:1 (4 vcpus to fully load a cpu).
 * Altered with the -o option.
 */
#define DEFAULT_VCPU_LOAD_PERCENT 25.0

/*
 * CONVERT_DIGITS_TO_NUM(p, n): parse the run of decimal digits starting at
 * *p into n, leaving p pointing at the first character after the digits.
 *
 * Caveats for callers:
 *  - The first character is consumed unconditionally, so *p must already be
 *    a digit when the macro is invoked.
 *  - Both arguments are evaluated several times and p is modified; pass
 *    plain lvalues only.
 *  - The expansion is several statements, not a single one, so it must not
 *    be used as the unbraced body of an if/else/loop.
 *  - There is no overflow check on n.
 */
#define CONVERT_DIGITS_TO_NUM(p, n) \
    n = *p++ - '0'; \
    while (isdigit(*p)) { \
        n *= 10; \
        n += (*p++ - '0'); \
    }

// Upper bound used for CPU id loops and for sizing the cpu_data array and
// CPU id lists.  NOTE(review): not assigned in the visible part of this
// file -- confirm where it is initialized.
int num_cpus = 0;
// Number of node<N> directories found by update_nodes(); length of node[].
int num_nodes = 0;
// Bits set in cpu0's thread_siblings mask, as counted by update_nodes().
int threads_per_core = 0;

// Sum of CPUs_total over all nodes (scaled * ONE_HUNDRED), recomputed by update_nodes().
long sum_CPUs_total = 0;
// NOTE(review): presumably the memory (MBs) and CPU amounts of the request
// being placed; they are not used in the visible part of this file.
int requested_mbs = 0;
int requested_cpus = 0;
int hugepage_size = 0; /* 0 = normal memory request, otherwise size in MiB of the hugepages */
// Percent of a CPU credited for each extra hardware thread of a core when
// update_nodes() computes CPUs_total; values >= 100 disable the discount.
int htt_percent = DEFAULT_HTT_PERCENT;
// Percent of a CPU charged for each vcpu cgroup found by update_cpu_allowed().
double vcpu_load_percent = DEFAULT_VCPU_LOAD_PERCENT;
// When non-zero, update_nodes() adds each node's Inactive(file) memory to MBs_free.
int use_inactive_file_cache = 1;

int log_level = LOG_NOTICE;
FILE *log_fs;

/*
 * numad_log: printf-style logging, filtered by the global log_level.
 *
 * level: one of the syslog priorities from sys/syslog.h:
 *     LOG_EMERG    0   system is unusable
 *     LOG_ALERT    1   action must be taken immediately
 *     LOG_CRIT     2   critical conditions
 *     LOG_ERR      3   error conditions
 *     LOG_WARNING  4   warning conditions
 *     LOG_NOTICE   5   normal but significant condition
 *     LOG_INFO     6   informational
 *     LOG_DEBUG    7   debug-level messages
 * fmt, ...: printf-style format string and arguments.
 *
 * Messages whose level is numerically greater than log_level are dropped.
 * Output goes to log_fs (stderr if log_fs has not been set yet) and that
 * same stream is flushed, so the message is visible immediately even if the
 * process exits right after logging.
 */
void numad_log(int level, const char *fmt, ...) {
    if (level > log_level) {
        return;
    }
    // Fall back to stderr rather than crashing if logging happens before
    // the log stream has been opened.
    FILE *out = (log_fs != NULL) ? log_fs : stderr;
    va_list ap;
    va_start(ap, fmt);
    vfprintf(out, fmt, ap);
    va_end(ap);
    // Flush the stream that was actually written to.
    fflush(out);
}

// Per-CPU accounting record, indexed by CPU id.
typedef struct cpu_data {
    // Accumulated load charged to this CPU, in percent of a CPU:
    // update_cpu_allowed() adds vcpu_load_percent divided by the number of
    // CPUs a vcpu cgroup may run on, for every such cgroup allowed here.
    double threads_cost;
} cpu_data_t;

// Array of num_cpus records, allocated / zeroed by update_cpu_data().
cpu_data_t *cpu_data;

typedef struct id_list {
    // Use CPU_SET(3) <sched.h> bitmasks,
    // but bundle size and pointer together
    // and genericize for both CPU and Node IDs
    cpu_set_t *set_p;   // dynamically allocated bitmask (CPU_ALLOC)
    size_t bytes;       // size of the bitmask in bytes (CPU_ALLOC_SIZE)
} id_list_t, *id_list_p;

// Field accessors.
#define ID_LIST_SET_P(list_p) (list_p->set_p)
#define ID_LIST_BYTES(list_p) (list_p->bytes)

// Allocate a list able to hold IDs 0 .. num_elements-1 and store it in
// list_p.  Logs and exits on allocation failure.  The bitmask is NOT zeroed
// here.  Expands to several statements: use only where a statement list is
// valid (never as an unbraced if/else body); list_p must be an lvalue.
#define INIT_ID_LIST(list_p, num_elements) \
    list_p = malloc(sizeof(id_list_t)); \
    if (list_p == NULL) { numad_log(LOG_CRIT, "INIT_ID_LIST malloc failed\n"); exit(EXIT_FAILURE); } \
    list_p->set_p = CPU_ALLOC(num_elements); \
    if (list_p->set_p == NULL) { numad_log(LOG_CRIT, "CPU_ALLOC failed\n"); exit(EXIT_FAILURE); } \
    list_p->bytes = CPU_ALLOC_SIZE(num_elements);

// Empty a CPU id list, allocating it first (sized for num_cpus) if list_p
// is NULL.  Same multi-statement caveat as INIT_ID_LIST.
#define CLEAR_CPU_LIST(list_p) \
    if (list_p == NULL) { \
        INIT_ID_LIST(list_p, num_cpus); \
    } \
    CPU_ZERO_S(list_p->bytes, list_p->set_p)

// Empty a node id list, allocating it first (sized for num_nodes) if list_p
// is NULL.  Same multi-statement caveat as INIT_ID_LIST.
#define CLEAR_NODE_LIST(list_p) \
    if (list_p == NULL) { \
        INIT_ID_LIST(list_p, num_nodes); \
    } \
    CPU_ZERO_S(list_p->bytes, list_p->set_p)

// Release a list and its bitmask, then reset list_p to NULL.  Safe on NULL.
#define FREE_LIST(list_p) \
    if (list_p != NULL) { \
        if (list_p->set_p != NULL) { CPU_FREE(list_p->set_p); } \
        free(list_p); \
        list_p = NULL; \
    }

// Copy the bitmask of orig_list_p into copy_list_p.  orig_list_p->bytes
// bytes are copied, so the destination must be at least that large.
#define COPY_LIST(orig_list_p, copy_list_p) \
    memcpy(copy_list_p->set_p, orig_list_p->set_p, orig_list_p->bytes)

// Single-ID operations: count, add, remove, membership test.
#define NUM_IDS_IN_LIST(list_p)     CPU_COUNT_S(list_p->bytes, list_p->set_p)
#define ADD_ID_TO_LIST(k, list_p)  CPU_SET_S(k, list_p->bytes, list_p->set_p)
#define CLR_ID_IN_LIST(k, list_p)  CPU_CLR_S(k, list_p->bytes, list_p->set_p)
#define ID_IS_IN_LIST(k, list_p) CPU_ISSET_S(k, list_p->bytes, list_p->set_p)

// Whole-list operations.  The byte size of the first argument is applied to
// every operand, so all lists involved are expected to have the same size.
#define EQUAL_LISTS(list_1_p, list_2_p) CPU_EQUAL_S(list_1_p->bytes, list_1_p->set_p, list_2_p->set_p)
#define AND_LISTS(and_list_p, list_1_p, list_2_p) CPU_AND_S(and_list_p->bytes, and_list_p->set_p, list_1_p->set_p, list_2_p->set_p)
#define  OR_LISTS( or_list_p, list_1_p, list_2_p)  CPU_OR_S( or_list_p->bytes,  or_list_p->set_p, list_1_p->set_p, list_2_p->set_p)
#define XOR_LISTS(xor_list_p, list_1_p, list_2_p) CPU_XOR_S(xor_list_p->bytes, xor_list_p->set_p, list_1_p->set_p, list_2_p->set_p)

/*
 * negate_cpu_list: invert the membership of every CPU id in [0, num_cpus)
 * within list_p, in place.
 *
 * Exits the process if list_p is NULL or if num_cpus is less than 1.
 * Returns the number of ids in the list after the inversion.
 */
int negate_cpu_list(id_list_p list_p) {
    if (list_p == NULL) {
        numad_log(LOG_CRIT, "Cannot negate a NULL list\n");
        exit(EXIT_FAILURE);
    }
    if (num_cpus < 1) {
        numad_log(LOG_CRIT, "No CPUs to negate in list!\n");
        exit(EXIT_FAILURE);
    }
    int cpu_id = 0;
    while (cpu_id < num_cpus) {
        if (!ID_IS_IN_LIST(cpu_id, list_p)) {
            // Was absent: now present
            ADD_ID_TO_LIST(cpu_id, list_p);
        } else {
            // Was present: now absent
            CLR_ID_IN_LIST(cpu_id, list_p);
        }
        cpu_id++;
    }
    return NUM_IDS_IN_LIST(list_p);
}

// Union of the CPU lists of all nodes; rebuilt by update_nodes().
id_list_p all_cpus_list_p = NULL;
// Ids of all nodes found by update_nodes(), minus the nodes it finds to
// have no total or no free memory.
id_list_p all_nodes_list_p = NULL;
// When reserved_cpu_str is non-NULL, update_nodes() ANDs each node's CPU
// list with this mask.  NOTE(review): both are set outside the visible part
// of this file -- confirm how the mask is derived from the string.
id_list_p reserved_cpu_mask_list_p = NULL;
char *reserved_cpu_str = NULL;

// Per-node resource snapshot, filled in by update_nodes().
typedef struct node_data {
    uint64_t node_id;    // <N> parsed from the sysfs "node<N>" directory name
    uint64_t MBs_total;
    uint64_t MBs_free;
    uint64_t CPUs_total; // scaled * ONE_HUNDRED
    uint64_t CPUs_free;  // scaled * ONE_HUNDRED
    uint64_t magnitude;  // hack: MBs * CPUs
    uint8_t *distance;   // values read from node<N>/distance; num_nodes entries allocated
    id_list_p cpu_list_p; // CPU ids read from node<N>/cpulist
} node_data_t, *node_data_p;

// Array of num_nodes entries, (re)allocated by update_nodes().
node_data_p node = NULL;

// Summary statistics over node[], recomputed on every update_nodes() call.
// Note that the *_ix variables are assigned the node_id of the minimum
// node, not its index into node[].
int min_node_CPUs_free_ix = -1;
int min_node_MBs_free_ix = -1;
long min_node_CPUs_free = MAXINT;
long min_node_MBs_free = MAXINT;
long max_node_CPUs_free = 0;
long max_node_MBs_free = 0;
long avg_node_CPUs_free = 0;
long avg_node_MBs_free = 0;
double stddev_node_CPUs_free = 0.0;
double stddev_node_MBs_free = 0.0;

/* matching functions for scandir */

/*
 * node_and_digits: scandir() filter that accepts directory entries whose
 * name is exactly "node" followed by one or more decimal digits
 * (e.g. "node0", "node12").
 *
 * Returns 1 if the entry matches, 0 otherwise.
 */
int node_and_digits(const struct dirent *dptr) {
    const char *p = dptr->d_name;
    if (strncmp(p, "node", 4) != 0) {
        return 0;
    }
    p += 4;
    if (*p == '\0') {
        return 0; // "node" alone, no digits
    }
    for (; *p != '\0'; p++) {
        // <ctype.h> functions need an unsigned char value; a plain char
        // may be negative for bytes >= 0x80.
        if (!isdigit((unsigned char)*p)) {
            return 0;
        }
    }
    return 1;
}

/*
 * qemu_directories_only: scandir() filter that accepts only entries
 * reported as directories (d_type == DT_DIR) whose name begins with
 * "machine-qemu".  Returns 1 to keep the entry, 0 to drop it.
 */
int qemu_directories_only(const struct dirent *entry) {
    static const char prefix[] = "machine-qemu";
    const int is_dir = (entry->d_type == DT_DIR);
    const int has_prefix = (strncmp(entry->d_name, prefix, sizeof(prefix) - 1) == 0);
    return (is_dir && has_prefix) ? 1 : 0;
}

/*
 * vcpu_directories_only: scandir() filter that accepts only entries
 * reported as directories (d_type == DT_DIR) whose name begins with
 * "vcpu".  Returns 1 to keep the entry, 0 to drop it.
 */
int vcpu_directories_only(const struct dirent *entry) {
    static const char prefix[] = "vcpu";
    const int is_dir = (entry->d_type == DT_DIR);
    const int has_prefix = (strncmp(entry->d_name, prefix, sizeof(prefix) - 1) == 0);
    return (is_dir && has_prefix) ? 1 : 0;
}

/*
 * iothread_directories_only: scandir() filter that accepts only entries
 * reported as directories (d_type == DT_DIR) whose name begins with
 * "iothread".  Returns 1 to keep the entry, 0 to drop it.
 */
int iothread_directories_only(const struct dirent *entry) {
    static const char prefix[] = "iothread";
    const int is_dir = (entry->d_type == DT_DIR);
    const int has_prefix = (strncmp(entry->d_name, prefix, sizeof(prefix) - 1) == 0);
    return (is_dir && has_prefix) ? 1 : 0;
}

/*
 * count_set_bits_in_hex_list_file: read up to BUF_SIZE bytes from fname and
 * return the total number of 1 bits across all hexadecimal digits found.
 *
 * Spaces, commas and newlines are skipped.  Any other character is a fatal
 * error: it is logged and the process exits.  Returns 0 if the file cannot
 * be opened or nothing can be read.
 */
int count_set_bits_in_hex_list_file(char *fname) {
    // Number of 1 bits in each hex digit value 0x0 .. 0xf
    static const int bits_in_nibble[16] = {
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
    };
    int fd = open(fname, O_RDONLY, 0);
    if (fd < 0) {
        return 0;
    }
    char buf[BUF_SIZE];
    int bytes = read(fd, buf, BUF_SIZE);
    close(fd);
    int total = 0;
    for (int pos = 0;  (pos < bytes);  pos++) {
        char c = buf[pos];
        if ((c >= '0') && (c <= '9')) {
            total += bits_in_nibble[c - '0'];
        } else if ((c >= 'a') && (c <= 'f')) {
            total += bits_in_nibble[c - 'a' + 10];
        } else if ((c >= 'A') && (c <= 'F')) {
            total += bits_in_nibble[c - 'A' + 10];
        } else if ((c == ' ') || (c == ',') || (c == '\n')) {
            // separators contribute nothing
        } else {
            numad_log(LOG_CRIT, "Unexpected character in list\n");
            exit(EXIT_FAILURE);
        }
    }
    return total;
}

/*
 * add_ids_to_list_from_str: parse a list such as "0-3,8,10-11" from s and
 * add every id it names to list_p.
 *
 * Parsing stops at the first newline or at the end of the string.  Any
 * non-digit character separates numbers; a '-' seen before a number makes
 * that number the inclusive end of a range that continues from the number
 * parsed before it.
 *
 * Exits the process if list_p is NULL.  A NULL or empty s adds nothing.
 * Returns the number of ids in list_p afterwards.
 */
int add_ids_to_list_from_str(id_list_p list_p, char *s) {
    if (list_p == NULL) {
        numad_log(LOG_CRIT, "Cannot add to NULL list\n");
        exit(EXIT_FAILURE);
    }
    if ((s == NULL) || (*s == '\0')) {
        return NUM_IDS_IN_LIST(list_p);
    }
    int range_pending = 0;  // a '-' was seen since the previous number
    int fill_from = 0;      // next id to add; persists across numbers so a range can continue
    while (1) {
        // Step over separators, remembering whether one of them was a '-'
        for (;  !isdigit(*s);  s++) {
            if ((*s == '\n') || (*s == '\0')) {
                return NUM_IDS_IN_LIST(list_p);
            }
            if (*s == '-') {
                range_pending = 1;
            }
        }
        int last_id;
        CONVERT_DIGITS_TO_NUM(s, last_id);
        if (!range_pending) {
            // Stand-alone number: the "range" is just this one id
            fill_from = last_id;
        }
        while (fill_from <= last_id) {
            ADD_ID_TO_LIST(fill_from, list_p);
            fill_from++;
        }
        range_pending = 0;
    }
}

/*
 * str_from_id_list: format the ids in list_p as a compact, comma separated
 * list of ids and inclusive ranges (e.g. "0-3,8,10-11") into str_p.
 *
 * str_p:    destination buffer; must not be NULL.
 * str_size: size of the destination buffer in bytes; must be at least 3.
 * list_p:   list to format; NULL or an empty list yields an empty string.
 *
 * Exits the process if str_p is NULL or str_size is less than 3.
 * The result is always NUL-terminated and never written past str_size
 * bytes: if the whole list does not fit, output stops after the last
 * complete range that fits and a warning is logged.
 * Returns the length of the resulting string (excluding the terminator).
 */
int str_from_id_list(char *str_p, int str_size, id_list_p list_p) {
    char *p = str_p;
    if ((p == NULL) || (str_size < 3)) {
        numad_log(LOG_CRIT, "Bad string for ID listing\n");
        exit(EXIT_FAILURE);
    }
    int n;
    if ((list_p == NULL) || ((n = NUM_IDS_IN_LIST(list_p)) == 0)) {
        *p = '\0';
        return 0;
    }
    char *end = str_p + str_size;   // one past the last writable byte
    int id_range_start = -1;
    for (int id = 0;  ;  id++) {
        int id_in_list = (ID_IS_IN_LIST(id, list_p) != 0);
        if ((id_in_list) && (id_range_start < 0)) {
            id_range_start = id; // beginning an ID range
        } else if ((!id_in_list) && (id_range_start >= 0)) {
            // Format the range that just ended into a scratch buffer first,
            // so its exact length is known before touching the output.
            char range[32];
            int len;
            if (id - id_range_start > 1) {
                len = snprintf(range, sizeof(range), "%d-%d", id_range_start, (id - 1));
            } else {
                len = snprintf(range, sizeof(range), "%d", id_range_start);
            }
            // Need room for the text plus one byte for the ',' -- which for
            // the final range is later replaced by the terminating NUL.
            if ((len < 0) || ((end - p) < (len + 1))) {
                numad_log(LOG_WARNING, "ID list string truncated\n");
                break;
            }
            memcpy(p, range, len);
            p += len;
            *p++ = ',';
            id_range_start = -1; // no longer in a range
            if (n <= 0) { break; } // exit only after finishing a range
        }
        n -= id_in_list;
    }
    if (p > str_p) {
        p -= 1; // eliminate trailing ','
    }
    *p = '\0';
    return (p - str_p);
}

/*
 * show_threads: log (at LOG_INFO) the accumulated threads cost of each of
 * the num_cpus entries in cpu_data_buf.  Does nothing if cpu_data_buf is NULL.
 */
void show_threads(cpu_data_t *cpu_data_buf)
{
    if (cpu_data_buf == NULL) {
        return;
    }
    for (int i = 0; i < num_cpus; i++) {
        // Report the buffer that was passed in, not the global array.
        numad_log(LOG_INFO, "cpu_data[%d]: threads cost: %.2f\n", i, cpu_data_buf[i].threads_cost);
    }
}

/*
 * update_cpu_allowed: count how many vcpus or iothreads are restricted to each cpu.
 *
 * cpu_data_buf:       per-CPU records (num_cpus entries) to add costs to.
 * cpu_allowed_list_p: scratch CPU list; expected to be empty on entry and
 *                     left empty after every directory that was read.
 * vm_name:            name of the VM's directory under /sys/fs/cgroup/machine.slice.
 * num_dirs, dirs:     the vcpu/iothread sub-directories of that VM to examine.
 *
 * For every directory, cpuset.cpus.effective is read and vcpu_load_percent
 * is spread evenly over the CPUs listed there.  Directories whose file
 * cannot be read (or whose path does not fit in FNAME_SIZE) are skipped.
 */
void update_cpu_allowed(cpu_data_t *cpu_data_buf, id_list_p cpu_allowed_list_p,
                        char *vm_name, int num_dirs, struct dirent **dirs) {
    char fname[FNAME_SIZE];
    char buf[BIG_BUF_SIZE];

    for (int v = 0; v < num_dirs; v++) {
        int len = snprintf(fname, FNAME_SIZE, "/sys/fs/cgroup/machine.slice/%s/libvirt/%s/cpuset.cpus.effective",
                           vm_name, dirs[v]->d_name);
        if ((len < 0) || (len >= FNAME_SIZE)) {
            // A truncated path would name some other (or no) file
            numad_log(LOG_WARNING, "cgroup path too long for %s/%s, skipping\n", vm_name, dirs[v]->d_name);
            continue;
        }
        numad_log(LOG_INFO, "scanning cgroup %s\n", fname);
        ssize_t bytes = -1;
        int fd = open(fname, O_RDONLY, 0);
        if (fd >= 0) {
            // Leave room for the terminator
            bytes = read(fd, buf, BIG_BUF_SIZE - 1);
            close(fd);
        }
        if (bytes < 0) {
            numad_log(LOG_INFO, "Could not read %s\n", fname);
            continue;
        }
        // Terminate at the end of what was actually read, so leftovers from
        // a previous iteration (or uninitialized stack) are never parsed.
        buf[bytes] = '\0';
        int num_allowed = add_ids_to_list_from_str(cpu_allowed_list_p, buf);

        if (num_allowed > 0) {
            // Each allowed CPU gets an equal share of this thread's load
            double cost_per_cpu = vcpu_load_percent / num_allowed;
            for (int c = 0; c < num_cpus; c++) {
                if (ID_IS_IN_LIST(c, cpu_allowed_list_p)) {
                    cpu_data_buf[c].threads_cost += cost_per_cpu;
                }
            }
        }
        CLEAR_CPU_LIST(cpu_allowed_list_p);
    }
}


/*
 * free_dirent_list: release a name list returned by scandir(): each of the
 * count entries, then the array itself.
 */
static void free_dirent_list(struct dirent **list, int count) {
    for (int i = 0; i < count; i++) {
        free(list[i]);
    }
    free(list);
}

/*
 * update_cpu_data: parse info from /sys/fs/cgroup/machine.slice/ to detect
 * all KVM VCPU threads and iothreads that have affinity to run on each cpu.
 * We will consider this when later calculating the amount of "free" cpus on each node.
 *
 * Allocates the global cpu_data array (num_cpus entries) on first use and
 * zeroes it on later calls, then lets update_cpu_allowed() add the cost of
 * the vcpu cgroups of every "machine-qemu*" VM directory.
 * Exits the process if memory cannot be allocated or a directory cannot be
 * scanned.
 */
void update_cpu_data() {
    char fname[FNAME_SIZE];
    id_list_p cpu_allowed_list_p = NULL;
    CLEAR_CPU_LIST(cpu_allowed_list_p);

    struct dirent **vmlist;
    if (cpu_data == NULL) {
        if (!(cpu_data = calloc(num_cpus, sizeof(*cpu_data)))) {
            numad_log(LOG_CRIT, "cpu_data calloc failed\n");
            exit(EXIT_FAILURE);
        }
    } else {
        memset(cpu_data, 0, num_cpus * sizeof(*cpu_data));
    }
    int num_vms = scandir("/sys/fs/cgroup/machine.slice", &vmlist, qemu_directories_only, NULL);
    if (num_vms < 0) {
        numad_log(LOG_CRIT, "error trying to read /sys/fs/cgroup/machine.slice\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < num_vms; i++) {
        struct dirent **vcpus, **iothreads;
        // Points into vmlist[i], which stays allocated until after the loop
        char *vm_name = vmlist[i]->d_name;
        snprintf(fname, FNAME_SIZE, "/sys/fs/cgroup/machine.slice/%s/libvirt", vm_name);
        int num_vcpus = scandir(fname, &vcpus, vcpu_directories_only, NULL);
        int num_iothreads = scandir(fname, &iothreads, iothread_directories_only, NULL);
        if (num_vcpus < 0 || num_iothreads < 0) {
            numad_log(LOG_CRIT, "Could not scan cgroup libvirt directories\n");
            exit(EXIT_FAILURE);
        }
        update_cpu_allowed(cpu_data, cpu_allowed_list_p, vm_name, num_vcpus, vcpus);
        /*
         * XXX disable iothreads for now: if we really want them we need to also include in the command line
         * the amount of iothreads for _this_ request, so libvirt would need to pass this information too.
         */
        /* update_cpu_allowed(cpu_data, cpu_allowed_list_p, vm_name, num_iothreads, iothreads); */
        // scandir() allocates every entry as well as the array
        free_dirent_list(vcpus, num_vcpus);
        free_dirent_list(iothreads, num_iothreads);
    }
    FREE_LIST(cpu_allowed_list_p);
    free_dirent_list(vmlist, num_vms);
}


/*
 * show_nodes: dump the per-node statistics gathered by update_nodes() to
 * log_fs: the min/max/avg/stddev summaries, then for every node its memory
 * and CPU totals, distance vector and CPU list.
 *
 * Only the "Nodes:" line goes through numad_log() (and so is subject to
 * log_level); everything else is written to log_fs unconditionally.
 */
void show_nodes() {
    fprintf(log_fs, "\n");
    numad_log(LOG_INFO, "Nodes: %d\n", num_nodes);
    fprintf(log_fs, "Min CPUs free: %ld, Max CPUs: %ld, Avg CPUs: %ld, StdDev: %lg\n",
        min_node_CPUs_free, max_node_CPUs_free, avg_node_CPUs_free, stddev_node_CPUs_free);
    fprintf(log_fs, "Min MBs free: %ld, Max MBs: %ld, Avg MBs: %ld, StdDev: %lg\n",
        min_node_MBs_free, max_node_MBs_free, avg_node_MBs_free, stddev_node_MBs_free);
    for (int ix = 0;  (ix < num_nodes);  ix++) {
        // The node fields are uint64_t: convert explicitly so every argument
        // matches its conversion specifier.
        fprintf(log_fs, "Node %d: MBs_total %llu, MBs_free %6llu, CPUs_total %llu, CPUs_free %4llu,  Distance: ",
                (int)node[ix].node_id,
                (unsigned long long)node[ix].MBs_total,
                (unsigned long long)node[ix].MBs_free,
                (unsigned long long)node[ix].CPUs_total,
                (unsigned long long)node[ix].CPUs_free);
        for (int d = 0;  (d < num_nodes);  d++) {
            fprintf(log_fs, "%d ", node[ix].distance[d]);
        }
        char buf[BUF_SIZE];
        str_from_id_list(buf, BUF_SIZE, node[ix].cpu_list_p);
        fprintf(log_fs, " CPUs: %s\n", buf);
    }
    fflush(log_fs);
}

int update_nodes() {
    char fname[FNAME_SIZE];
    char buf[BIG_BUF_SIZE];
    if (1) {
        // Count directory names of the form: /sys/devices/system/node/node<N>
        struct dirent **namelist;
        int num_files = scandir ("/sys/devices/system/node", &namelist, node_and_digits, NULL);
        if (num_files < 1) {
            numad_log(LOG_CRIT, "Could not get NUMA node info\n");
            exit(EXIT_FAILURE);
        }
        int need_to_realloc = (num_files != num_nodes);
        if (need_to_realloc) {
            for (int ix = num_files;  (ix < num_nodes);  ix++) {
                // If new < old, free old node_data pointers
                free(node[ix].distance);
                FREE_LIST(node[ix].cpu_list_p);
            }
            node = realloc(node, (num_files * sizeof(node_data_t)));
            if (node == NULL) {
                numad_log(LOG_CRIT, "node realloc failed\n");
                exit(EXIT_FAILURE);
            }
            for (int ix = num_nodes;  (ix < num_files);  ix++) {
                // If new > old, nullify new node_data pointers
                node[ix].distance = NULL;
                node[ix].cpu_list_p = NULL;
            }
            num_nodes = num_files;
        }
        sum_CPUs_total = 0;
        CLEAR_CPU_LIST(all_cpus_list_p);
        CLEAR_NODE_LIST(all_nodes_list_p);
        // Figure out how many threads per core there are (for later discounting of hyper-threads)
        threads_per_core = count_set_bits_in_hex_list_file("/sys/devices/system/cpu/cpu0/topology/thread_siblings");
        if (threads_per_core < 1) {
            numad_log(LOG_CRIT, "Could not count threads per core\n");
            exit(EXIT_FAILURE);
        }
        // For each "node<N>" filename present, save <N> in node[ix].node_id
        // Note that the node id might not necessarily match the node ix.
        // Also populate the cpu lists and distance vectors for this node.
        for (int node_ix = 0;  (node_ix < num_nodes);  node_ix++) {
            int node_id;
            char *p = &namelist[node_ix]->d_name[4];
            CONVERT_DIGITS_TO_NUM(p, node_id);
            free(namelist[node_ix]);
            node[node_ix].node_id = node_id;
            ADD_ID_TO_LIST(node_id, all_nodes_list_p);
            // Get all the CPU IDs in this node...  Read lines from node<N>/cpulist
            // file, and set the corresponding bits in the node cpu list.
            snprintf(fname, FNAME_SIZE, "/sys/devices/system/node/node%d/cpulist", node_id);
            int fd = open(fname, O_RDONLY, 0);
            if ((fd >= 0) && (read(fd, buf, BIG_BUF_SIZE) > 0)) {
                buf[BIG_BUF_SIZE - 1] = '\0';
                // get cpulist from the cpulist string
                CLEAR_CPU_LIST(node[node_ix].cpu_list_p);
                int n = add_ids_to_list_from_str(node[node_ix].cpu_list_p, buf);
                if (reserved_cpu_str != NULL) {
                    AND_LISTS(node[node_ix].cpu_list_p, node[node_ix].cpu_list_p, reserved_cpu_mask_list_p);
                    n = NUM_IDS_IN_LIST(node[node_ix].cpu_list_p);
                }
                OR_LISTS(all_cpus_list_p, all_cpus_list_p, node[node_ix].cpu_list_p);
                // Calculate total CPUs, but possibly discount hyper-threads
                if ((threads_per_core == 1) || (htt_percent >= 100)) {
                    node[node_ix].CPUs_total = n * ONE_HUNDRED;
                } else {
                    n /= threads_per_core;
                    node[node_ix].CPUs_total = n * ONE_HUNDRED;
                    node[node_ix].CPUs_total += n * (threads_per_core - 1) * htt_percent;
                }
                sum_CPUs_total += node[node_ix].CPUs_total;
                close(fd);
            } else {
                numad_log(LOG_CRIT, "Could not get node cpu list\n");
                exit(EXIT_FAILURE);
            }
            // Get distance vector of ACPI SLIT data from node<N>/distance file
            if (need_to_realloc) {
                node[node_ix].distance = realloc(node[node_ix].distance, (num_nodes * sizeof(uint8_t)));
                if (node[node_ix].distance == NULL) {
                    numad_log(LOG_CRIT, "node distance realloc failed\n");
                    exit(EXIT_FAILURE);
                }
            }
            snprintf(fname, FNAME_SIZE, "/sys/devices/system/node/node%d/distance", node_id);
            fd = open(fname, O_RDONLY, 0);
            if ((fd >= 0) && (read(fd, buf, BIG_BUF_SIZE) > 0)) {
                int rnode = 0;
                for (char *p = buf;  (*p != '\n'); ) {
                    int lat;
                    CONVERT_DIGITS_TO_NUM(p, lat);
                    node[node_ix].distance[rnode++] = lat;
                    while (*p == ' ') { p++; }
                }
                close(fd);
            } else {
                numad_log(LOG_CRIT, "Could not get node distance data\n");
                exit(EXIT_FAILURE);
            }
        }
        free(namelist);
    }
    // Second, update the dynamic free memory and available CPU capacity
    update_cpu_data();
    if (log_level >= LOG_INFO) {
        show_threads(cpu_data);
    }
    max_node_MBs_free = 0;
    max_node_CPUs_free = 0;
    min_node_MBs_free = MAXINT;
    min_node_CPUs_free = MAXINT;
    uint64_t hugepages_rsvd = 0;
    uint64_t sum_of_node_MBs_free = 0;
    uint64_t sum_of_node_CPUs_free = 0;

    if (hugepage_size > 0) {
        /*
         * accounting of Rsvd is not available per-node, only globally,
         * which is a major issue in trying to gauge from user-space
         * how many hugepages are _really_ free on a node.
         *
         * Something like a /sys/devices/system/node/node0/meminfo
         * HugePages_Rsvd field would need to be populated from the kernel.
         *
         * In this experiment we try to be pessimistic and consider all
         * reservations affect all nodes.
         */
        int fd = open("/proc/meminfo", O_RDONLY, 0);
        if (fd < 0 || read(fd, buf, BIG_BUF_SIZE) < 0) {
            numad_log(LOG_CRIT, "Could not get node /proc/meminfo\n");
            exit(EXIT_FAILURE);
        }
        close(fd);
        buf[BIG_BUF_SIZE - 1] = '\0';
        char *p = strstr(buf, "HugePages_Rsvd:");
        if (p != NULL) {
            p += strlen("HugePages_Rsvd:");
        } else {
            numad_log(LOG_CRIT, "Could not get node HugePages_Rsvd\n");
            exit(EXIT_FAILURE);
        }
        while (!isdigit(*p)) { p++; }
        CONVERT_DIGITS_TO_NUM(p, hugepages_rsvd);
    }

    for (int node_ix = 0;  (node_ix < num_nodes);  node_ix++) {
        int node_id = node[node_ix].node_id;
        // Get available memory info from node<N>/meminfo file
        snprintf(fname, FNAME_SIZE, "/sys/devices/system/node/node%d/meminfo", node_id);
        int fd = open(fname, O_RDONLY, 0);
        if ((fd >= 0) && (read(fd, buf, BIG_BUF_SIZE) > 0)) {
            close(fd);
            uint64_t KB; char *p;
            buf[BIG_BUF_SIZE - 1] = '\0';
            if (hugepage_size == 0) {
                p = strstr(buf, "MemTotal:");
                if (p != NULL) {
                    p += 9;
                } else {
                    numad_log(LOG_CRIT, "Could not get node MemTotal\n");
                    exit(EXIT_FAILURE);
                }
            } else {
                p = strstr(buf, "HugePages_Total:");
                if (p != NULL) {
                    p += strlen("HugePages_Total:");
                } else {
                    numad_log(LOG_CRIT, "Could not get node HugePages_Total\n");
                    exit(EXIT_FAILURE);
                }
            }
            while (!isdigit(*p)) { p++; }
            CONVERT_DIGITS_TO_NUM(p, KB);
            if (hugepage_size == 0) {
                node[node_ix].MBs_total = (KB / KILOBYTE);
            } else {
                /* KB here is the number of pages, page_size_in_MiB = total MiB */
                node[node_ix].MBs_total = KB * hugepage_size;
            }
            if (hugepage_size == 0) {
                p = strstr(p, "MemFree:");
                if (p != NULL) {
                    p += 8;
                } else {
                    numad_log(LOG_CRIT, "Could not get node MemFree\n");
                    exit(EXIT_FAILURE);
                }
            } else {
                p = strstr(p, "HugePages_Free:");
                if (p != NULL) {
                    p += strlen("HugePages_Free:");
                } else {
                    numad_log(LOG_CRIT, "Could not get node HugePages_Free\n");
                    exit(EXIT_FAILURE);
                }
            }
            while (!isdigit(*p)) { p++; }
            CONVERT_DIGITS_TO_NUM(p, KB);
            if (hugepage_size == 0) {
                node[node_ix].MBs_free = (KB / KILOBYTE);
            } else if (KB < hugepages_rsvd) {
                node[node_ix].MBs_free = 0;
            } else {
                /* KB here is the number of pages, * page_size_in_MiB = total MiB */
                node[node_ix].MBs_free = ((KB - hugepages_rsvd) * hugepage_size);
            }
            if (use_inactive_file_cache) { /* always false for hugepages */
                // Add inactive file cache quantity to "free" memory
                p = strstr(p, "Inactive(file):");
                if (p != NULL) {
                    p += 15;
                } else {
                    numad_log(LOG_CRIT, "Could not get node Inactive(file)\n");
                    exit(EXIT_FAILURE);
                }
                while (!isdigit(*p)) { p++; }
                CONVERT_DIGITS_TO_NUM(p, KB);
                node[node_ix].MBs_free += (KB / KILOBYTE);
            }
            if (node[node_ix].MBs_total < 1 || node[node_ix].MBs_free < 1) {
                // If a node has zero memory, remove it from the all_nodes_list...
                CLR_ID_IN_LIST(node_id, all_nodes_list_p);
            }
            sum_of_node_MBs_free += node[node_ix].MBs_free;
            if (min_node_MBs_free > node[node_ix].MBs_free) {
                min_node_MBs_free = node[node_ix].MBs_free;
                min_node_MBs_free_ix = node[node_ix].node_id;
            }
            if (max_node_MBs_free < node[node_ix].MBs_free) {
                max_node_MBs_free = node[node_ix].MBs_free;
            }
        } else {
            numad_log(LOG_CRIT, "Could not get node meminfo\n");
            exit(EXIT_FAILURE);
        }
        // Calculate available capacity
        if (cpu_data != NULL) {
            double threads_cost = 0.0;
            int cpu = 0;
            int num_lcpus = NUM_IDS_IN_LIST(node[node_ix].cpu_list_p);
            int num_cpus_to_process = num_lcpus;
            while (num_cpus_to_process) {
                if (ID_IS_IN_LIST(cpu, node[node_ix].cpu_list_p)) {
                    threads_cost += cpu_data[cpu].threads_cost;
                    num_cpus_to_process -= 1;
                }
                cpu += 1;
            }
            uint32_t cpus_used = threads_cost;
            if (cpus_used > node[node_ix].CPUs_total) {
                node[node_ix].CPUs_free = 0;
            } else {
                node[node_ix].CPUs_free = node[node_ix].CPUs_total - cpus_used;
            }
            sum_of_node_CPUs_free += node[node_ix].CPUs_free;
            if (min_node_CPUs_free > node[node_ix].CPUs_free) {
                min_node_CPUs_free = node[node_ix].CPUs_free;
                min_node_CPUs_free_ix = node[node_ix].node_id;
            }
            if (max_node_CPUs_free < node[node_ix].CPUs_free) {
                max_node_CPUs_free = node[node_ix].CPUs_free;
            }
            node[node_ix].magnitude = node[node_ix].CPUs_free * node[node_ix].MBs_free;
        } else {
            node[node_ix].CPUs_free = 0;
            node[node_ix].magnitude = 0;
        }
    }
    avg_node_MBs_free = sum_of_node_MBs_free / num_nodes;
    avg_node_CPUs_free = sum_of_node_CPUs_free / num_nodes;
    double MBs_variance_sum = 0.0;
    double CPUs_variance_sum = 0.0;
    for (int node_ix = 0;  (node_ix < num_nodes);  node_ix++) {
        double MBs_diff = (double)node[node_ix].MBs_free - (double)avg_node_MBs_free;
        double CPUs_diff = (double)node[node_ix].CPUs_free - (double)avg_node_CPUs_free;
        MBs_variance_sum += MBs_diff * MBs_diff;
        CPUs_variance_sum += CPUs_diff * CPUs_diff;
    }
    double MBs_variance = MBs_variance_sum / (num_nodes);
    double CPUs_variance = CPUs_variance_sum / (num_nodes);
    stddev_node_MBs_free = sqrt(MBs_variance);
    stddev_node_CPUs_free = sqrt(CPUs_variance);
    if (log_level >= LOG_INFO) {
        show_nodes();
    }
    return num_nodes;
}

/*
 * combined_value_of_weighted_resources: score a node for a request of
 * mbs MBs of memory and cpus CPU units, given the node's free amounts.
 *
 * The part of each free resource that covers the request is weighted more
 * heavily (x10 for memory, x6 for CPU) than what is left over beyond the
 * request (x4 for memory, x1 for CPU).  The score is the product of the
 * weighted memory and weighted CPU figures.
 *
 * nodeid is only used in the debug log line.
 */
uint64_t combined_value_of_weighted_resources(int nodeid, int mbs, int cpus, uint64_t MBs_free, uint64_t CPUs_free) {
    // Default: the node cannot cover the request, so all of its free
    // memory counts as "wanted" and nothing is spare.
    int64_t mem_wanted = MBs_free;
    int64_t mem_spare = 0;
    if (MBs_free > mbs) {
        mem_wanted = mbs;
        mem_spare = MBs_free - mbs;
    }
    // Same split for CPU capacity
    int64_t cpu_wanted = CPUs_free;
    int64_t cpu_spare = 0;
    if (CPUs_free > cpus) {
        cpu_wanted = cpus;
        cpu_spare = CPUs_free - cpus;
    }
    int64_t memfactor = (mem_wanted * 10) + (mem_spare * 4);
    int64_t cpufactor = (cpu_wanted * 6) + (cpu_spare * 1);
    numad_log(LOG_DEBUG, "    Node: %d  memfactor: %ld  cpufactor: %ld\n", nodeid, memfactor, cpufactor);
    return (memfactor * cpufactor);
}

id_list_p pick_numa_nodes(int pid, int cpus, int mbs, int hp_size, int assume_enough_cpus) {
    if (log_level >= LOG_DEBUG) {
        numad_log(LOG_DEBUG, "PICK NODES FOR:  PID: %d,  CPUs %d,  MBs %d,  HP %d\n", pid, cpus / ONE_HUNDRED, mbs, hp_size);
    }
    char buf[BUF_SIZE];
    uint64_t proc_avg_node_CPUs_free = 0;
    // For existing processes, get miscellaneous process specific details
    int pid_ix;
    // Make a copy of node available resources array.  Add in info specific to
    // this process to equalize available resource quantities wrt locations of
    // resources already in use by this process.
    static node_data_p tmp_node;
    tmp_node = realloc(tmp_node, num_nodes * sizeof(node_data_t) );
    if (tmp_node == NULL) {
        numad_log(LOG_CRIT, "tmp_node realloc failed\n");
        exit(EXIT_FAILURE);
    }
    memcpy(tmp_node, node, num_nodes * sizeof(node_data_t) );
    uint64_t sum_of_node_CPUs_free = 0;
    for (int ix = 0;  (ix < num_nodes);  ix++) {
        // Enforce 1/100th CPU minimum
        if (tmp_node[ix].CPUs_free < 1) {
            tmp_node[ix].CPUs_free = 1;
        }
        // numad_log(LOG_DEBUG, "Raw Node[%d]: mem: %ld  cpu: %ld\n", ix, tmp_node[ix].MBs_free, tmp_node[ix].CPUs_free);
        tmp_node[ix].magnitude = combined_value_of_weighted_resources(tmp_node[ix].node_id, mbs, cpus, tmp_node[ix].MBs_free, tmp_node[ix].CPUs_free);
    }
    // Now figure out where to get resources for this request....
    static id_list_p target_node_list_p;
    CLEAR_NODE_LIST(target_node_list_p);
    // Establish a CPU flex fudge factor, on the presumption it is OK if not
    // quite all the CPU request is met.  However, if trying to find resources
    // for pre-placement advice request, do not underestimate the amount of
    // CPUs needed.  Instead, err on the side of providing too many resources.
    int cpu_flex = 0;
    // Figure out minimum number of nodes required
    uint64_t mbs_total_max = 1;
    uint64_t cpus_total_max = 1;
    for (int i = 0; i < num_nodes; i++) {
      if (mbs_total_max < node[i].MBs_total) {
          mbs_total_max = node[i].MBs_total;
      }
      if (cpus_total_max < node[i].CPUs_total) {
          cpus_total_max = node[i].CPUs_total;
      }
    }
    int mem_req_nodes = ceil((double)mbs / mbs_total_max);
    int cpu_req_nodes = ceil((double)(cpus - cpu_flex) / (double)cpus_total_max);
    int min_req_nodes = mem_req_nodes;
    if (min_req_nodes < cpu_req_nodes) {
        min_req_nodes = cpu_req_nodes;
    }
    if (min_req_nodes > num_nodes) {
        min_req_nodes = num_nodes;
    }
    // Use an index to sort NUMA connected resource chain for each node
    int index[num_nodes];
    uint64_t totmag[num_nodes];
    for (int ix = 0;  (ix < num_nodes);  ix++) {
        // Reset the index each time
        for (int n = 0;  (n < num_nodes);  n++) {
            index[n] = n;
        }
        // Sort by minimum relative NUMA distance from node[ix],
        // breaking distance ties with magnitude of available resources
        for (int ij = 0;  (ij < num_nodes);  ij++) {
            int best_ix = ij;
            for (int ik = ij + 1;  (ik < num_nodes);  ik++) {
                int ik_dist = tmp_node[index[ik]].distance[ix];
                int best_ix_dist = tmp_node[index[best_ix]].distance[ix];
                if (best_ix_dist > ik_dist) {
                    best_ix = ik;
                } else if (best_ix_dist == ik_dist) {
                    if (tmp_node[index[best_ix]].magnitude < tmp_node[index[ik]].magnitude ) {
                        best_ix = ik;
                    }
                }
            }
            if (best_ix != ij) {
                int tmp = index[ij];
                index[ij] = index[best_ix];
                index[best_ix] = tmp;
            }
        }
#if 0
        if (log_level >= LOG_DEBUG) {
            for (int iq = 0;  (iq < num_nodes);  iq++) {
                numad_log(LOG_DEBUG, "Node: %d  Dist: %d  Magnitude: %ld\n",
                    tmp_node[index[iq]].node_id, tmp_node[index[iq]].distance[ix], tmp_node[index[iq]].magnitude);
            }
        }
#endif
        // Save the totmag[] sum of the magnitudes of expected needed nodes,
        // "normalized" by NUMA distance (by dividing each magnitude by the
        // relative distance squared).
        totmag[ix] = 0;
        for (int ij = 0;  (ij < min_req_nodes);  ij++) {
            int dist = tmp_node[index[ij]].distance[ix];
            totmag[ix] += (tmp_node[index[ij]].magnitude / (dist * dist));
        }
        numad_log(LOG_DEBUG, "Totmag[%d]: %ld\n", ix, totmag[ix]);
    }
    // Now find the best NUMA node based on the normalized sum of node
    // magnitudes expected to be used.
    int best_node_ix = 0;
    for (int ix = 0;  (ix < num_nodes);  ix++) {
        if (totmag[best_node_ix] < totmag[ix]) {
            best_node_ix = ix;
        }
    }
    numad_log(LOG_DEBUG, "best_node_ix: %d\n", best_node_ix);
    // Reset sorting index again
    for (int n = 0;  (n < num_nodes);  n++) {
        index[n] = n;
    }
    // Sort index by distance from node[best_node_ix],
    // breaking distance ties with magnitude
    for (int ij = 0;  (ij < num_nodes);  ij++) {
        int best_ix = ij;
        for (int ik = ij + 1;  (ik < num_nodes);  ik++) {
            int ik_dist = tmp_node[index[ik]].distance[best_node_ix];
            int best_ix_dist = tmp_node[index[best_ix]].distance[best_node_ix];
            if (best_ix_dist > ik_dist) {
                best_ix = ik;
            } else if (best_ix_dist == ik_dist) {
                if (tmp_node[index[best_ix]].magnitude < tmp_node[index[ik]].magnitude ) {
                    best_ix = ik;
                }
            }
        }
        if (best_ix != ij) {
            int tmp = index[ij];
            index[ij] = index[best_ix];
            index[best_ix] = tmp;
        }
    }
    if (log_level >= LOG_DEBUG) {
        for (int iq = 0;  (iq < num_nodes);  iq++) {
            numad_log(LOG_DEBUG, "Node: %d  Dist: %d  Magnitude: %ld\n",
                tmp_node[index[iq]].node_id, tmp_node[index[iq]].distance[best_node_ix], tmp_node[index[iq]].magnitude);
        }
    }

    /*
     * now apply the target cpu overcommit. Note that min_req_nodes still applies,
     * which is a boundary condition that ensures that the number of VCPUS does not exceed the
     * number of physical cpus available to back the VM (even if overcommitted).
     */
    cpus = cpus * vcpu_load_percent / ONE_HUNDRED;
    // Allocate more resources until request is met.
    for (best_node_ix = 0; best_node_ix < num_nodes &&
             (min_req_nodes > 0 || mbs > 0 || (cpus > cpu_flex && !assume_enough_cpus));
         best_node_ix++) {
        int nodeid = tmp_node[index[best_node_ix]].node_id;
        if (log_level >= LOG_DEBUG) {
            numad_log(LOG_DEBUG, "resources needed: MBs: %d,  CPUs: %d\n", mbs, cpus);
        }
        if (!ID_IS_IN_LIST(tmp_node[index[best_node_ix]].node_id, all_nodes_list_p)) {
            /* skip node that has been previously removed from the all nodes list */
            numad_log(LOG_DEBUG, "Skipping rejected Node: %d\n", nodeid);
            continue;
        }
        numad_log(LOG_DEBUG, "Assigning resources from Node: %d\n", nodeid);
        ADD_ID_TO_LIST(nodeid, target_node_list_p);
        min_req_nodes -= 1;
        if (EQUAL_LISTS(target_node_list_p, all_nodes_list_p)) {
            // Apparently we must use all resource nodes...
            break;
        }
        // "Consume" the resources on this node
#define CPUS_MARGIN 0
        /*
         * for normal memory we consider a 100 MiB margin on each node;
         * for hugepages it is likely we only need those for the specific application we
         * are trying to support, where a margin of 1GiB might be wasteful.
         */
        int mbs_margin = (hp_size > 0) ? 0 : 100;
        if (tmp_node[index[best_node_ix]].MBs_free >= (mbs + mbs_margin)) {
            tmp_node[index[best_node_ix]].MBs_free -= mbs;
            mbs = 0;
        } else {
            mbs -= (tmp_node[index[best_node_ix]].MBs_free - mbs_margin);
            tmp_node[index[best_node_ix]].MBs_free = mbs_margin;
        }
        if (tmp_node[index[best_node_ix]].CPUs_free >= (cpus + CPUS_MARGIN)) {
            tmp_node[index[best_node_ix]].CPUs_free -= cpus;
            cpus = 0;
        } else {
            cpus -= (tmp_node[index[best_node_ix]].CPUs_free - CPUS_MARGIN);
            tmp_node[index[best_node_ix]].CPUs_free = CPUS_MARGIN;
        }
        // Next line optional, since we will not look at that node again
        tmp_node[index[best_node_ix]].magnitude = combined_value_of_weighted_resources(nodeid, mbs, cpus, tmp_node[index[best_node_ix]].MBs_free, tmp_node[index[best_node_ix]].CPUs_free);
    }
    // Must always provide at least one node for pre-placement advice
    // FIXME: verify this can happen only if no resources requested...
    if (NUM_IDS_IN_LIST(target_node_list_p) <= 0) {
        ADD_ID_TO_LIST(node[0].node_id, target_node_list_p);
    }
    // Log advice, and return target node list
    str_from_id_list(buf,  BUF_SIZE, all_nodes_list_p);
    char buf2[BUF_SIZE];
    str_from_id_list(buf2, BUF_SIZE, target_node_list_p);
    numad_log(LOG_NOTICE, "Advising nodes (%s)\n", buf2);
    return target_node_list_p;
}

/*
 * Return the number of processors on this system: the larger of the
 * configured count and the currently online count, as reported by sysconf().
 * If that value is negative, a critical message is logged and the program
 * exits.
 */
int get_num_cpus() {
    int configured = sysconf(_SC_NPROCESSORS_CONF);
    int online = sysconf(_SC_NPROCESSORS_ONLN);
    // Take whichever of the two counts is larger
    int count = (online > configured) ? online : configured;
    if (count >= 0) {
        return count;
    }
    numad_log(LOG_CRIT, "Cannot count number of processors\n");
    exit(EXIT_FAILURE);
}

/*
 * Write the command-line help text to stderr and terminate the program with
 * EXIT_FAILURE.  prog_name is shown in the first "Usage:" line.
 */
void print_usage_and_exit(char *prog_name) {
    // Fixed help lines printed before the -t line (which needs formatting)
    static const char *const lines_before_t[] = {
        "-w <CPUs>[:<MBs>] for NUMA node suggestions\n",
        "-H <MBs> hugepages request of size <MBs> (example: 1048576, default=0, normal memory)\n",
        "-o <PERCENT> target overcommit expressed as cpu load per VCPU (default=25.0, overcommit 4:1)\n",
        "-C 1  to count inactive file cache as available memory (default)\n",
        "-C 0  to count inactive file cache memory as unavailable\n",
        "-d for debug logging (same effect as '-l 7')\n",
        "-h to print this usage info\n",
        "-l <N> to specify logging level (usually 5, 6, or 7 -- default 5)\n",
        "-R <CPU_LIST> to reserve some CPUs for non-numad use\n",
    };
    // Fixed help lines printed after the -t line
    static const char *const lines_after_t[] = {
        "-v for verbose  (same effect as '-l 6')\n",
        "-V to show version info\n",
    };
    const size_t n_before = sizeof(lines_before_t) / sizeof(lines_before_t[0]);
    const size_t n_after = sizeof(lines_after_t) / sizeof(lines_after_t[0]);

    fprintf(stderr, "Usage: %s <options> ...\n", prog_name);
    for (size_t k = 0; k < n_before; k++) {
        fputs(lines_before_t[k], stderr);
    }
    fprintf(stderr, "-t <N> to specify thread / logical CPU valuation percent (default %d)\n", DEFAULT_HTT_PERCENT);
    for (size_t k = 0; k < n_after; k++) {
        fputs(lines_after_t[k], stderr);
    }
    exit(EXIT_FAILURE);
}

/*
 * Print the program name, version string and compilation date to stdout,
 * then terminate the program with EXIT_SUCCESS.
 */
void print_version_and_exit(char *prog_name) {
    printf("%s version: %s: compiled %s\n", prog_name, VERSION_STRING, __DATE__);
    exit(EXIT_SUCCESS);
}

/* Report an unparsable option value on stderr and terminate the program. */
static void fail_arg_parse(const char *arg) {
    fprintf(stderr, "Can't parse arg value(s): %s\n", arg);
    exit(EXIT_FAILURE);
}

/*
 * Parse an option value of the form "<first>[.<fraction>][:<second>]".
 *
 * p                   the text to parse (not modified).
 * first_ptr           receives the first number; may be NULL.
 * second_ptr          receives the second number; may be NULL.
 * first_is_optional   when only one number is present, store it through
 *                     second_ptr instead of first_ptr.
 * first_scale_digits  the first number is multiplied by 10^first_scale_digits;
 *                     up to that many fraction digits are folded into the
 *                     scaled value and any further fraction digits are
 *                     ignored (e.g. "2.5" with 2 scale digits yields 250).
 *
 * Both numbers must be non-negative and fit in an int, also after scaling.
 * On any parse or range failure a message is printed to stderr and the
 * program exits with EXIT_FAILURE.  Output locations that do not apply to
 * the given input are left untouched.
 */
void parse_two_arg_values(char *p, int *first_ptr, int *second_ptr, int first_is_optional, int first_scale_digits) {
    char *orig_p = p;
    char *q = NULL;
    int second = -1;
    errno = 0;
    long parsed = strtol(p, &p, 10);
    // Reject values outside the int range explicitly: where long is wider
    // than int, a plain cast would silently truncate them.
    if ((errno != 0) || (p == orig_p) || (parsed < 0) || (parsed > INT_MAX)) {
        fail_arg_parse(orig_p);
    }
    int first = (int) parsed;
    if (*p == '.') {
        p++;
        // isdigit() requires an unsigned char value (or EOF)
        while ((first_scale_digits > 0) && (isdigit((unsigned char) *p))) {
            int digit = *p++ - '0';
            // Refuse to scale past INT_MAX rather than overflow
            if (first > (INT_MAX - digit) / 10) {
                fail_arg_parse(orig_p);
            }
            first = (first * 10) + digit;
            first_scale_digits -= 1;
        }
        // Discard fraction digits beyond the requested precision
        while (isdigit((unsigned char) *p)) { p++; }
    }
    // Pad with zeros when fewer fraction digits were given than requested
    while (first_scale_digits > 0) {
        if (first > INT_MAX / 10) {
            fail_arg_parse(orig_p);
        }
        first *= 10;
        first_scale_digits -= 1;
    }
    if (*p == ':') {
        q = p + 1;
        errno = 0;
        parsed = strtol(q, &p, 10);
        if ((errno != 0) || (p == q) || (parsed < 0) || (parsed > INT_MAX)) {
            fail_arg_parse(orig_p);
        }
        second = (int) parsed;
    }
    if (q != NULL) {
        // Two numbers are present
        if (first_ptr  != NULL) *first_ptr = first;
        if (second_ptr != NULL) *second_ptr = second;
    } else if (first_is_optional) {
        // A lone number is the second value when the first is optional
        if (second_ptr != NULL) *second_ptr = first;
    } else {
        if (first_ptr != NULL) *first_ptr = first;
    }
}

/*
 * Return a heap-allocated copy of the string s.  If the copy cannot be
 * allocated, an error is printed to stderr and the program exits, so the
 * returned pointer is never NULL.
 */
char *config_strdup(const char *s) {
    char *copy = strdup(s);
    if (copy != NULL) {
        return copy;
    }
    fprintf(stderr, "read_config_file: failed to strdup.\n");
    exit(EXIT_FAILURE);
}

/*
 * Build an argument vector that combines the options found in config_file
 * with the original command line.
 *
 * Each line of the file may hold one option, optionally followed by
 * whitespace and a value.  Everything from a '#' to the end of the line is a
 * comment.  Leading and trailing whitespace is ignored, and lines that are
 * empty after that are skipped.
 *
 * argc         in: number of entries in argv_orig; out: number of entries in
 *              the returned vector (unchanged if the file does not exist).
 * argv_orig    the original argument vector.
 * config_file  path of the configuration file.
 *
 * Returns argv_orig itself if config_file does not exist.  Otherwise returns
 * a newly allocated, NULL-terminated vector holding the program name, then
 * the arguments from the file, then the original arguments, so that the
 * command line is seen after the configuration file.  Any other failure
 * (open, read, allocation) prints a message to stderr and exits.
 */
char **read_config_file(int *argc, char *argv_orig[], const char *config_file) {
    char buf[BUF_SIZE];
    char **argv = NULL;
    int n_lines = 0;
    int i = 0;
    FILE *f = fopen(config_file, "r");
    if (!f) {
        if (errno == ENOENT) {
            return argv_orig;
        }
        fprintf(stderr, "read_config_file: failed to open %s, %s\n", config_file, strerror(errno));
        exit(EXIT_FAILURE);
    }
    /* count lines first */
    while (fgets(buf, BUF_SIZE, f) != NULL) {
        n_lines += 1;
    }
    if (ferror(f)) {
        fprintf(stderr, "read_config_file: failed to read %s\n", config_file);
        exit(EXIT_FAILURE);
    }
    /* max possible argc is the original argc + arg with value on each line + 1 for the NULL */
    argv = calloc(*argc + (n_lines * 2) + 1, sizeof(char *));
    if (!argv) {
        fprintf(stderr, "read_config_file: failed to calloc.\n");
        exit(EXIT_FAILURE);
    }
    rewind(f);
    /* copy the program name first */
    argv[i] = config_strdup(argv_orig[0]);
    i += 1;
    /*
     * then build arguments and remove comments and surrounding whitespace.
     * Never read more lines than were counted, so that argv cannot overflow
     * even if the file grew in the meantime.
     */
    for (int n = 0; (n < n_lines) && (fgets(buf, BUF_SIZE, f) != NULL); n++) {
        char *opt = buf;
        char *mark = strchr(opt, '#');
        if (mark) {
            *mark = '\0';
        }
        /* drop the newline and any other trailing whitespace */
        size_t len = strlen(opt);
        while ((len > 0) && isspace((unsigned char) opt[len - 1])) {
            len -= 1;
            opt[len] = '\0';
        }
        /* drop leading whitespace */
        while (isspace((unsigned char) *opt)) {
            opt += 1;
        }
        if (*opt == '\0') {
            continue;
        }
        /* the option ends at the first whitespace; the rest is its value */
        char *value = opt;
        while ((*value != '\0') && !isspace((unsigned char) *value)) {
            value += 1;
        }
        if (*value != '\0') {
            *value = '\0';
            value += 1;
            while (isspace((unsigned char) *value)) {
                value += 1;
            }
        }
        argv[i] = config_strdup(opt);
        i += 1;
        /* trailing whitespace is already gone, so a value is never blank */
        if (*value != '\0') {
            argv[i] = config_strdup(value);
            i += 1;
        }
    }
    if (ferror(f)) {
        fprintf(stderr, "read_config_file: failed to read %s\n", config_file);
        exit(EXIT_FAILURE);
    }
    fclose(f);

    /* done copying in the arguments from the config file, now copy the original argv */
    for (int j = 1; j < *argc; i++, j++) {
        argv[i] = config_strdup(argv_orig[j]);
    }
    /* done, now store the final NULL and return results */
    argv[i] = NULL;
    *argc = i;
    return argv;
}

int main(int argc, char *argv_orig[]) {
    log_fs = stderr;
    int opt;
    int w_flag = 0;
    int tmp_int = 0;
    char **argv = read_config_file(&argc, argv_orig, CONFIG_FILE);
    if (argc < 0) {
        fprintf(stderr, "parse error in " CONFIG_FILE ".\n");
        exit(EXIT_FAILURE);
    }
    while ((opt = getopt(argc, argv, "C:H:R:Vdhl:o:t:vw:")) != -1) {
        switch (opt) {
        case 'C':
            use_inactive_file_cache = (atoi(optarg) != 0);
            break;
        case 'd':
            log_level = LOG_DEBUG;
            break;
        case 'h':
            print_usage_and_exit(argv[0]);
            break;
        case 'l':
            log_level = atoi(optarg);
            break;
        case 'R':
            reserved_cpu_str = strdup(optarg);
            break;
        case 't':
            tmp_int = atoi(optarg);
            if ((tmp_int >= 0) && (tmp_int <= 100)) {
                htt_percent = tmp_int;
            }
            break;
        case 'v':
            log_level = LOG_INFO;
            break;
        case 'V':
            print_version_and_exit(argv[0]);
            break;
        case 'w':
            w_flag = 1;
            parse_two_arg_values(optarg, &requested_cpus, &requested_mbs, 0, 2);
            break;
        case 'H':
            hugepage_size = atoi(optarg); /* in MiB */
            break;
        case 'o':
            vcpu_load_percent = atof(optarg); /* cpu overcommit ratio */
            break;
        default:
            print_usage_and_exit(argv[0]);
            break;
        }
    }
    if (argc > optind) {
        fprintf(stderr, "Unexpected arg = %s\n", argv[optind]);
        exit(EXIT_FAILURE);
    }
    if (hugepage_size > 0) {
        use_inactive_file_cache = 0;
    }
    num_cpus = get_num_cpus();
    // First, make note of any reserved CPUs....
    if (reserved_cpu_str != NULL) {
        CLEAR_CPU_LIST(reserved_cpu_mask_list_p);
        int n = add_ids_to_list_from_str(reserved_cpu_mask_list_p, reserved_cpu_str);
        char buf[BUF_SIZE];
        str_from_id_list(buf, BUF_SIZE, reserved_cpu_mask_list_p);
        numad_log(LOG_NOTICE, "Reserving %d CPUs (%s) for non-numad use\n", n, buf);
        // turn reserved list into a negated mask for later ANDing use...
        negate_cpu_list(reserved_cpu_mask_list_p);
    }
    if (w_flag) {
        // Get pre-placement NUMA advice without starting daemon
        update_nodes();
        numad_log(LOG_NOTICE, "Getting NUMA pre-placement advice for %d CPUs and %d MBs, HP %d\n", requested_cpus / ONE_HUNDRED, requested_mbs, hugepage_size);
        id_list_p node_list_p = pick_numa_nodes(-1, requested_cpus, requested_mbs, hugepage_size, 0);
        char buf[BUF_SIZE];
        str_from_id_list(buf, BUF_SIZE, node_list_p);
        fprintf(stdout, "%s\n", buf);
        exit(EXIT_SUCCESS);
    } else {
        fprintf(stderr, "Missing required -w flag\n");
        exit(EXIT_FAILURE);
    }
}
