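# AWK script that normalizes a CSV file made of alternating header/value row
# pairs: it prints one unified header line, then one line per value row with
# "NaN" in every column that row has no (or an empty) value for.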

BEGIN {
    # Comma-separated on both sides: fields are split on "," when records
    # are read, and "," is the separator awk uses when joining output fields.
    FS = OFS = ","
}

END {
    # Runs after the main input loop has finished. The file named by ARGV[1]
    # is re-read twice with getline:
    #   pass 1 collects every distinct header name from the odd-numbered rows,
    #          in order of first appearance;
    #   pass 2 prints, for each even-numbered (value) row, one cell per
    #          collected header, using the header row directly above it to
    #          locate the cell. Missing or empty cells are printed as "NaN".
    # Output lines are joined with OFS and carry no trailing separator.

    file = ARGV[1]
    if (file == "") {
        # Without a file name getline would fail on every call; bail out
        # instead of spinning on the error return.
        print "error: an input file must be given as the first argument" > "/dev/stderr"
        exit 1
    }

    # Pass 1: gather unique header names, preserving first-seen order.
    n_unique = 0
    row = 0
    # getline returns 1 on success, 0 at end of file and -1 on error, so the
    # result must be compared with 0 explicitly: -1 is "true" in awk.
    while ((status = (getline < file)) > 0) {
        row++
        if (row % 2 == 1) {
            for (i = 1; i <= NF; i++) {
                if (!($i in seen)) {
                    seen[$i] = 1
                    unique[++n_unique] = $i
                }
            }
        }
    }
    close(file)
    if (status < 0) {
        print "error: cannot read " file > "/dev/stderr"
        exit 1
    }

    # Print the unified header line. A numeric loop is used because the
    # order of "for (k in array)" is unspecified.
    line = ""
    for (u = 1; u <= n_unique; u++) {
        line = line (u > 1 ? OFS : "") unique[u]
    }
    print line

    # Pass 2: re-emit every value row against the unified header.
    row = 0
    while ((getline < file) > 0) {
        row++
        if (row % 2 == 1) {
            # Header row: rebuild the name -> field-number map from scratch
            # so that columns of an earlier, wider header cannot leak into
            # this pair. split("", arr) is the portable way to empty an array.
            split("", col_of)
            for (i = 1; i <= NF; i++) {
                # If a name repeats within one header row, the first
                # occurrence wins; this keeps exactly one cell per column.
                if (!($i in col_of)) {
                    col_of[$i] = i
                }
            }
            continue
        }

        # Value row: exactly one cell per unified header, in header order.
        line = ""
        for (u = 1; u <= n_unique; u++) {
            value = "NaN"
            if (unique[u] in col_of) {
                # A field number beyond NF reads as "", i.e. a missing value.
                cell = $(col_of[unique[u]])
                if (length(cell) > 0) {
                    value = cell
                }
            }
            line = line (u > 1 ? OFS : "") value
        }
        print line
    }
    close(file)
}