it's going to be hard to write a 'generic' script to handle any number of column as you have 'embedded spaces in your columns: 'New Jersey'
It would be easier to make your DB extraction to use different 'column separators' and use quotes around field values - more or less like CSV.
Here's an attemp to deal with your sample file with the embedded spaces.
The variable 'FIELDWIDTHS' contains the width of your three fields.
If this logic works for you, you can make variable 'FIELDWIDTHS' be a parameter to the script specifying different file widths that you want to parse.
for this code below - voot.awk:
nawk -f voot.awk file2format
Code:
function setFieldsByWidth( i,n,FWS,start,copyd0) {
# Licensed under GPL Peter S Tillier, 2003
# NB corrupts $0
copyd0 = $0 # make copy of $0 to work on
if (length(FIELDWIDTHS) == 0) {
print "You need to set the width of the fields that you require" > "/dev/stderr"
print "in the variable FIELDWIDTHS (NB: Upper case!)" > "/dev/stderr"
exit(1)
}
if (!match(FIELDWIDTHS,/^[0-9 ]+$/)) {
print "The variable FIELDWIDTHS must contain digits, separated" > "/dev/stderr"
print "by spaces." > "/dev/stderr"
exit(1)
}
n = split(FIELDWIDTHS,FWS)
if (n == 1) {
print "Warning: FIELDWIDTHS contains only one field width." > "/dev/stderr"
print "Attempting to continue." > "/dev/stderr"
}
start = 1
for (i=1; i <= n; i++) {
$i = trim(substr(copyd0,start,FWS[i]))
start = start + FWS[i]
}
return n;
}
# Note that the "/dev/stderr" entries in some lines have wrapped.
#
# I then call setFieldsByWidth() in my main awk code as follows:
function trim(str)
{
sub("^[ ]*", "", str);
sub("[ ]*$", "", str);
return str;
}
BEGIN {
#FIELDWIDTHS="7 6 5 4 3 2 1" # for example
FIELDWIDTHS="3 3 15"
OFS=","
}
!/^[ ]*$/ && FNR > 2 && !/record\(s\) selected./ {
saveDollarZero = $0 # if you want it later
numFields = setFieldsByWidth()
# now we can manipulate $0, NF and $1 .. $NF as we wish
for(i=1; i <= numFields; i++)
printf("%s%s", $i, (i != numFields) ? OFS : ORS);
}