I'm trying to read a multi-line tab-separated file in bash. The format is such that empty fields are expected. Unfortunately, the shell is collapsing together field separators.
Here's an approach with some niceties:
The code: `file_data` and `file_input` are just for generating input as though from an external command called from the script. `data` and `cols` could be parameterized for the `get` and `put` calls, etc., but this script doesn't go that far.
#!/bin/bash
# Simulated input: eight rows of three tab-separated fields, covering every
# combination of empty and non-empty cells.
file_data=( $'\t\t' $'\t\tbC' $'\tcB\t' $'\tdB\tdC' \
            $'eA\t\t' $'fA\t\tfC' $'gA\tgB\t' $'hA\thB\thC' )
file_input () { printf '%s\n' "${file_data[@]}" ; } # simulated input file
delim=$'\t'

# Read the "file" into lines[], one element per input line.
#
# A read loop is used rather than IFS=$'\n' word-splitting of $(file_input):
# the splitting approach needs IFS and noglob (set -f) changed globally, and
# restoring with set -"$saved_flags" cannot clear noglob again, because the
# flags were saved before -f was turned on. Here IFS is changed only for the
# read command itself, so no shell state needs to be restored.
#   - IFS= keeps leading/trailing tabs, i.e. empty first/last fields.
#   - -r keeps backslashes literal.
#   - the || test picks up a final line that has no trailing newline.
# Blank lines are skipped, as they were with the word-splitting approach.
lines=()
while IFS= read -r input_line || [ -n "$input_line" ] ; do
  if [ -n "$input_line" ] ; then
    lines+=("$input_line")
  fi
done < <(file_input)
unset input_line
# the read-in data has (rows * cols) fields, with cols as the stride:
# cell (r, c) is stored at data[cols * r + c].
data=()
cols=0

# get ROW COL
#   Print the cell at (ROW, COL) followed by a newline.
#   Reads the globals data and cols.
#   printf is used instead of echo so that a cell whose text is "-n", "-e"
#   or "-E" is printed verbatim rather than being taken as an echo option.
get () {
  local r=$1 c=$2 i
  (( i = cols * r + c ))
  printf '%s\n' "${data[i]}"
}

# put ROW COL VALUE
#   Store VALUE in the cell at (ROW, COL).
#   Reads the global cols and writes the global data.
put () {
  local r=$1 c=$2 i
  (( i = cols * r + c ))
  data[i]=$3
}
# convert the lines from input into the pseudo-2D data array.
#
# parse_lines
#   Globals read:    lines (one element per input row), delim (separator)
#   Globals written: data (row-major cells), cols (stride), rows (row count)
#
#   - The separator is always quoted inside patterns, so it is matched as a
#     literal string even if it contains glob characters such as '*' or '?'.
#   - An empty or unset delim falls back to a tab; an empty separator would
#     match everywhere and the split could never make progress.
#   - cols is the field count of the widest row, and every cell is stored at
#     data[row * cols + col]. Shorter rows are padded with empty cells, so a
#     ragged input cannot shift the rows that follow it.
parse_lines () {
  local sep=${delim:-$'\t'}
  local line rest stripped n r c idx

  data=()
  cols=0
  rows=${#lines[@]}

  # pass 1: field count = separator occurrences + 1; the widest row wins.
  for line in "${lines[@]}" ; do
    stripped=${line//"$sep"/}
    n=$(( (${#line} - ${#stripped}) / ${#sep} + 1 ))
    if (( n > cols )) ; then
      cols=$n
    fi
  done

  # pass 2: peel one field at a time off the front of each line.
  r=0
  for line in "${lines[@]}" ; do
    rest=$line
    for (( c = 0 ; c < cols ; ++c )) ; do
      (( idx = r * cols + c ))
      case "$rest" in
        *"$sep"*) data[idx]=${rest%%"$sep"*} ; rest=${rest#*"$sep"} ;;
        *)        data[idx]=$rest ; rest= ;;   # last field, or padding
      esac
    done
    (( ++r ))
  done
}
parse_lines
# Render the data array as a matrix, one line per row. Each line starts with
# a "row NN: " label; every cell is fetched through the get accessor and
# printed right-aligned in a 5-character column followed by a space.
row=0
while (( row < rows )) ; do
  printf 'row %2d: ' "$row"
  col=0
  while (( col < cols )) ; do
    printf '%5s ' "$(get "$row" "$col")"
    (( ++col ))
  done
  printf '\n'
  (( ++row ))
done
Output:
$ ./tabtest
row  0:
row  1:                bC
row  2:          cB
row  3:          dB    dC
row  4:    eA
row  5:    fA          fC
row  6:    gA    gB
row  7:    hA    hB    hC