#!/usr/bin/perl
#########################################################
# usage:
#    merge.pl file1 file2
#
# Merges two spreadsheets on their first column: every
# row of file1 is kept and is extended by the columns
# 2..n of each row of file2 that has the same first cell.
#
# Example:
#
# file1:
#
#    A 11 22 33
#    B 44 55
#    C 77 88
#    D 99
#
# file2:
#
#    A aa bb
#    C cc dd ee
#    B ff gg hh ii
#    E jj kk
#
# merge file1 file2 produces the new matrix:
#
#    A 11 22 33 aa bb NA NA
#    B 44 55 NA ff gg hh ii
#    C 77 88 NA cc dd ee NA
#    D 99 NA NA NA NA NA NA
#
# merge file2 file1 produces the new matrix:
#
#    A aa bb NA NA 11 22 33
#    C cc dd ee NA 77 88 NA
#    B ff gg hh ii 44 55 NA
#    E jj kk NA NA NA NA NA
#
# While this could be done more resourcefully,
# the approach used here aims at clarity:
# one just builds two matrices, where entries can
# be accessed like m1(3,2) m2(4,1),
# then prints out a newly merged matrix.
#
# Oliver Knill, March, 2004
#########################################################

use strict;
use warnings;

#########################################################
# Configuration
#########################################################
#
# Default delimiters (used as regular expressions by split)
# and the string printed for empty or missing entries.

our $delimiter1 = "[ ]+";
our $delimiter2 = "[ ]+";
our $emptystr   = "NA";

#########################################################
# Shared state
#########################################################
#
# @matrix1 / @matrix2 hold one array reference per input
# row; each referenced array holds the cells of that row.

our ($file1, $file2);
our (@matrix1, @matrix2);
our ($num_of_rows1, $num_of_rows2) = (0, 0);
our ($num_of_cols1, $num_of_cols2) = (0, 0);

#########################################################
# Input part
#########################################################

# read_matrix($path, $delimiter)
#
# Reads $path line by line and splits every line on $delimiter.
# Returns a reference to an array of rows (array references).
# Line terminators (LF or CRLF) are removed before splitting, so
# they never end up inside the last cell, and lines that contain
# no cell at all (empty or delimiter-only lines) are skipped.
# Dies when the file cannot be opened or closed.
sub read_matrix {
    my ($path, $delimiter) = @_;

    open(my $fh, '<', $path) or die "Can't open $path $!";

    my @rows;
    while (my $line = <$fh>) {
        $line =~ s/[\r\n]+\z//;

        # split drops trailing empty fields, so a trailing delimiter
        # does not create an extra column.
        my @cells = split($delimiter, $line);
        push @rows, \@cells if @cells;
    }

    close($fh) or die "Can't close $path $!";
    return \@rows;
}

# max_cols($rows)
#
# Returns the largest number of cells found in any row of $rows
# (0 for an empty matrix).
sub max_cols {
    my ($rows) = @_;

    my $max = 0;
    for my $row (@$rows) {
        my $width = scalar @$row;
        $max = $width if $width > $max;
    }
    return $max;
}

# load_matrices($path1, $path2)
#
# Reads both inputs and fills the shared state: the two matrices,
# their row counts and their maximal column counts.
sub load_matrices {
    my ($path1, $path2) = @_;

    ($file1, $file2) = ($path1, $path2);

    @matrix1 = @{ read_matrix($file1, $delimiter1) };
    @matrix2 = @{ read_matrix($file2, $delimiter2) };

    $num_of_rows1 = scalar @matrix1;
    $num_of_rows2 = scalar @matrix2;

    $num_of_cols1 = max_cols(\@matrix1);
    $num_of_cols2 = max_cols(\@matrix2);

    return;
}

#########################################################
# Procedures
#########################################################

# _cell($rows, $i, $j)
#
# Returns the cell in row $i, column $j (both counted from 1) of
# the matrix $rows.  Returns $emptystr when the position lies
# outside the matrix or the cell is the empty string.
sub _cell {
    my ($rows, $i, $j) = @_;

    return $emptystr if $i < 1 || $j < 1;

    my $row = $rows->[ $i - 1 ];
    return $emptystr unless defined $row;

    my $entry = $row->[ $j - 1 ];
    return $emptystr if !defined $entry || $entry eq "";

    return $entry;
}

# access matrix elements in first matrix: m1($row, $col), 1-based
sub m1 {
    my ($i, $j) = @_;
    return _cell(\@matrix1, $i, $j);
}

# access matrix elements in second matrix: m2($row, $col), 1-based
sub m2 {
    my ($i, $j) = @_;
    return _cell(\@matrix2, $i, $j);
}

#########################################################
# Output part
#########################################################

# merged_lines()
#
# Builds the merged matrix from the shared state and returns it as
# a list of strings, one per row of the first matrix, with cells
# separated by a single space and no trailing newline.
sub merged_lines {

    # Index the rows of the second matrix by their key (first cell)
    # once, instead of scanning the whole matrix for every row of the
    # first one.  Row numbers are stored in file order.
    my %rows_for;
    for my $k (1 .. $num_of_rows2) {
        push @{ $rows_for{ m2($k, 1) } }, $k;
    }

    my $pad_width = $num_of_cols2 > 1 ? $num_of_cols2 - 1 : 0;

    my @lines;
    for my $i (1 .. $num_of_rows1) {
        my @cells = map { m1($i, $_) } 1 .. $num_of_cols1;

        my $matches = $rows_for{ m1($i, 1) };
        if ($matches) {

            # Every row of the second matrix with this key is appended,
            # so a key that occurs more than once there widens the row.
            for my $k (@$matches) {
                push @cells, map { m2($k, $_) } 2 .. $num_of_cols2;
            }
        }
        else {
            push @cells, ($emptystr) x $pad_width;
        }

        push @lines, join(" ", @cells);
    }
    return @lines;
}

# main(@args)
#
# Entry point: expects the two file names, prints the merged matrix
# to standard output and returns the process exit status.
sub main {
    my @args = @_;

    die "usage: merge.pl file1 file2\n" if @args < 2;

    load_matrices($args[0], $args[1]);
    print "$_\n" for merged_lines();

    return 0;
}

# Run only when executed as a script, not when loaded with require/do.
exit main(@ARGV) unless caller;

1;