#!/usr/bin/perl
# AMPS to OC
# AMPS pairwise output parser to OC input format
# Created 30/01/03 by MajicPhatCalves (a.k.a Greg)

use strict;
use warnings;
# Clear the terminal before producing any output. This shells out to the
# external `clear` command; its exit status is not checked.
system ("clear");

# Command line: input file, output file, and which score column to extract.
my ($infile, $outfile, $field) = @ARGV;
my ($count, @names, @scores);

############################# COMMAND LINE CHECK ##############################
# All three arguments are required, and the score field must be a whole number
# from 1 to 5. Anything else would later be turned into a column index that
# does not correspond to any of the documented score columns.
my $args_given = defined $infile && defined $outfile && defined $field;
my $field_ok   = $args_given
	&& $field =~ /\A\d+\z/
	&& $field >= 1
	&& $field <= 5;

if (!$field_ok) {
	warn "###### AMPS2OC ######\n";
	warn "Greg Machray 24/02/2003\n\n";
	warn $args_given
		? "Score field must be a whole number from 1 to 5\n"
		: "Command line not complete\n";
	warn "Correct usage is: amps2oc [infile_name] [outfile_name] [score field no.(1-5)]\n";
	# The legend goes to STDERR with the rest of the usage text, so the whole
	# message stays together and STDOUT is left untouched.
	warn "1=\%ID 2=NAS 3=NASAL 4=SD 5=score\n\n\n";
	# Non-zero status so calling scripts can detect the usage error.
	exit 1;
	}

############################# INFILE PROCESSING ###############################

# Open the AMPS output read-only. The three-argument form with a lexical
# handle stops a file name beginning with '>' or ending in '|' from being
# interpreted as an open mode or a command.
open (my $in_fh, '<', $infile) or die "Input file not found, or not accessible: $!\n";

# Translate the command-line selector into an index into the split score line.
# (Expects $field to hold the 1-5 selector from the command line.)
# 1,2,3 on command line are fields 9,10,11
if ($field < 4) {
	$field += 8;
	}
# 4,5 are fields 14 and 15
else {
	$field += 10;
	}

# Read one line at a time instead of loading the whole file into a list first.
while (my $line = <$in_fh>) {

	# If line has the identifiers in it, rip them out: the name is the text
	# after '>' (and any spaces), up to the next whitespace character.
	if ($line =~ m/>\s*?(\w.*?)\s/) {
		push @names, $1;
		}
	# if line is part of the scores list; split into elements:
	# [9] is percentage identity
	# [10] for NAS
	# [11] for NASAL
	# [14] for SD
	# [15] for score
	# A score line starts with whitespace, so split yields an empty element [0]
	# and the first number on the line is element [1].
	elsif ($line =~ m/^\s+\d+\s+\d+/) {
		my @parts = split /\s+/, $line;

		# A truncated score line would otherwise put an undefined value into
		# @scores and end up as a blank line in the output file.
		defined $parts[$field]
			or die "Score line $. of $infile has no field $field\n";

		push @scores, $parts[$field];
		}
	}
close $in_fh;

############################# OUTFILE PROCESSING ##############################
# Create the output file, truncating any existing file of that name. The
# three-argument form with a lexical handle keeps the file name from being
# parsed as part of the open mode.
open (my $out_fh, '>', $outfile) or die "Output file can't be created (or added to): $!\n";

# Get total number of sequences
$count = scalar @names;

# Print out in the correct format: the sequence count, then one name per line,
# then one score per line. join() gives the newline-separated layout without
# changing the global list separator $".
print {$out_fh} "$count\n";
print {$out_fh} join("\n", @names), "\n";
print {$out_fh} join("\n", @scores), "\n";

# Buffered write errors (e.g. a full disk) only show up when the handle is
# closed, so the result of close is checked.
close $out_fh or die "Output file could not be written completely: $!\n";

###############################################################################
exit;
