//
// By: Zoran Obradovic (zoo@cs.uchicago.edu)
//

import java.io.*;
import java.util.*;


//
// Reads a pipe-delimited HomoloGene dump, keeps the groups in which a human
// gene has an ortholog in every one of the REQUIRED_SPECIES, and reports each
// such group on standard output and (when an HTML writer is open) as one
// table row of HTML.
//
public class HomoloGene
{

	//
	// CLASS BEHAVIORS
	//

	// Parses the file named by theFilename.
	//
	// The first line of the file is discarded (it is expected to be the
	// opening START marker).  After that, lines are collected into a group
	// until the next line starting with START, an empty line, or the end of
	// the file; lines starting with TITLE are kept as raw titles, every
	// other line is parsed as an ortholog record.  An empty line ends the
	// whole parse, not just the current group.
	//
	// A group is reported when it holds at least theMin records involving a
	// human gene and has a human ortholog for every REQUIRED_SPECIES.
	//
	// Returns the groups that were reported, in file order (never null).
	// Throws whatever the underlying reader or the record/title parsers
	// throw; the reader is closed in every case.
	public List parse (String theFilename, int theMin)
	throws Exception
	{
		List groups = new LinkedList();
		BufferedReader reader = new BufferedReader (new FileReader (theFilename));

		try
		{
			String line = reader.readLine();		// the first ">"
			while (reader.ready())
			{
				Group group = new Group();
				while (true)
				{
					line = reader.readLine();
					if (line == null || line.length() == 0 || line.startsWith(START)) break;

					// if its a title, add it
					if (line.startsWith(TITLE)) group.titles.add(line);

					// otherwise, its a record, parse it and add it
					else group.orthologs.add(parseRecord(line));
				}

				// how many humans do we have here?
				calculateHumanCount (group, HUMAN);

				if (group.humanCount >= theMin && containsAllRequired(group))
				{
					report (group);
					groups.add (group);
				}

				// an empty line or the end of the file ends the parse
				if (line == null || line.length() == 0)
				{
					break;
				}
			}
		}
		finally
		{
			// close the file even when a record or title is malformed
			reader.close();
		}

		return groups;
	}


	// true when the group has a human ortholog for every required species
	private boolean containsAllRequired (Group group)
	{
		for (int i = 0; i < REQUIRED_SPECIES.length; i++)
		{
			if (!groupContains(group, HUMAN, REQUIRED_SPECIES[i])) return false;
		}
		return true;
	}


	// Sorts the group, assigns its titles, then prints it to standard output
	// and writes one HTML table row: the human accession first, followed by
	// one cell per required species.
	private void report (Group group)
	throws IOException
	{
		sort(group);
		assignComments (group);

		// the human accession is taken from the human/pig record
		Ortholog ortho = getOrtholog(group, HUMAN, PIG);

		System.out.println ("-----------------------------------------------------");
		System.out.println (group.titleSymbol + SEP + group.title);

		emitHtml (TAB + TR + NEWLINE);
		if (ortho == null)
		{
			emitHtml (TAB + TAB + TD + TDX + NEWLINE);
		}
		else
		{
			emitHtml (TAB + TAB + TD + "<a href=\"" + getLink(ortho.accessionA) + "\">" + ortho.accessionA + "</a>" + TDX + NEWLINE);
		}
		for (int i = 0; i < REQUIRED_SPECIES.length; i++)
		{
			writeHTML(group, REQUIRED_SPECIES[i]);
		}
		emitHtml (TAB + TRX + NEWLINE);
	}


	// Writes text to the HTML output.  The writer is only opened by main();
	// when the class is used without it there is no HTML output and this is
	// a no-op instead of a NullPointerException.
	private static void emitHtml (String text)
	throws IOException
	{
		if (HTML != null) HTML.write (text);
	}


	// returns the NCBI Entrez search URL for the given accession
	public String getLink (String accession)
	{
		return "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=nucleotide&dopt=GenBank&doptcmdl=Detailed&term=" + accession;
	}


	// Writes the HTML cell for the human/species ortholog of the group and
	// prints the same ortholog as one SEP-delimited line on standard output.
	// When the group has no such ortholog an empty cell is written, so the
	// table columns stay aligned, and nothing is printed.
	public void writeHTML (Group group, String species)
	throws IOException
	{
		Ortholog ortho = getOrtholog (group, HUMAN, species);
		if (ortho == null)
		{
			emitHtml (TAB + TAB + TD + TDX + NEWLINE);
			return;
		}

		emitHtml (TAB + TAB + TD + "<a href=\"" + getLink(ortho.accessionB) + "\">" + ortho.accessionB + "</a>" + SPACE + LPAREN + ortho.ten + RPAREN + TDX + NEWLINE);

		System.out.print (getOrganismName(ortho.organismA) + SEP);
		System.out.print (getOrganismName(ortho.organismB) + SEP);
		System.out.print (ortho.geneIdA + SEP);
		System.out.print (ortho.geneIdB + SEP);
		System.out.print (ortho.accessionA + SEP);
		System.out.print (ortho.accessionB + SEP);
		System.out.print (ortho.locusA + SEP);
		System.out.print (ortho.locusB + SEP);
		System.out.print (ortho.ten + SEP);
		System.out.print (ortho.titleSymbol + SEP);
		System.out.print (ortho.title);
		System.out.println();
	}



	// Returns the first ortholog of the group whose organismA is a and whose
	// organismB is b, or null when there is none.  Organisms are compared
	// exactly, the same way groupContains compares them.
	private Ortholog getOrtholog (Group group, String a, String b)
	{
		for (Iterator i = group.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog candidate = (Ortholog) i.next();
			if (candidate.organismA.equals(a) && candidate.organismB.equals(b)) return candidate;
		}

		return null;
	}



	// true when the group has a record pairing organisms a and b, in either order
	private boolean groupContains (Group group, String a, String b)
	{
		for (Iterator i = group.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog candidate = (Ortholog) i.next();

			if (candidate.organismA.equals(a) && candidate.organismB.equals(b)) return true;
			if (candidate.organismB.equals(a) && candidate.organismA.equals(b)) return true;
		}
		return false;
	}



	// Stores in theGroup.humanCount the number of records in which either
	// organism is theSpecies.  The count is assigned, not accumulated, so
	// calling this again does not double it.
	private void calculateHumanCount (Group theGroup, String theSpecies)
	{
		int count = 0;
		for (Iterator i = theGroup.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog candidate = (Ortholog) i.next();
			if (candidate.organismA.equals(theSpecies) || candidate.organismB.equals(theSpecies))
			{
				count++;
			}
		}
		theGroup.humanCount = count;
	}




	// Parses the raw TITLE lines of the group into Title objects (stored in
	// parsedTitles), then sets the title of the group from its first human
	// ortholog and the title of every ortholog from its organismB/geneIdB.
	// A gene that has no TITLE line gets an empty symbol and an empty title.
	public void assignComments (Group theGroup)
	{
		// make title objects out of everything
		List titles = new LinkedList();
		for (Iterator i = theGroup.titles.iterator(); i.hasNext(); )
		{
			titles.add(parseTitle ((String) i.next()));
		}

		// save these parsed titles in the group object
		theGroup.parsedTitles = titles;

		// set the title for this group
		Ortholog human = getFirstHumanOrtholog(theGroup);
		Title groupTitle = (human == null) ? null : getTitle(titles, HUMAN, human.geneIdA);
		theGroup.titleSymbol = (groupTitle == null) ? "" : groupTitle.titleSymbol;
		theGroup.title = (groupTitle == null) ? "" : groupTitle.title;

		// iterate over all the orthologs
		for (Iterator i = theGroup.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog ortholog = (Ortholog) i.next();

			// get the proper title
			Title title = getTitle (titles, ortholog.organismB, ortholog.geneIdB);

			ortholog.titleSymbol = (title == null) ? "" : title.titleSymbol;
			ortholog.title = (title == null) ? "" : title.title;
		}
	}


	// returns the first ortholog whose organismA is HUMAN, or null
	public Ortholog getFirstHumanOrtholog (Group theGroup)
	{
		for (Iterator i = theGroup.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog candidate = (Ortholog) i.next();
			if (candidate.organismA.equals(HUMAN)) return candidate;
		}

		return null;
	}

	// returns the first Title in theList with the given organism and id, or null
	public Title getTitle (List theList, String theOrganism, String theId)
	{
		for (Iterator i = theList.iterator(); i.hasNext(); )
		{
			Title candidate = (Title) i.next();
			if (candidate.organism.equals(theOrganism) && candidate.id.equals(theId))
			{
				return candidate;
			}
		}

		// not found
		return null;
	}


	// Parses a line of the form
	//     TITLE <organism>_<id>=<symbol> <free text title>
	// The line is split at the FIRST "=" and the key at the FIRST "_", so
	// titles containing "=" and ids containing "_" are kept whole.
	// Throws NoSuchElementException when the "=", the organism, the id or the
	// symbol is missing.
	public Title parseTitle (String x)
	{
		String body = x.substring(TITLE.length()).trim();

		// split it into two halves, along the first "=" sign
		int equalsAt = body.indexOf('=');
		if (equalsAt < 0)
		{
			throw new NoSuchElementException ("TITLE line has no '=': " + x);
		}
		String key = body.substring(0, equalsAt).trim();
		String value = body.substring(equalsAt + 1).trim();

		// split the first half, along the first underbar
		int underbarAt = key.indexOf('_');
		if (underbarAt <= 0 || underbarAt == key.length() - 1)
		{
			throw new NoSuchElementException ("TITLE key is not <organism>_<id>: " + x);
		}
		if (value.length() == 0)
		{
			throw new NoSuchElementException ("TITLE line has no symbol: " + x);
		}

		Title ret = new Title();
		ret.organism = key.substring(0, underbarAt);
		ret.id = key.substring(underbarAt + 1);

		// split the second half, along the first space; value is trimmed, so
		// the symbol starts at index 0 and its length is the right offset
		StringTokenizer tokenizer = new StringTokenizer(value, "\t ");
		ret.titleSymbol = tokenizer.nextToken();
		ret.title = value.substring(ret.titleSymbol.length()).trim();

		return ret;
	}








	// This sorts the orthologs of the group (see Ortholog.compareTo).
	// The sorted records are stored in a fresh, growable list, so records
	// can still be added to the group afterwards.
	public void sort (Group theGroup)
	{
		List sorted = new ArrayList(theGroup.orthologs);
		Collections.sort(sorted);
		theGroup.orthologs = sorted;
	}




	// sorts the group, then prints its human records and its raw title lines
	public void print (Group theGroup)
	{
		System.out.println ("-----------------------------------------------------");

		// sort this thing
		sort(theGroup);

		for (Iterator i = theGroup.orthologs.iterator(); i.hasNext(); )
		{
			Ortholog ortholog = (Ortholog) i.next();
			if (!isHuman(ortholog)) continue;

			System.out.print (getOrganismName(ortholog.organismA) + SEP);
			System.out.print (getOrganismName(ortholog.organismB) + SEP);
			System.out.print (ortholog.geneIdA + SEP);
			System.out.print (ortholog.geneIdB + SEP);
			System.out.print (ortholog.accessionA + SEP);
			System.out.print (ortholog.accessionB + SEP);
			System.out.print (ortholog.locusA + SEP);
			System.out.print (ortholog.locusB + SEP);
			System.out.print (ortholog.ten);
			System.out.println();
		}

		for (Iterator i = theGroup.titles.iterator(); i.hasNext(); )
		{
			System.out.println (i.next());
		}
	}



	// true when either organism of the record is HUMAN
	public boolean isHuman (Ortholog x)
	{
		return x.organismA.equals(HUMAN) || x.organismB.equals(HUMAN);
	}



	// switches B to A, if B is human...
	// (organism, gene id, accession and locus are exchanged; matchType and
	// ten belong to the pair and stay where they are)
	public void switcheroo (Ortholog x)
	{
		if (x.organismB.equals(HUMAN))
		{
			String temp = null;

			temp = x.organismA;
			x.organismA = x.organismB;
			x.organismB = temp;

			temp = x.geneIdA;
			x.geneIdA = x.geneIdB;
			x.geneIdB = temp;

			temp = x.accessionA;
			x.accessionA = x.accessionB;
			x.accessionB = temp;

			temp = x.locusA;
			x.locusA = x.locusB;
			x.locusB = temp;
		}
	}



	// Parses one SEP-delimited record with the ten fields
	//     organismA, organismB, matchType, locusA, geneIdA, accessionA,
	//     locusB, geneIdB, accessionB, ten
	// Every field is trimmed.  Empty fields are kept in place (they do not
	// shift the fields after them); fields past the tenth are ignored.
	// Throws NoSuchElementException when the record has fewer than ten fields.
	public Ortholog parseRecord (String x)
	{
		String [] fields = splitFields (x);
		if (fields.length < RECORD_FIELDS)
		{
			throw new NoSuchElementException ("record has " + fields.length + " fields, expected " + RECORD_FIELDS + ": " + x);
		}

		Ortholog ret = new Ortholog();

		ret.organismA	= fields[0].trim();
		ret.organismB	= fields[1].trim();
		ret.matchType	= fields[2].trim();
		ret.locusA		= fields[3].trim();
		ret.geneIdA		= fields[4].trim();
		ret.accessionA	= fields[5].trim();
		ret.locusB		= fields[6].trim();
		ret.geneIdB		= fields[7].trim();
		ret.accessionB	= fields[8].trim();
		ret.ten			= fields[9].trim();

		// switch it so the human is always first
		switcheroo (ret);

		return ret;
	}


	// splits x at every SEP, keeping empty fields
	private static String [] splitFields (String x)
	{
		List fields = new ArrayList();
		int from = 0;
		while (true)
		{
			int at = x.indexOf(SEP, from);
			if (at < 0)
			{
				fields.add(x.substring(from));
				break;
			}
			fields.add(x.substring(from, at));
			from = at + SEP.length();
		}
		return (String []) fields.toArray(new String [fields.size()]);
	}






	// returns the name of an organism, or "x" followed by the given id when
	// the id is not in the organism table
	public static String getOrganismName (String x)
	{
		Object ret = myOrganisms.get(x);
		if (ret == null) ret = "x" + x;

		return (String) ret;
	}

	// returns the id under which the given organism name is registered, or null
	public String getOrganismNumber (String name)
	{
		for (Iterator i = myOrganisms.entrySet().iterator(); i.hasNext(); )
		{
			Map.Entry entry = (Map.Entry) i.next();
			if (entry.getValue().equals(name)) return (String) entry.getKey();
		}

		// nothing was found
		return null;
	}



	//
	// CLASS CONSTANTS
	//
	private static final String NEWLINE	= "\n";
	private static final String TAB 	= "\t";
	public static final String	SPACE	= " ";
	public static final String	LPAREN	= "(";
	public static final String	RPAREN	= ")";
	public static final String 	START	= ">";
	public static final String 	SEP 	= "|";
	public static final String	TITLE	= "TITLE";
	public static final int		BEST_SIZE	= 16;

	// number of fields in one ortholog record
	private static final int	RECORD_FIELDS	= 10;

	public static final String TD 		= "<td>";
	public static final String TDX		= "</td>";
	public static final String TR 		= "<tr>";
	public static final String TRX		= "</tr>";

	public static final String HUMAN	= "9606";
	public static final String PIG		= "9823";
	public static final String MOUSE	= "10090";
	public static final String FLY		= "7227";
	public static final String ZEBRAFISH= "7955";
	public static final String C_ELEGANS= "6239";
	public static final String COW		= "9913";
	public static final String FLOWER	= "3702";
	public static final String RAT		= "10116";
	public static final String MOSQUITO	= "7165";
	public static final String TROUT	= "8022";
	public static final String SOYBEAN	= "3847";
	public static final String YEAST	= "4932";
	public static final String ORYZA	= "4530";
	public static final String TOMATO	= "4081";
	public static final String BARLEY	= "4513";
	public static final String ALFALFA	= "3880";
	public static final String WHEAT	= "4565";
	public static final String FROG		= "8355";
	public static final String MALARIA	= "5833";
	public static final String SQUIRT	= "7719";
	public static final String MAIZE	= "4577";

	// The species a group must pair with a human gene to be reported, in the
	// order of the HTML columns.  Kept in one place so the filter and the
	// output cannot drift apart.
	private static final String [] REQUIRED_SPECIES =
	{
		PIG, COW, MOUSE, FROG, RAT, TROUT, ZEBRAFISH, FLY, C_ELEGANS, FLOWER, SOYBEAN, YEAST
	};



	//
	// CLASS OBJECTS
	//
	private static FileWriter HTML;			// opened by main(); null otherwise
	private static HashMap	myOrganisms;	// organism id -> display name

	// CLASS INITIALIZER
	static
	{
		myOrganisms = new HashMap();

		myOrganisms.put(FLOWER,		"Flower");				// Arabidopsis thaliana
		myOrganisms.put(COW,		"Cow");					// Bos taurus
		myOrganisms.put(C_ELEGANS,	"C.elegans");
		myOrganisms.put(ZEBRAFISH,	"Fish");				// Danio rerio (zebrafish)
		myOrganisms.put(FLY,		"Fly");					// Drosophila melanogaster
		myOrganisms.put(HUMAN,		"Human");				// homo sapiens
		myOrganisms.put(BARLEY,		"Barley");				// Hordeum vulgare (barley)
		myOrganisms.put(TOMATO,		"Tomato");				// Lycopersicon esculentum  (tomato)
		myOrganisms.put(ALFALFA,	"Alfalfa");				// Medicago truncatula
		myOrganisms.put(MOUSE,		"Mouse");				// Mus musculus
		myOrganisms.put(ORYZA,		"Oryza");				// Oryza sativa
		myOrganisms.put(RAT,		"Rat");					// Rattus norvegicus
		myOrganisms.put(PIG,		"Pig");					// Sus scrofa
		myOrganisms.put(WHEAT,		"Wheat");				// Triticum aestivum
		myOrganisms.put(FROG,		"Frog");				// Xenopus laevis
		myOrganisms.put(MAIZE,		"Corn");				// Zea mays (Indian Maize)
		myOrganisms.put(MOSQUITO,	"Mosquito");			// african malaria mosquito  (a. gambiae)
		myOrganisms.put("4896",		"fission yeast");		// Schizosaccharomyces pombe (fission yeast)
		myOrganisms.put(TROUT,		"Trout");				// Oncorhynchus mykiss (rainbow trout)
		myOrganisms.put(SQUIRT,		"SeaSquirt");			// Ciona intestinalis
		myOrganisms.put(MALARIA,	"Plasmodium falciparum");	// Plasmodium falciparum (malaria parasite)
		myOrganisms.put(SOYBEAN,	"soybean");				// Glycine max (soybean)
		myOrganisms.put(YEAST,		"bakers yeast");		// Saccharomyces cerevisiae
	}




	//
	// INNER CLASS
	//
	// One record of the file: a pair of genes, A and B.  Records produced by
	// parseRecord always have the human gene, if any, on the A side.
	public class Ortholog
	implements Comparable
	{
		public String	organismA;
		public String	organismB;
		public String	matchType;
		public String	locusA;
		public String 	geneIdA;
		public String	accessionA;
		public String	locusB;
		public String 	geneIdB;
		public String	accessionB;
		public String	ten;			// tenth field of the record, kept as text

		public String 	titleSymbol;
		public String	title;


		// Orders by organismA, then organismB, then ten.  All three are
		// compared as strings.
		// NOTE(review): if ten is numeric, string order differs from numeric
		// order ("100" sorts before "95") - confirm which one is wanted.
		public int compareTo (Object o)
		{
			Ortholog b = (Ortholog) o;

			int ret = this.organismA.compareTo(b.organismA);
			if (ret != 0) return ret;

			ret = this.organismB.compareTo(b.organismB);
			if (ret != 0) return ret;

			ret = this.ten.compareTo(b.ten);
			return ret;
		}

		// Two orthologs are equal when compareTo finds no difference.
		// Anything that is not an Ortholog (null included) is not equal.
		public boolean equals (Object o)
		{
			if (this == o) return true;
			if (!(o instanceof Ortholog)) return false;

			return compareTo(o) == 0;
		}

		// hashes exactly the fields compareTo looks at, so equal orthologs
		// share a hash code
		public int hashCode ()
		{
			int ret = 17;
			ret = 31 * ret + (organismA == null ? 0 : organismA.hashCode());
			ret = 31 * ret + (organismB == null ? 0 : organismB.hashCode());
			ret = 31 * ret + (ten == null ? 0 : ten.hashCode());
			return ret;
		}
	}


	//
	// INNER CLASS
	//
	// The records and title lines found between two START markers.
	public class Group
	{
		// yep, let there be public data :(
		public List		orthologs;		// Ortholog objects
		public List		titles;			// raw TITLE lines (String)
		public List		parsedTitles;	// Title objects, set by assignComments

		public int		humanCount;		// counts the number of human gene in the group

		public String	titleSymbol;
		public String	title;

		public Group ()
		{
			orthologs	= new LinkedList();
			titles		= new LinkedList();
		}
	}



	//
	// INNER CLASS
	//
	// A parsed TITLE line: the gene it names and that gene's symbol and title.
	public class Title
	{
		public String 	organism;
		public String	id;
		public String 	titleSymbol;
		public String 	title;
	}



	//
	// BOOKKEEPERS
	//
	public HomoloGene ()
	{
		myBest 	= new LinkedList();
	}




	//
	// private members
	//
	private List	myBest;		// not read anywhere in this class



	//
	// main
	//
	// Usage: java HomoloGene <minimum human count>
	// Parses hmlg.trip.ftp.txt from the working directory and writes the
	// table rows to html.txt.
	public static void main (String [] args)
	{
		if (args.length < 1)
		{
			System.out.println ("usage: java HomoloGene <minimum human count>");
			return;
		}

		try
		{
			int minimum = Integer.parseInt(args[0]);
			String filename = "hmlg.trip.ftp.txt";

			HTML = new FileWriter ("html.txt");
			try
			{
				HomoloGene foo = new HomoloGene();
				foo.parse(filename, minimum);
			}
			finally
			{
				// flush and close what was written, even when the parse fails
				FileWriter out = HTML;
				HTML = null;
				out.close();
			}
		}
		catch (Throwable e)
		{
			e.printStackTrace();
			System.out.println ("CAUGHT: " + e.getClass().getName() + " " + e.getMessage());
		}
	}
}
