#!/usr/bin/python

import os, csv, sys
from GotermSummarization_PV_AllGenesInAssociationFile_quick import *

#Data of Go Ontology structure and gene_Goterm association
weightGographData = 'newWeightedPubMedGO.xml'
geneGotermAssociationData = 'gene_association.goa_human_2012'


#Create a GoGraph object (Node: every time you use the gotermSummarization(), you need to create a new object)
G = GoGraph(weightGographData,geneGotermAssociationData)

#A list of genes need to be summarized
GeneList = ['CLEC5A','COL11A1','CRABP2','CXCL10','DCC1','DDAH2','DEFB1','DEPDC1','DKFZp762E1312','DLG7','DNA2L','DSC2','DSC3','ECT2','ENC1','EXOSC5','EYA4','EZH2','FABP5','FAM64A','FGF9','FLJ21963','GFOD1','GGH','GINS1','GLDC','GOLGA8A','GOLT1B','HMMR','HRASLS','HS3ST3A1','IGFBP2','ING4','INHBB','ISG15','ITGB8','KIAA1199','KIF11','KIF14','KIF20A','KIF23','KIF2C','KIFC1','KLK7','KPNA2','KRT6A','LMNB1','MAL','MELK','MFAP2','MGAT4A','MKI67','MRS2L','MUC16','MYO10','NCAPD2','NDC80','NEK2','NETO2','NID2','NLRP2','NMU','NRAS','NRCAM','NUSAP1','OGDHL']


#Using Go term to summarize the list of gene.
Result =  G.gotermSummarization(GeneList,0.05,3)

#0.05 is the threshold of P_Value of the Go term node in final result.
#5 is the minimum number of genes in the Go term node in final result.
#The result has the format: [value_0,{Goterm_1:[a list of genes_1,value_1],Goterm_2:[a list of genes_2,value_2],....}]
# value_0: the total information lost for the summarization.
# Goterm_1: Goterm ID that is used to summarize the given list of gene.
# a list of genes_1: a subset of genes (in the given list of gene) that are annotated by Goterm_1.
# value_1: the level of Goterm_1 on the Go Ontology. The root note is in level 1.



#print the result
print 'Total information lost is', Result[0]
for goterm in Result[1]:
	print 'GO term ID:', goterm, '--------------'
	print 'GO term level:',Result[1][goterm][1]
	print 'Gene list:',Result[1][goterm][0],'\n'