#!/usr/bin/python
"""Example: summarize a list of genes with Gene Ontology (GO) terms.

Builds a GoGraph from a weighted GO structure file and a gene/GO-term
association file, summarizes GeneList, and prints the GO terms selected for
the summary together with the genes each one covers.
"""
import os
import csv
import sys

# Data of the GO ontology structure and of the gene/GO-term associations.
weightGographData = 'newWeightedPubMedGO.xml'
geneGotermAssociationData = 'gene_association.goa_human_2012'

# Threshold on the P-value of a GO term node in the final result.
P_VALUE_THRESHOLD = 0.05
# Minimum number of genes in a GO term node in the final result.
# NOTE(review): the original comment said 5, but the call has always passed 3;
# the value used by the code is kept -- confirm which one was intended.
MIN_GENES_PER_TERM = 3

# The list of genes that needs to be summarized.
GeneList = [
    'CLEC5A', 'COL11A1', 'CRABP2', 'CXCL10', 'DCC1', 'DDAH2', 'DEFB1',
    'DEPDC1', 'DKFZp762E1312', 'DLG7', 'DNA2L', 'DSC2', 'DSC3', 'ECT2',
    'ENC1', 'EXOSC5', 'EYA4', 'EZH2', 'FABP5', 'FAM64A', 'FGF9', 'FLJ21963',
    'GFOD1', 'GGH', 'GINS1', 'GLDC', 'GOLGA8A', 'GOLT1B', 'HMMR', 'HRASLS',
    'HS3ST3A1', 'IGFBP2', 'ING4', 'INHBB', 'ISG15', 'ITGB8', 'KIAA1199',
    'KIF11', 'KIF14', 'KIF20A', 'KIF23', 'KIF2C', 'KIFC1', 'KLK7', 'KPNA2',
    'KRT6A', 'LMNB1', 'MAL', 'MELK', 'MFAP2', 'MGAT4A', 'MKI67', 'MRS2L',
    'MUC16', 'MYO10', 'NCAPD2', 'NDC80', 'NEK2', 'NETO2', 'NID2', 'NLRP2',
    'NMU', 'NRAS', 'NRCAM', 'NUSAP1', 'OGDHL',
]


def format_summary(result):
    """Return the printable report for a gotermSummarization() result.

    ``result`` has the format
    ``[value_0, {goterm_1: [genes_1, value_1], goterm_2: [genes_2, value_2], ...}]``
    where

    * ``value_0`` is the total information lost by the summarization,
    * ``goterm_i`` is the ID of a GO term used to summarize the gene list,
    * ``genes_i`` is the subset of the given genes annotated by ``goterm_i``,
    * ``value_i`` is the level of ``goterm_i`` in the GO ontology (the root
      node is at level 1).

    The returned text is exactly what the original Python 2 ``print``
    statements emitted, including the blank line after every gene list.
    """
    total_lost = result[0]
    terms = result[1]
    # Single-element tuples keep '%s' from unpacking list/tuple values.
    lines = ['Total information lost is %s' % (total_lost,)]
    for goterm in terms:
        genes = terms[goterm][0]
        level = terms[goterm][1]
        lines.append('GO term ID: %s --------------' % (goterm,))
        lines.append('GO term level: %s' % (level,))
        # The trailing ' \n' reproduces the original "print ..., '\n'".
        lines.append('Gene list: %s \n' % (genes,))
    return '\n'.join(lines) + '\n'


def main():
    """Summarize GeneList with GO terms and write the report to stdout."""
    # Imported here so that importing this file (e.g. to reuse
    # format_summary) does not require the GO summarization module.
    from GotermSummarization_PV_AllGenesInAssociationFile_quick import GoGraph

    # Note: every time gotermSummarization() is used, a new GoGraph object
    # has to be created.
    go_graph = GoGraph(weightGographData, geneGotermAssociationData)

    # Use GO terms to summarize the list of genes.
    result = go_graph.gotermSummarization(
        GeneList, P_VALUE_THRESHOLD, MIN_GENES_PER_TERM)

    sys.stdout.write(format_summary(result))


if __name__ == '__main__':
    main()