#!/usr/bin/env python
"""Example client for the SubLoc SOAP web service.

Walks through each remote operation in turn -- ID search, name search,
BLAST search, SVM and PSORT subcellular-location prediction, and the
submission of a new entry -- and prints whatever the server returns.

Every ``serv.*`` call below is a remote call to the SOAP server described
by the WSDL file named in ``wsdl``.

``print(x)`` is always called with exactly one argument, so the output is
the same whether ``print`` is the Python 2 statement or the Python 3
function.
"""
from SOAPpy import WSDL


def _report_entries(entries):
    """Print the number of matched entries, then every entry's fields.

    For each entry the fields are printed one per line in the order
    ID, SQ, DE, LC, OS, CX, where SQ=sequence, LC=location,
    CX=cross database id, DE=description, OS=organism.

    Args:
        entries: result of ``serv.id_search`` or ``serv.name_search``; it
            must support ``len()`` and integer indexing.
    """
    # get the number of matched entries
    print(len(entries))
    # Index explicitly instead of iterating: len() and integer indexing are
    # the only operations this script relies on for the SOAP result object.
    for i in range(len(entries)):
        entry = entries[i]
        print(entry.ID)
        print(entry.SQ)
        print(entry.DE)
        print(entry.LC)
        print(entry.OS)
        print(entry.CX)


def _report_svm_prediction(prediction):
    """Print an SVM prediction result, each field followed by a blank gap.

    Args:
        prediction: result of ``serv.pro_predict`` or ``serv.eu_predict``;
            its ``Prediction``, ``ExpectAcc`` and ``RI`` attributes are read.
    """
    # get predicted location
    print(prediction.Prediction)
    print("\n")
    # get expected accuracy
    print(prediction.ExpectAcc)
    print("\n")
    # get reliability index
    print(prediction.RI)
    print("\n")


# connect to the SubLoc SOAP server via the WSDL file
wsdl = 'http://www.bioinfo.tsinghua.edu.cn/~tigerchen/SubLoc.wsdl'
# alternative: use a local copy of the WSDL file
#wsdl='SubLoc.wsdl'
serv = WSDL.Proxy(wsdl)

#################### id_search

# by SWISSPROT ID
res = serv.id_search("SWISSPROT", "O03978")
_report_entries(res)

# by DBSubLoc ID
res = serv.id_search("DBSubLoc", "10043258")
_report_entries(res)

# by GO ID
# Caution! Searching by GO ID may return a great many entries; please be
# patient.
res = serv.id_search("GO", "0005739")
_report_entries(res)

############################ name_search

res = serv.name_search("p53")
_report_entries(res)

############################# blast_search

seq = "TKRFFNKNNRLNKGYAKTFSINEPDNNFYRKKFEHILPPVDLISEYESIYPGTLQELMHMAQKEQAHKHAIDLKNLKIQERIAKLTRICLLIFGIGLVVLIFLKLLK"
# alternative query and call, kept from the original example:
#seq = "TKRFFNKNNRLNKGY"
#res = serv.blast_search("all","full",seq,"1.0")
res = serv.blast_search("all", "full", seq, "1")

# Two output formats are provided: the original BLAST output, and results
# parsed by BioPerl's BPlite. Use whichever you prefer.

# get the original output of the BLAST program; you could use BioPython to
# parse it quite easily. There are many example scripts on the Internet.
print(res.plain)

# or you can get the parsed result (using BioPerl's BPlite)
# the parsed output's fields, every field is separated by a TAB:
#HIT_NUM SCORE BITS PERCENT EXPECT SUBJECT_NAME IDENTITIES MATCH_LENGTH QUERY_START QUERY_END SUBJECT_START SUBJECT_END
print(res.parsed)

###################################################################
############### SubLoc prediction by SVM
# for more details about the method, please see:
# Hua, S. and Sun, Z. (2001). Support vector machine approach for protein
# subcellular localization prediction. Bioinformatics 17, 721-8.

# for prokaryotic organisms
seq = "TKRFFNKNNRLNKGYAKTFSINEPDNNFYRKKFEHILPPVDLISEYESIYPGTLQELMHMAQKEQAHKHAIDLKNLKIQERIAKLTRICLLIFGIGLVVLIFLKLLK"
pre = serv.pro_predict(seq)
_report_svm_prediction(pre)

# for eukaryotic organisms
pre = serv.eu_predict(seq)
_report_svm_prediction(pre)

######################################################################
################# SubLoc prediction by PSORT
# for more details about PSORT, please refer to:
# Nakai, K. and Horton, P. (1999). PSORT: a program for detecting sorting
# signals in proteins and predicting their subcellular localization.
# Trends Biochem Sci 24, 34-6.

pre = serv.psort_predict(seq)
# get predicted location
print(pre.Prediction)
print("\n")
# get all output information of PSORT
print(pre.Detail)
print("\n")

########################################################################
################## Submit new entries by users

ID = "test"
DE = "pseudo-entry for test"
OS = "human"
LC = "cytoplasmic"
CX = "unknown"
SQ = "XXXXXXXXXXXXXXXXXX"
feed = serv.feed_entry(ID, DE, OS, LC, CX, SQ)
print(feed)

## If you have a FASTA file containing many sequences, you may use the
## following code. It needs BioPython; if it is not installed, please visit
## www.biopython.org to download and install it.
#from Bio.SeqIO import FASTA
#import sys
#filename = '/your/path/to/fasta/file'
#handle = open(filename)
#it = FASTA.FastaReader(handle)
#seq = it.next()
#while seq:
#    print seq.name
#    print seq.seq
#    seq = it.next()
#handle.close()