#!/usr/bin/perl
#
# Example client for the SubLoc SOAP service.
#
# Demonstrates, in order: ID searches (SWISSPROT, DBSubLoc, GO), a name
# search, a BLAST search, SVM-based and PSORT-based localization
# predictions, submitting a new entry, and batch processing of a FASTA file.

use strict;
use warnings;

use Scalar::Util qw(reftype);
use SOAP::Lite;

# Only needed for the FASTA batch example at the end of the script.
# Requires BioPerl; if it is not installed, please visit www.bioperl.org
# to download and install it.
use Bio::SeqIO;

# Fields printed for every matched entry, in output order.
# SQ=sequence, LC=location, CX=cross database id, DE=description, OS=organism
my @ENTRY_FIELDS = qw(ID SQ DE LC OS CX);

# Connect to the SubLoc SOAP server via the WSDL file.
my $wsdl = 'http://www.bioinfo.tsinghua.edu.cn/~tigerchen/SubLoc.wsdl';
my $serv = SOAP::Lite->service($wsdl);

###################################################################
########### id_search by SWISSPROT ID
print_entries( $serv->id_search( "SWISSPROT", "O03978" ) );

########### id_search by DBSubLoc ID
print_entries( $serv->id_search( "DBSubLoc", "10043258" ) );

########### id_search by GO ID
# Caution! A search by GO ID may return a great many entries; please be
# patient.
print_entries( $serv->id_search( "GO", "0005739" ) );

###################################################################
############### name_search
print_entries( $serv->name_search("p53") );

###################################################################
############### blast_search with one sequence
my $seq
    = "TKRFFNKNNRLNKGYAKTFSINEPDNNFYRKKFEHILPPVDLISEYESIYPGTLQELMHMAQKEQAHKHAIDLKNLKIQERIAKLTRICLLIFGIGLVVLIFLKLLK";

# NOTE(review): the meaning of the "all", "full" and 1 arguments is defined
# by the service, not by this script -- confirm against the WSDL.
my $blast_result = $serv->blast_search( "all", "full", $seq, 1 );

# Two output formats are provided; use whichever you like:
#   plain  - the original output of the BLAST program (easy to parse with
#            BioPerl; there are many example scripts on the Internet)
#   parsed - the result parsed with BioPerl's BPlite, fields separated by
#            a TAB:
#            HIT_NUM SCORE BITS PERCENT EXPECT SUBJECT_NAME IDENTITIES
#            MATCH_LENGTH QUERY_START QUERY_END SUBJECT_START SUBJECT_END
print_fields( $blast_result, "", qw(plain parsed) );

###################################################################
############### SubLoc prediction by SVM
# For more details about the method, please see:
# Hua, S. and Sun, Z. (2001). Support vector machine approach for protein
# subcellular localization prediction. Bioinformatics 17, 721-8.

# Printed per prediction: sequence, predicted location, expected accuracy,
# reliability index.
my @SVM_FIELDS = qw(Seq Prediction ExpectAcc RI);

# for prokaryotic organisms
print_fields( $serv->pro_predict($seq), "\n", @SVM_FIELDS );

# for eukaryotic organisms
print_fields( $serv->eu_predict($seq), "\n", @SVM_FIELDS );

###################################################################
################# SubLoc prediction by PSORT
# For more details about PSORT, please refer to:
# Nakai, K. and Horton, P. (1999). PSORT: a program for detecting sorting
# signals in proteins and predicting their subcellular localization.
# Trends Biochem Sci 24, 34-6.

# Printed: predicted location, then all output information of PSORT.
print_fields( $serv->psort_predict($seq), "\n", qw(Prediction Detail) );

###################################################################
################## Submit new entries by users
my $ID = "test";
my $DE = "pseudo-entry for test";
my $OS = "human";
my $LC = "cytoplasmic";
my $CX = "unknown";
my $SQ = "XXXXXXXXXXXXXXXXXX";

my $feed = $serv->feed_entry( $ID, $DE, $OS, $LC, $CX, $SQ );
print $feed if defined $feed;

###################################################################
## If you have a FASTA file containing many sequences, you may use the
## following code.
my $filename = '/path/to/your/fasta/file';

# '-file' takes a path; '-fh' is only for an already-opened filehandle.
my $fasta = Bio::SeqIO->new( '-file' => $filename, '-format' => 'fasta' );

while ( my $record = $fasta->next_seq ) {

    # next_seq returns a sequence object; the service calls above take the
    # residues as a plain string, so extract it first.
    my $residues = $record->seq;

    # do what you want to do, for example:
    my $pre = $serv->psort_predict($residues);

    # .......
    my $blast = $serv->blast_search( 'all', 'full', $residues, 1 );

    # .......
}

###################################################################
# Helpers

# Return true when $thing is a reference whose underlying type is $type
# ('ARRAY', 'HASH', ...). reftype() is used instead of ref() so that a
# blessed reference is still recognised by its underlying type.
sub _has_reftype {
    my ( $thing, $type ) = @_;
    my $actual = reftype($thing);
    return defined $actual && $actual eq $type;
}

# Print the number of matched entries followed by the @ENTRY_FIELDS of each
# entry, with no separators between fields.
#   $res - result of id_search/name_search; expected to be a reference to
#          an array of hash references. Anything else (for example undef
#          after a failed call) is reported as zero matches.
sub print_entries {
    my ($res) = @_;

    my @entries = _has_reftype( $res, 'ARRAY' ) ? @{$res} : ();
    my $matched = scalar @entries;
    print "$matched matched entries are found!\n";

    for my $entry (@entries) {
        next if !_has_reftype( $entry, 'HASH' );

        # Skip undefined fields so missing data does not trigger warnings.
        print grep { defined $_ } @{$entry}{@ENTRY_FIELDS};
    }
    return;
}

# Print the named fields of a hash-reference result, each followed by
# $suffix.
#   $result - hash reference returned by the service (may be undef)
#   $suffix - string appended after every field ("" or "\n")
#   @fields - keys to print, in order
# A missing field (or a non-hash $result) is printed as an empty string.
sub print_fields {
    my ( $result, $suffix, @fields ) = @_;

    my $is_hash = _has_reftype( $result, 'HASH' );
    for my $field (@fields) {
        my $value = $is_hash ? $result->{$field} : undef;
        $value = '' if !defined $value;
        print $value . $suffix;
    }
    return;
}