ensembl-hive  2.7.0
IKMC_get_data.pl
Go to the documentation of this file.
1 #!/ebi/extserv/bin/perl/bin/perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # an example script demonstrating the use of BioMart webservice
18 
19 
20 #
21 # NOTE: this could have been implemented in the parser itself, but the data is
22 # needed for the simple features, so it is fetched here instead.
23 #
24 use strict;
25 use warnings;
26 
27 use LWP::UserAgent;
28 
29 
# BioMart query document. It asks the "dcc" dataset for tab-separated rows
# (formatter TSV, no header line, unique rows only) carrying six attributes;
# the order of the <Attribute> elements is the column order of the reply.
my $xml = (<<XXML);
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >

 <Dataset name = "dcc" interface = "default" >
 <Attribute name = "mgi_accession_id" />
 <Attribute name = "marker_symbol" />
 <Attribute name = "vector_available" />
 <Attribute name = "escell_available" />
 <Attribute name = "mouse_available" />
 <Attribute name = "ensembl_gene_id" />
 </Dataset>
</Query>
XXML

# Raw download target; it is re-read later in this script.
my $download_file = 'ensembl_ikmc_initial.txt';
open(my $download_fh, '>', $download_file)
    or die "Cannot open $download_file for writing: $!";

my $path = "http://www.i-dcc.org/biomart/martservice?";
my $request = HTTP::Request->new("POST", $path, HTTP::Headers->new(), 'query=' . $xml . "\n");
my $ua = LWP::UserAgent->new;

# Stream the reply to disk in chunks (1000 is the read-size hint) instead of
# holding the whole table in memory. The response object returned by
# request() is kept so that failures can be reported after the transfer:
# a failed request may never reach the callback at all.
my $response = $ua->request(
    $request,
    sub {
        my ($data, $chunk_response) = @_;

        # Never write an error page into the data file.
        return unless $chunk_response->is_success;

        print {$download_fh} $data
            or die "Cannot write to $download_file: $!";
    },
    1000,
);

if (!$response->is_success) {
    warn("Problems with the web server: " . $response->status_line);
}
elsif (defined(my $died = $response->header('X-Died'))) {
    # LWP aborts the transfer and records the message here when the
    # content callback dies (e.g. on a write error above).
    warn("Download of $download_file was aborted: $died");
}

close($download_fh) or die "Cannot close $download_file: $!";
66 
# Per-gene summaries, all keyed by MGI accession id (column 0 of the input).
my %symbols;        # marker symbol (column 1); last row seen wins
my %ensembl_ids;    # Ensembl gene id (column 5); last row seen wins
my %status;         # best availability level seen so far:
                    #   1 = nothing, 2 = vector, 3 = ES cells, 4 = mice

my $ikmc_input = 'ensembl_ikmc_initial.txt';
open(my $ikmc_fh, '<', $ikmc_input)
    or die "Cannot open $ikmc_input for reading: $!";

#nb [9] is now cell_line_bg and [10] is backcross
# NOTE(review): the comment above refers to column indices that are not read
# by this loop (only columns 0-5 are used) - confirm whether it is stale.
while (my $row = <$ikmc_fh>) {
    # Strip only a real line terminator (LF or CRLF). The previous chop()
    # removed the final character unconditionally, which truncated the
    # Ensembl id on a last line that had no trailing newline.
    $row =~ s/\r?\n\z//;
    next if $row eq '';

    # Limit -1 keeps trailing empty columns so the field positions stay fixed.
    my ($mgi_id, $symbol, $vector, $escell, $mouse, $ensembl_id) = split(/\t/, $row, -1);

    # Short rows leave fields undefined; treat them as empty.
    for my $field ($symbol, $vector, $escell, $mouse, $ensembl_id) {
        $field = '' unless defined $field;
    }

    $symbols{$mgi_id}     = $symbol;
    $ensembl_ids{$mgi_id} = $ensembl_id;

    # Highest availability level offered by this row. Empty flags are
    # skipped explicitly so the numeric comparison does not warn.
    my $level = 1;
    $level = 2 if $vector ne '' && $vector == 1;
    $level = 3 if $escell ne '' && $escell == 1;
    $level = 4 if $mouse  ne '' && $mouse  == 1;

    # A gene can appear on several rows; keep the best level seen for it.
    if (!defined $status{$mgi_id} || $status{$mgi_id} < $level) {
        $status{$mgi_id} = $level;
    }
}
close($ikmc_fh) or die "Cannot close $ikmc_input: $!";
93 
# Write one tab-separated line per gene:
#   MGI accession id, marker symbol, availability description, Ensembl gene id

# Human-readable text for each availability level stored in %status.
my %description_for = (
    1 => 'No products available yet',
    2 => 'Vector available',
    3 => 'ES cells available',
    4 => 'Mice available',
);

my $xref_file = 'ensembl_ikmc_xref.txt';
open(my $xref_fh, '>', $xref_file)
    or die "Cannot open $xref_file for writing: $!";

# Sorted so that repeated runs on the same data produce identical files
# (plain hash order differs from run to run).
foreach my $mgi_id (sort keys %symbols) {
    my $description = $description_for{ $status{$mgi_id} };

    print {$xref_fh} "$mgi_id\t$symbols{$mgi_id}\t$description\t$ensembl_ids{$mgi_id}\n"
        or die "Cannot write to $xref_file: $!";
}

# Buffered write errors only surface at close, so check it.
close($xref_fh) or die "Cannot close $xref_file: $!";