ensembl-hive  2.8.1
alt_alleles.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 use strict;
19 use warnings;
22 use Getopt::Long qw(:config pass_through);
23 
24 # (Make sure the API version is correct.)
25 # Usage:
26 # perl alt_alleles.pl -cpass XXXX > & human_release_63_alt_alleles
27 #
28 #
29 # long way
30 # perl alt_alleles.pl -vhost ens-staging1 -vport 3306 -vdbname homo_sapiens_vega_63_37 -cdbname homo_sapiens_core_63_37 -chost ens-staging1 -cpass XXXX > & human_release_63_alt_alleles
31 #
32 
# Connection settings for the two databases, filled in from the command line.
# Options starting with "v" are later used for the Vega connection and those
# starting with "c" for the core connection; anything not supplied stays
# undefined.
my ($vhost, $vpass, $vport, $vdbname, $vuser, $chost, $cpass, $cport, $cdbname, $cuser);

# Option specification => variable that receives the parsed value.
my %option_targets = (
    'vuser=s'   => \$vuser,
    'vpass=s'   => \$vpass,
    'vhost=s'   => \$vhost,
    'vport=i'   => \$vport,
    'vdbname=s' => \$vdbname,
    'cuser=s'   => \$cuser,
    'cpass=s'   => \$cpass,
    'chost=s'   => \$chost,
    'cport=i'   => \$cport,
    'cdbname=s' => \$cdbname,
);

# Getopt::Long is configured with pass_through, so unrecognised arguments
# are left in @ARGV rather than reported as errors.
GetOptions(%option_targets);
46 #
47 # Work out which vega database (the source of the alt allele data) and which core database to use.
48 #
49 
# Version reported by the installed Ensembl API; it is embedded in the
# default database names below.
my $api_version = Bio::EnsEMBL::ApiVersion->software_version();

# When no database name was given on the command line, fall back to the
# human vega/core database name built from the API version and "_37".
$vdbname = "homo_sapiens_vega_${api_version}_37" unless defined $vdbname;
$cdbname = "homo_sapiens_core_${api_version}_37" unless defined $cdbname;
59 
60 #
61 # Connect to the core & vega database
62 #
63 
# Connection parameters for the core database. Host and user fall back to
# 'ens-staging1' / 'ensadmin' when not supplied; password and port are passed
# through exactly as given (possibly undefined).
my %core_connection = (
    -host   => $chost || 'ens-staging1',
    -user   => $cuser || 'ensadmin',
    -pass   => $cpass,
    -group  => 'core',
    -dbname => $cdbname,
    -port   => $cport,
);
my $core_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(%core_connection);

# Same scheme for the vega database, registered under the 'vega' group.
my %vega_connection = (
    -host   => $vhost || 'ens-staging1',
    -user   => $vuser || 'ensadmin',
    -pass   => $vpass,
    -group  => 'vega',
    -dbname => $vdbname,
    -port   => $vport,
);
my $vega_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(%vega_connection);
81 
82 
83 #
84 # get ensembl gene ids and vega stable ids from the *core* database
85 #
# Query run against the core database: one row per Gene object_xref whose
# external database is 'OTTG', returning the xref display label and the
# internal id of the gene it is attached to.
my $vega_core_sql = <<'SQL';
select display_label, ensembl_id
from object_xref
join xref using(xref_id)
join external_db using(external_db_id)
where db_name = 'OTTG'
and ensembl_object_type = 'Gene'
SQL

# Sometimes more than one gene is associated with an OTTG; this happens
# when an OTTG on the primary assembly has been projected to a patch.
# The lookup is therefore two-level: Vega stable id => { core gene id => core gene id }.
my %vega_to_ensembl_core_gene_id;

# Row handler: each row is an array ref of [display_label, ensembl_id].
my $record_mapping = sub {
    my ($row) = @_;
    my ($ottg_id, $core_gene_id) = @{$row};
    $vega_to_ensembl_core_gene_id{$ottg_id}{$core_gene_id} = $core_gene_id;
};

$core_dba->dbc->sql_helper()->execute_no_return(
    -SQL      => $vega_core_sql,
    -CALLBACK => $record_mapping,
);

# Number of distinct Vega stable ids seen (not the number of rows).
my $stable_id_count = keys %vega_to_ensembl_core_gene_id;
print "\nFetched ".$stable_id_count." Vega Stable IDs\n";
105 
106 #
107 # Get AltAlleles from vega
108 #
# Fetch every alt allele group held in the vega database.
my $vega_aaga = $vega_dba->get_AltAlleleGroupAdaptor();
my $vega_groups = $vega_aaga->fetch_all();

my $cnt_vega_rows = @{$vega_groups};
print STDERR "Fetched $cnt_vega_rows rows from the vega db alt_allele table\n";

# %no_gene_id: vega group dbID => list of Vega stable ids with no core gene.
# @new_groups: the core-side groups that will be stored later on.
my %no_gene_id;
my @new_groups;
foreach my $group (@{$vega_groups}) {
    # Each member is a pair of [vega gene, attribute hash].
    my $members = $group->get_all_Genes_types();
    my $new_core_group = Bio::EnsEMBL::AltAlleleGroup->new();

    foreach my $member (@{$members}) {
        my ($vega_gene, $attribs_hash) = @{$member};
        my $vega_stable_id = $vega_gene->stable_id();

        if (exists $vega_to_ensembl_core_gene_id{$vega_stable_id}) {
            # Sort the ids so members are added in a reproducible order
            # instead of Perl's randomised hash order.
            my @core_gene_ids =
                sort { $a <=> $b } keys %{ $vega_to_ensembl_core_gene_id{$vega_stable_id} };

            foreach my $gene_id (@core_gene_ids) {
                # Add each gene in. With a 1:m relationship every core gene
                # gets its own shallow copy of the attributes already
                # assigned, so the members never share one hash reference
                # (and never share it with the vega group either).
                my %attribs_copy = %{ $attribs_hash || {} };
                $new_core_group->add_member($gene_id, \%attribs_copy);
            }
        }
        else {
            push @{ $no_gene_id{ $group->dbID() } }, $vega_stable_id;
            print STDERR "no ensembl gene_id found for vega stable id $vega_stable_id in core\n";
        }
    }

    # Groups for which no member could be mapped are dropped.
    if ($new_core_group->size() > 0) {
        push(@new_groups, $new_core_group);
    }
}
139 
140 #
141 # Delete the old data
142 #
print STDERR "\n\nDeleting all alt_alleles...\n\n";

# Empty the alt allele tables of the core database, in this order, so the
# groups stored afterwards replace the previous contents entirely.
foreach my $table (qw(alt_allele alt_allele_attrib alt_allele_group)) {
    $core_dba->dbc->do("delete from $table");
}
147 
148 #
149 # Store alt_alleles.
150 #
print STDERR "Storing new alt alleles...\n\n";

# Running totals of what actually reached the database.
my $alt_allele_count = 0;
my $gene_count = 0;

my $core_aaga = $core_dba->get_AltAlleleGroupAdaptor();
foreach my $group (@new_groups) {
    my $alt_allele_id = $core_aaga->store($group);

    # store() is expected to hand back the id of the new group. An undefined
    # return is treated as "not stored" (presumably what the adaptor does for
    # groups too small to be alternative alleles -- confirm against the
    # adaptor), so such groups are reported and left out of the totals.
    if (!defined $alt_allele_id) {
        print STDERR "alt allele group with ".$group->size()." member(s) was not stored; not counting it\n";
        next;
    }

    $alt_allele_count++;
    $gene_count += $group->size();
}

print "Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n";
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::DBSQL::DBAdaptor::dbc
public Bio::EnsEMBL::DBSQL::DBConnection dbc()
Bio::EnsEMBL::AltAlleleGroup
Definition: AltAlleleGroup.pm:67
Bio::EnsEMBL::ApiVersion::software_version
public software_version()
Bio::EnsEMBL::Utils::SqlHelper::execute_no_return
public void execute_no_return()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
Bio::EnsEMBL::DBSQL::DBConnection::sql_helper
public Bio::EnsEMBL::Utils::SqlHelper sql_helper()
Bio::EnsEMBL::AltAlleleGroup::new
public Bio::EnsEMBL::AltAlleleGroup new()
Bio::EnsEMBL::AltAlleleGroup::add_member
public add_member()
Bio::EnsEMBL::ApiVersion
Definition: ApiVersion.pm:17