2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
22 use Getopt::Long qw(:config pass_through);
24 # (make sure api version is correct
26 # perl alt_alleles.pl -cpass XXXX > & human_release_63_alt_alleles
30 # perl alt_alleles.pl -vhost ens-staging1 -vport 3306 -vdbname homo_sapiens_vega_63_37 -cdbname homo_sapiens_core_63_37 -chost ens-staging1 -cpass XXXX > & human_release_63_alt_alleles
# Holders for the command-line options: the v* variables describe the vega
# database connection and the c* variables the core database connection.
33 my ($vhost, $vpass, $vport, $vdbname, $vuser, $chost, $cpass, $cport, $cdbname, $cuser);
# Option specs binding -vdbname and -cdbname to their variables; "=s" means the
# option takes a string value.
# NOTE(review): presumably the tail of a GetOptions(...) call whose opening and
# remaining specs are not in the lines shown here.
40 'vdbname=s' => \$vdbname,
45 'cdbname=s' => \$cdbname);
47 # Connect to the vega database to get the alt allele data.
# Fall back to a generated vega database name when $vdbname is still undefined.
# The name has the form shown in the usage example (e.g. homo_sapiens_vega_63_37);
# the "_37" suffix is hard-coded.
# NOTE(review): $api_version is set outside the lines shown here — confirm its source.
52 if(!defined($vdbname)){
53 $vdbname =
"homo_sapiens_vega_".$api_version.
"_37";
# Same fallback for the core database name when $cdbname is still undefined.
56 if(!defined($cdbname)){
57 $cdbname =
"homo_sapiens_core_".$api_version.
"_37";
61 # Connect to the core & vega database
# Connection arguments built from the c* variables. Because || is used, the
# defaults 'ens-staging1' / 'ensadmin' apply whenever $chost / $cuser are false
# (undefined, empty or 0), not only when they are undefined.
# NOTE(review): the constructor call these arguments belong to is not shown here.
65 -host => $chost||
'ens-staging1',
66 -user => $cuser||
'ensadmin',
# Connection arguments built from the v* variables, with the same fallbacks.
74 -host => $vhost||
'ens-staging1',
75 -user => $vuser||
'ensadmin',
84 # get ensembl gene ids and vega stable ids from the *core* database
# Query text held in a non-interpolating ('SQL') heredoc. It selects
# display_label and ensembl_id, joins xref and external_db, and restricts the
# result to db_name 'OTTG' and ensembl_object_type 'Gene'.
# NOTE(review): the FROM clause and the heredoc terminator are not in the lines
# shown here. Do not add comments below this point: they would become part of
# the SQL string.
86 my $vega_core_sql = <<
'SQL';
87 select display_label, ensembl_id
89 join xref
using(xref_id)
90 join external_db
using(external_db_id)
91 where db_name =
'OTTG'
92 and ensembl_object_type =
'Gene'
95 # sometimes we will see more than one gene associated with an OTTG
96 # this happens when an OTTG on the primary assembly has been projected to a patch.
# Two-level hash: vega stable id => { gene_id => gene_id }. The inner hash keeps
# every distinct gene_id seen for a stable id, so a stable id that maps to
# several genes retains all of them.
97 my %vega_to_ensembl_core_gene_id;
# $row is unpacked as (vega stable id, gene id).
# NOTE(review): presumably a result row of the query above (display_label,
# ensembl_id); the loop producing $row is not in the lines shown here.
100 my ($vega_stable_id, $gene_id) = @{$row};
101 $vega_to_ensembl_core_gene_id{$vega_stable_id}{$gene_id} = $gene_id;
# Report the number of distinct vega stable ids collected. This print has no
# explicit filehandle, unlike the progress messages sent to STDERR.
104 print
"\nFetched ".(scalar(keys %vega_to_ensembl_core_gene_id)).
" Vega Stable IDs\n";
107 # Get AltAlleles from vega
# Fetch every alt-allele group from the vega database through its
# AltAlleleGroupAdaptor.
109 my $vega_aaga = $vega_dba->get_AltAlleleGroupAdaptor();
110 my $vega_groups = $vega_aaga->fetch_all();
# Array dereference in scalar context: the number of groups returned.
# NOTE(review): the message below calls these "rows", but the value counted is
# the number of elements returned by fetch_all().
112 my $cnt_vega_rows = @{$vega_groups};
113 print STDERR
"Fetched $cnt_vega_rows rows from the vega db alt_allele table\n";
# Walk every vega group and add its members to $new_core_group under the gene
# ids looked up in %vega_to_ensembl_core_gene_id.
# NOTE(review): the construction of $new_core_group, the else line and the
# closing braces are not in the lines shown here.
117 foreach my $group (@{$vega_groups}) {
118 my $members = $group->get_all_Genes_types();
# Each member is unpacked as a pair: (gene object, attributes).
120 foreach my $member (@{$members}) {
121 my ($vega_gene, $attribs_hash) = @{$member};
122 my $vega_stable_id = $vega_gene->stable_id();
123 if(exists $vega_to_ensembl_core_gene_id{$vega_stable_id}) {
124 foreach my $gene_id (keys %{$vega_to_ensembl_core_gene_id{$vega_stable_id}} ) {
125 #Add each gene in. If we had a 1:m relationship then we copy the attribute already assigned
127 $new_core_group->
add_member($gene_id, $attribs_hash);
# Presumably the else branch: no gene id is known for this vega stable id, so it
# is recorded in %no_gene_id under the vega group's dbID and reported on STDERR.
131 push @{$no_gene_id{$group->dbID()}}, $vega_stable_id;
132 print STDERR
"no ensembl gene_id found for vega stable id $vega_stable_id in core\n";
# Keep only groups that ended up with at least one member.
135 if($new_core_group->size() > 0) {
136 push(@new_groups, $new_core_group);
141 # Delete the old data
# Empty the three alt-allele tables through the core connection. Each delete is
# issued as its own do() call.
# NOTE(review): no transaction handling is visible in the lines shown here, so
# a failure in the later store step may leave these tables empty — confirm.
143 print STDERR
"\n\nDeleting all alt_alleles...\n\n";
144 $core_dba->dbc->do(
"delete from alt_allele");
145 $core_dba->dbc->do(
"delete from alt_allele_attrib");
146 $core_dba->dbc->do(
"delete from alt_allele_group");
# Store each rebuilt group through the core AltAlleleGroupAdaptor and report
# the totals.
# NOTE(review): the declaration of $gene_count, any increment of
# $alt_allele_count and the loop's closing brace are not in the lines shown here.
151 print STDERR
"Storing new alt alleles...\n\n";
152 my $alt_allele_count=0;
155 my $core_aaga = $core_dba->get_AltAlleleGroupAdaptor();
156 foreach my $group (@new_groups) {
# The value returned by store() is kept in $alt_allele_id.
157 my $alt_allele_id = $core_aaga->store($group);
# Accumulate the member count of every stored group.
159 $gene_count += $group->size()
# Final summary, printed without an explicit filehandle.
162 print
"Added $alt_allele_count alt_allele ids for $gene_count genes\nDONE\n";