ensembl-hive  2.8.1
CCDSParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::CCDSParser;
21 
22 use strict;
23 use Carp;
24 
25 use base qw( XrefParser::BaseParser );
# Read CCDS records from a CCDS database and assign direct xrefs.
# All xrefs are assumed to be linked to transcripts.
# The same CCDS may be linked to more than one transcript, but the xref only
# needs to be added once, so check whether it already exists before adding it.
31 
# run_script
#
# Query a CCDS database for transcript/xref pairs and record each pair as a
# direct xref against a transcript, creating every distinct CCDS xref once.
#
# Takes a single hash reference with the keys:
#   source_id  - required; passed to add_xref for every xref created
#   species_id - required; passed to add_xref for every xref created
#   file       - required; a string of "key=>value" pairs from which host,
#                port, dbname and pass are read (pairs are separated by
#                commas; a trailing comma after the last pair is optional)
#   verbose    - optional; when true a one-line summary is printed
#   dba        - optional; object whose dbc() supplies the CCDS connection
#                when 'file' names no host
#   dbi        - optional; handle passed on to add_xref/add_direct_xref,
#                defaults to $self->dbi
#
# Returns 0 on success and 1 when no CCDS database connection is available.
# Croaks if a required argument is missing; dies if the query cannot be
# prepared or executed.
sub run_script {

  my ($self, $ref_arg) = @_;

  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $file       = $ref_arg->{file};
  my $verbose    = $ref_arg->{verbose};
  my $db         = $ref_arg->{dba};
  my $dbi        = $ref_arg->{dbi};
  $dbi = $self->dbi unless defined $dbi;

  if ((!defined $source_id) or (!defined $species_id) or (!defined $file)) {
    croak "Need to pass source_id, species_id and file as pairs";
  }

  # Default only when unset. The previous "|= 0" was a numeric bitwise OR,
  # which turned any non-numeric true value (e.g. "yes") into 0.
  $verbose = 0 unless defined $verbose;

  # The user name is fixed; only host, port, dbname and pass come from 'file'.
  my $user   = "ensro";
  my $host   = _ccds_connection_option($file, 'host');
  my $port   = _ccds_connection_option($file, 'port');
  my $dbname = _ccds_connection_option($file, 'dbname');
  my $pass   = _ccds_connection_option($file, 'pass');
  $port = 3306 unless defined $port;

  # A host in 'file' takes precedence over a supplied dba object.
  my ($ccds_db, $dbi2);
  if (defined $host) {
    # No load of this module is visible in this file, so load it on demand;
    # this is a no-op when it has already been loaded elsewhere.
    require XrefParser::Database;
    $ccds_db = XrefParser::Database->new({ host   => $host,
                                           port   => $port,
                                           user   => $user,
                                           dbname => $dbname,
                                           pass   => $pass });
    $dbi2 = $ccds_db->dbi();
  }
  elsif (defined $db) {
    $dbi2 = $db->dbc();
  }

  # Nothing to query: report failure to the caller.
  if (!defined($dbi2)) {
    return 1;
  }

  # Non-interpolating heredoc, so the backslashes reach the database as
  # written (they escape the LIKE wildcard "_").
  my $sql = (<<'SCD');
SELECT t.stable_id, x.dbprimary_acc
  FROM xref x, object_xref ox, transcript t, external_db e
  WHERE x.xref_id=ox.xref_id AND
        ox.ensembl_object_type = "Transcript" AND
        ox.ensembl_id = t.transcript_id AND
        e.external_db_id = x.external_db_id AND
        e.db_name like "Ens\_%\_transcript"
SCD

  # Maps each label already stored to the xref_id returned by add_xref.
  my %seen;

  my $sth = $dbi2->prepare($sql) or die "Could not prepare sql $sql\n";
  $sth->execute() or die "Could not execute $sql\n";

  my $xref_count   = 0;
  my $direct_count = 0;

  # Column 1 (t.stable_id) is bound to $display_label and column 2
  # (x.dbprimary_acc) to $stable_id.
  # NOTE(review): this presumes that in the CCDS database transcript.stable_id
  # holds the CCDS identifier and the Ens_*_transcript xref accession holds
  # the transcript stable id - confirm against the CCDS schema.
  my ($stable_id, $display_label);
  $sth->bind_columns(\$display_label, \$stable_id);

  while ($sth->fetch) {

    # "ACC.VERSION" -> accession and version.
    my ($acc, $version) = split(/\./, $display_label);

    my $xref_id;
    if (!defined($seen{$display_label})) {
      $xref_id = $self->add_xref({ acc        => $acc,
                                   version    => $version,
                                   label      => $display_label,
                                   source_id  => $source_id,
                                   species_id => $species_id,
                                   dbi        => $dbi,
                                   info_type  => "DIRECT" });
      $xref_count++;
      $seen{$display_label} = $xref_id;
    }
    else {
      $xref_id = $seen{$display_label};
    }

    # One direct xref per fetched row, even when the xref itself is reused.
    $self->add_direct_xref($xref_id, $stable_id, "Transcript", "", $dbi);
    $direct_count++;
  }

  print "Parsed CCDS identifiers from $file, added $xref_count xrefs and $direct_count direct_xrefs\n" if ($verbose);

  return 0;
}

# Return the value of one "key=>value" pair found in a connection string, or
# undef when the key is absent. The value is the shortest run of
# non-whitespace characters that is followed by a comma, whitespace or the
# end of the string, so the final pair does not need a trailing comma.
sub _ccds_connection_option {
  my ($spec, $key) = @_;

  return $1 if $spec =~ /\Q$key\E[=][>](\S+?)(?:[,\s]|\z)/;
  return;
}
128 
129 1;
transcript
public transcript()
XrefParser::BaseParser
Definition: BaseParser.pm:8
XrefParser::Database::dbi
public dbi()
XrefParser::Database::new
public new()
XrefParser::Database
Definition: Database.pm:8