ensembl-hive  2.8.1
EXAMPLE.use_mapping.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 =head1 NAME
19 
20 EXAMPLE.use_mapping.pl - example script for projecting features from one
21 assembly to another
22 
23 =head1 SYNOPSIS
24 
25 EXAMPLE.use_mapping.pl [arguments]
26 
27 Required arguments:
28 
29  --host=hOST database host HOST
30  --port=PORT database port PORT
31  --user=USER database username USER
32  --pass=PASS database passwort PASS
33  --dbname=NAME database name NAME
34  --infile=FILE read input data from FILE
35  --old_assembly=NAME old assembly NAME
36  --new_assembly=NAME new assembly NAME
37 
38 =head1 DESCRIPTION
39 
40 This is an example script illustrating how to use a mapping between two
41 assemblies for a species, as generated by the scripts in this directory. At the
42 time of writing, this mapping was available for human and mouse.
43 
44 The script assumes that your input data are features in old assembly
45 coordinates which you want to transform into new assembly coordinates (i.e.
46 project the features onto the new assembly) and store in an Ensembl database.
47 The input data in this example is read from a file with the format
48 
49 # NAME:CHROMOSOME:START:END:STRAND
50 feat_1:X:200:5000:1
51 feat_2:X:6000:7040:-1
52 ...
53 
54 
55 The example also includes some sanity check you might want to do on your
56 projections. It will depend on your use case whether you want to use them or
57 not (you might need either more or less restrictive conditions).
58 
59 
60 =head1 AUTHOR
61 
62 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
63 
64 =head1 CONTACT
65 
66 Please post comments/questions to the Ensembl development list
67 <http://lists.ensembl.org/mailman/listinfo/dev>
68 
69 =cut
70 
71 use strict;
72 use warnings;
73 no warnings 'uninitialized';
74 
75 use Getopt::Long;
79 
80 $| = 1;
81 
82 my ($host, $port, $user, $pass, $dbname, $infile, $old_assembly, $new_assembly);
83 
84 GetOptions(
85  "host=s", \$host,
86  "port=i", \$port,
87  "user=s", \$user,
88  "pass=s", \$pass,
89  "dbname=s", \$dbname,
90  "infile=s", \$infile,
91  "old_assembly=s", \$old_assembly,
92  "new_assembly=s", \$new_assembly
93 );
94 
95 # connect to database and get adaptors
97  -HOST => $host,
98  -PORT => $port,
99  -USER => $user,
100  -PASS => $pass,
101  -DBNAME => $dbname,
102 );
103 
104 my $sa = $db->get_SliceAdaptor();
105 
106 # create an analysis for the type of feature you wish to store
107 my $analysis = new Bio::EnsEMBL::Analysis(
108  -LOGIC_NAME => 'your_analysis'
109 );
110 
111 # read your input data
112 open(FILE, "<$infile") or die("Can't open $infile for reading: $!");
113 
114 while (<FILE>) {
115 
116  # skip comments
117  next if (/^#/);
118 
119  my ($name, $chr, $start, $end, $strand) = split(/:/);
120 
121  # get a slice on the old assembly
122  my $slice_oldasm = $sa->fetch_by_region('chromosome', $chr, undef, undef,
123  undef, $old_assembly);
124 
125  if (!$slice_oldasm) {
126  warn "Can't get $old_assembly slice for $chr:$start:$end\n";
127  next;
128  }
129 
130  # create a new feature on the old assembly
131  my $feat = Bio::EnsEMBL::SimpleFeature->new(
132  -DISPLAY_LABEL => $name,
133  -START => $start,
134  -END => $end,
135  -STRAND => $strand,
136  -SLICE => $slice_oldasm,
137  -ANALYSIS => $analysis,
138  );
139 
140  # project feature to new assembly
141  my @segments = @{ $feat->feature_Slice->project('chromosome', $new_assembly) };
142 
143  # do some sanity checks on the projection results:
144  # discard the projected feature if
145  # 1. it doesn't project at all (no segments returned)
146  # 2. the projection is fragmented (more than one segment)
147  # 3. the projection doesn't have the same length as the original
148  # feature
149 
150  # this tests for (1) and (2)
151  next unless (scalar(@segments) == 1);
152 
153  # test (3)
154  my $proj_slice = $segments[0]->to_Slice;
155  next unless ($feat->length == $proj_slice->length);
156 
157  next unless ($proj_slice->seq_region_name eq $feat->slice->seq_region_name);
158 
159  # everything looks fine, so adjust the coords of your feature
160  $feat->start($proj_slice->start);
161  $feat->end($proj_slice->end);
162  my $slice_newasm = $sa->fetch_by_region('chromosome', $chr, undef, undef,
163  undef, $new_assembly);
164  $feat->slice($slice_newasm);
165 
166  # store the feature
167  $feat->store;
168 
169 }
170 
171 close(FILE);
172 
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::Slice::project
public List project()
Bio::EnsEMBL::Analysis
Definition: PairAlign.pm:3
Bio::EnsEMBL::Feature::feature_Slice
public Bio::EnsEMBL::Slice feature_Slice()
Bio::EnsEMBL::SimpleFeature
Definition: SimpleFeature.pm:31
Bio::EnsEMBL::SimpleFeature::new
public Bio::EnsEMBL::Feature new()