2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
18 # Designed to work on data retrieved from
19 # https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/working/20110106_recombination_hotspots/
21 # Imports the recombination hotspots from 1000 Genomes into Ensembl
# Holders for the command-line option values; GetOptions fills them in
# through the references handed to it.
my (
    $url,
    $db_name,
    $db_host,
    $db_user,
    $db_pass,
    $db_port,
    $db_version,
    $help,
    $species,
);
40 GetOptions (
"url=s" => \$url,
41 "db_name=s" => \$db_name,
42 "db_host=s" => \$db_host,
43 "db_user=s" => \$db_user,
44 "db_pass=s" => \$db_pass,
45 "db_port=s" => \$db_port,
46 "db_version=s" => \$db_version,
47 "species=s" => \$species,
# Early exits, evaluated before any download or database work is attempted.
#
# usage() is called with explicit parentheses: the legacy "&usage;" form
# forwards the caller's current @_ to the callee, which is never intended here.

# Help was requested explicitly: show the launching instructions and stop.
# This is a successful run, so the exit status is 0.
if ($help) {
    usage();
    exit 0;
}

# -url, -db_name and -db_host are all required. A missing one is an error,
# so exit with a non-zero status to let calling shells and pipelines detect
# the failure (a bare "exit;" would report success).
unless ($url and $db_name and $db_host) {
    print "Insufficient arguments\n";
    usage();
    exit 1;
}
64 -db_version => $db_version,
69 #Bio::EnsEMBL::Registry->load_registry_from_db(
74 # -db_version => $db_version,
# Download the archive named by -url. File::Fetch->new returns false when it
# cannot build a fetcher for the URI, so check it before calling fetch() on
# the result instead of failing with "Can't call method on undefined value".
my $file_fetch = File::Fetch->new(uri => $url)
    or die "Unable to handle the given URL: $url\n";

# fetch() returns the path of the downloaded file, or false on failure.
my $archive_path = $file_fetch->fetch()
    or die "Unable to get data from given URL. " . $file_fetch->error;

# Unpack the gzipped tarball. The list form of system() runs tar directly,
# without a shell, so the path is never subject to shell interpretation.
# system() returns 0 only when tar ran and exited successfully; anything else
# means the extracted files cannot be trusted, so stop here.
system('tar', '-xzf', $archive_path) == 0
    or die "Unable to extract $archive_path with tar: "
         . ($? == -1 ? "could not run tar ($!)" : "exit status " . ($? >> 8))
         . "\n";
83 tie %directory,
'IO::Dir',
".";
84 foreach my $file (keys %directory) {
85 if ($file =~ /\.txt$/) {
86 my $fh = IO::File->new($file);
# Tell the user the run is complete and that the downloaded data may be removed.
print "Finished. Feel free to delete downloaded data in this directory.\n";
94 my $file_handle = shift;
95 <$file_handle>; #strip header
101 -description =>
"The map was generated using the HapMap Phase II data and human genome assembly NCBI35 using LDhat as described in the 2007 HapMap paper (Nature, 18th Sept 2007).
103 The map was then converted from NCBI35 to GRCh37 coordinates and inspected for regions in which
104 the genome assembly had be rearranged.
106 See https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/working/20110106_recombination_hotspots/",
107 -display_label =>
'HapMap Phase II genetic recombination map',
109 -logic_name =>
'human_1kg_hapmap_phase_2',
112 my $previous_chromosome;
115 while (my $line = <$file_handle>) {
116 my ($chromosome,$position,$score) = split(/\t+/,$line);
117 $chromosome =~ s/^chr
118 $chromosome =~ s/\_.+$
119 if (!$slice || $previous_chromosome ne $chromosome) {
120 $slice = $slice_adaptor->fetch_by_region(
'toplevel', $chromosome);
126 -analysis => $analysis,
129 -display_label =>
'Recombination hotspot',
132 push @features, $simple_feature;
133 $previous_chromosome = $chromosome;
135 $simple_feature_adaptor->store(@features);
139 print
"Launching instructions:
140 Run from a folder you are happy to have filled with files.
144 -url Supply the URL to download from
145 -db_name The DB to add these features to
146 -db_host Hostname for the DB