2 # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
3 # Copyright [2016-2024] EMBL-European Bioinformatics Institute
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
18 # Designed to work on BED data such as that available from UCSC or 1KG:
19 # https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/analysis_results/supporting/accessible_genome_masks
30 use POSIX qw/strftime/;
# Holders for the main database connection and general options.
33 my ($file,$db_name,$db_host,$db_user,$db_pass,$db_port,$help,$species,$group);
# Holders for the optional, separate DNA database connection.
34 my ($dna_db_name,$dna_db_host,$dna_db_user,$dna_db_pass,$dna_db_port, $dna_group);
# Holders for the analysis the imported features are stored against.
35 my ($logic_name, $description, $display_label);
# Each spec lists the accepted option names (aliases separated by '|') and
# the variable that receives the value; '=s' takes a string, '=i' an integer.
40 GetOptions (
"file=s" => \$file,
41 "db_name|dbname|database=s" => \$db_name,
42 "db_host|dbhost|host=s" => \$db_host,
43 "db_user|dbuser|user|username=s" => \$db_user,
44 "db_pass|dbpass|pass|password=s" => \$db_pass,
45 "db_port|dbport|port=s" => \$db_port,
46 "dna_db_name|dna_dbname|dna_database=s" => \$dna_db_name,
47 "dna_db_host|dna_dbhost|dna_host=s" => \$dna_db_host,
48 "dna_db_user|dna_dbuser|dna_user|dna_username=s" => \$dna_db_user,
49 "dna_db_pass|dna_dbpass|dna_pass|dna_password=s" => \$dna_db_pass,
50 "dna_db_port|dna_dbport|dna_port=s" => \$dna_db_port,
51 "dna_group=s" => \$dna_group,
52 "species=s" => \$species,
54 'logic_name=s' => \$logic_name,
55 'description=s' => \$description,
56 'display_label=s' => \$display_label,
# NOTE(review): $write_every is not among the variables declared above;
# confirm it is declared (and given its default) elsewhere in the file.
57 'write_every=i' => \$write_every,
# Validate the parsed options before any database work is attempted.
#   - help requested: print the usage text and exit with status 0
#   - file, database name or database host missing: report, print usage, exit 1
#   - no analysis logic name: report, print usage, exit 1
# usage() is called with explicit parentheses throughout. The `&usage;` form
# implicitly forwards the caller's current @_ to the callee, which is not
# intended here.
if ($help) {
  usage();
  exit 0;
}
unless ($file and $db_name and $db_host) {
  print "Insufficient arguments\n";
  usage();
  exit 1;
}
unless ($logic_name) {
  print "No logic name given\n";
  usage();
  exit 1;
}
# Fill in DNA-database settings that were not supplied on the command line.
# The group falls back to the literal 'core'.
78 $dna_group ||=
'core';
# Host, port and user fall back to the values given for the main database.
79 $dna_db_host ||= $db_host;
80 $dna_db_port ||= $db_port;
81 $dna_db_user ||= $db_user;
# NOTE(review): no matching fallback for $dna_db_pass is visible here,
# although the usage text says the DNA password defaults to -pass; confirm.
85 -dbname => $dna_db_name,
86 -host => $dna_db_host,
87 -user => $dna_db_user,
# The password is set on the DNA adaptor's connection only when one is
# available (a false value leaves the connection untouched).
90 $dna_dba->
dbc->
password($dna_db_pass)
if $dna_db_pass;
# Register the DNA adaptor on the main adaptor.
91 $dba->dnadb($dna_dba);
96 # see bottom of file for this method call
# Fatal error that names the path supplied through the file option.
100 die
"No file found at $file";
# Counter that is compared against $write_every further down.
111 my $commit_count = 0;
# The anonymous sub is passed to iterate_file() as a callback.
# NOTE(review): presumably invoked once per line of $f; confirm against
# the definition of iterate_file().
112 iterate_file($f, sub {
# Progress report after every 2000 records; a count of zero is skipped.
114 if($count != 0 && $count % 2000 == 0) {
115 info(
"Processed %s records", $count);
# Queue the feature so that it can be written later as part of a batch.
119 push(@features, $sf);
# Batch threshold check. NOTE(review): the usage text documents a default of
# -1 for write_every, in which case this equality is presumably never met
# and everything is written at the end; confirm.
123 if($commit_count == $write_every) {
# Store a batch of features through the SimpleFeatureAdaptor.
#   $features - array reference holding the features to write
#   $dba      - adaptor object providing get_SimpleFeatureAdaptor()
134 my ($features, $dba) = @_;
135 my $sfa = $dba->get_SimpleFeatureAdaptor();
136 my $count = scalar(@{$features});
# Log the batch size, then pass the whole list to store() in a single call.
138 info(
"Writing %d feature(s)", $count);
139 $sfa->store(@{$features});
# Turn one tab-separated BED record into the arguments for a feature.
#   $line     - BED record: chromosome, start, end, name, score, strand
#   $analysis - analysis passed through as -analysis
#   $dba      - database adaptor
my ($line, $analysis, $dba) = @_;
my ($chr, $start, $end, $label, $score, $ucsc_strand) = split(/\t/, $line);
$start++; # UCSC is 0 idx start
# If the strand column was present and is '-', use -1; +ve is the default.
# The comparison has to read $ucsc_strand: the $strand being declared on this
# line is not in scope on its own right-hand side, so testing it there can
# never see the parsed value.
# NOTE(review): assumes the trailing newline was removed from $line before
# this point, otherwise a final strand column would read "-\n"; confirm.
my $strand = (defined $ucsc_strand && $ucsc_strand eq '-') ? -1 : 1;
  -analysis => $analysis,
  -display_label => $label,
# Only add the score if it was there.
$args{-SCORE} = $score if defined $score;
# Resolve a region name to a Slice, remembering results in %slices.
#   $original - region name as it appeared in the input
#   $dba      - adaptor object providing get_SliceAdaptor()
my ($original, $dba) = @_;
my $name = $original;
# Reuse a previously fetched Slice. The key must be the variable $name: a
# bareword `name` inside the braces is the literal string 'name', which is
# never stored, so the cache would never be hit.
return $slices{$name} if exists $slices{$name};
my $slice = $dba->get_SliceAdaptor()->fetch_by_region('toplevel', $name);
die
"Could not get a Slice from the Ensembl database for the given region '$original' or '$name' and coorindate system 'toplevel'. Check your core database";
# Remember the Slice under the name it was fetched with.
$slices{$name} = $slice;
# Look the analysis up by its logic name through the AnalysisAdaptor.
181 my $aa = $dba->get_AnalysisAdaptor();
182 my $analysis = $aa->fetch_by_logic_name($logic_name);
# NOTE(review): presumably a new analysis is built with this logic name
# when the fetch above finds nothing; confirm.
186 -logic_name => $logic_name,
# Description and display label are applied only when a true value was given.
188 $analysis->description($description)
if $description;
189 $analysis->display_label($display_label)
if $display_label;
# Persist the analysis through the adaptor.
190 $aa->store($analysis);
# Write a timestamped message to STDERR.
#   $msg  - sprintf format string
#   @args - values substituted into $msg
196 my ($msg, @args) = @_;
197 my $m = sprintf $msg, @args;
# '%c' asks strftime for the locale's date and time representation.
198 my $time = strftime(
'%c',localtime());
# The format is '[time] message'; this printf does not add a newline itself.
199 printf STDERR
'[%s] %s', $time, $m;
205 print
"Launching instructions:
206 Run from a folder you are happy to have filled with files.
210 Import data from a BED file into the simple_feature table. Only supports
211 6 column BED files (location, name and score).
215 perl import_bed_simple_feature.pl -file [PATH] -db_name NAME
219 -file Supply the file path
220 -logic_name Analysis logic name import data against
222 -db_name The DB to add these features to
226 -db_host Hostname for the DB
230 -db_user Username for the DB
235 -db_pass Password for the DB
240 -db_port Port for the DB
244 -dna_db_name The DNA DB to use if DB does not contain coordinate systems and DNA
248 -dna_db_host Hostname for the DNA DB. Defaults to -host
252 -dna_db_user Username for the DNA DB. Defaults to -user
257 -dna_db_pass Password for the DNA DB. Defaults to -pass
262 -dna_db_port Port for the DNA DB. Defaults to -port
268 -species Name of the species; defaults to human
269 -group Name of the DB group; defaults to core
270 -description Analysis description; only needed if analysis is not already in the DB
271 -display_label Analysis display label for the website; only needed if analysis is not already in the DB
273 -write_every Write features once every N lines. Defaults to -1 (write once all records are parsed)