2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 ########################################################################
22 # This script will take the 'xref_config.ini' configuration #
23 # file (or whatever file name given on the command line) and #
24 # convert it into a SQL file that can be used in place of the old #
25 # 'populate_metadata.sql' file found in the 'sql' subdirectory. #
27 # The output from this script should be redirected to a file that #
28 # you manually run to populate your Xref database, just as was done #
29 # with 'populate_metadata.sql'. The safest thing to do is just to #
30 # overwrite 'sql/populate_metadata.sql' with the output of this #
# script. This will ensure that 'xref_parser.pl' populates the Xref #
32 # database with the correct data. #
34 ########################################################################
# Choose the configuration file: the first command-line argument when it
# names an existing plain file, otherwise the default 'xref_config.ini'.
my $file = 'xref_config.ini';
if ( defined $ARGV[0] ) {
  if ( -f $ARGV[0] ) {
    $file = $ARGV[0];
  }
  else {
    # Falling back without a word would generate SQL from a different
    # configuration than the one asked for.  Report it on STDERR so that
    # STDOUT (which carries the generated SQL) stays clean.
    print STDERR
      "'$ARGV[0]' is not a plain file; falling back to '$file'\n";
  }
}

# Optional second argument (defaults to 0).  It is consulted later when
# choosing between a source's 'old_parser' and 'parser' settings.
my $preparse = defined $ARGV[1] ? $ARGV[1] : 0;

# Parse the configuration.  On failure new() returns undef and the
# individual problems are available in @Config::IniFiles::errors.
my $config = Config::IniFiles->new( -file => $file );
if ( !defined $config ) {
  foreach my $e (@Config::IniFiles::errors) {
    print STDERR "\t$e\n";
  }
  die "No Xref config made from $file. Check STDERR";
}
# ----------------------------------------------------------------------
# Species: for every '[species ...]' section emit one
# 'INSERT INTO species' statement per taxonomy id.
# ----------------------------------------------------------------------
print( ( '#' x 80 ) . "\n" );

for my $species_section ( $config->GroupMembers('species') ) {
  # Drop the leading 8 characters ('species ') of the section name to
  # obtain the bare species name.
  my $name = substr( $species_section, 8 );

  # 'taxonomy_id' may list several ids, one per line.  The first one is
  # also used as the species id for every row of this species.
  my @tax_ids =
    split( /\n/, $config->val( $species_section, 'taxonomy_id' ) );
  my $first_id = $tax_ids[0];

  printf( "# Species '%s' (id = %d)\n", $name, $first_id );

  # The alias text is the same for every row; it falls back to the
  # species name when no (true) 'aliases' value is configured.
  my $aliases = $config->val( $species_section, 'aliases' ) || $name;

  for my $tax_id (@tax_ids) {
    print("INSERT INTO species (species_id, taxonomy_id, name, aliases)\n");
    printf( "VALUES (%d, %d, '%s', '%s');\n",
            $first_id, $tax_id, $name, $aliases );
  }
}
# ----------------------------------------------------------------------
# Sources: for every '[source ...]' section (in sorted section-name
# order) validate the section, assign it the next id, and emit an
# 'INSERT INTO source' statement followed by one
# 'INSERT IGNORE INTO dependent_source' statement per dependency.
#
# NOTE(review): %source_ids and $source_id are not declared in this
# block; they are expected to be declared at file scope before this
# loop.  %source_ids (section name => id) is read again when the
# per-species data is written.
# ----------------------------------------------------------------------
print( '#' x 80, "\n" );

foreach my $source_section ( sort( $config->GroupMembers('source') ) ) {
  my ( $spaces, $source_name ) =
    $source_section =~ /^source(\s+)(\S+)\s*$/;

  # Exactly one whitespace character is allowed between 'source' and
  # the source name.
  if ( length($spaces) > 1 ) {
    die( sprintf( "Too many spaces between the words 'source' and '%s'\n"
                    . "while reading source section '[%s]'\n",
                  $source_name, $source_section ) );
  }

  # A duplicated section cannot be detected by name: Config::IniFiles
  # combines sections with the same name into one section with
  # multi-value values.  In scalar context such a value comes back as
  # one newline-joined string, so a newline in 'name' signals the
  # duplicate.
  if ( index( $config->val( $source_section, 'name' ), "\n" ) != -1 ) {
    die( sprintf( "The source section '[%s]' occurs more\n"
                    . "than once in the configuration file\n",
                  $source_section ) );
  }

  $source_ids{$source_section} = ++$source_id;

  # Fetch every column value in SCALAR context before formatting.
  # Called directly inside the printf() argument list, val() runs in
  # list context and returns an empty list for a missing parameter,
  # which would shift all following arguments into the wrong columns.
  my $name     = $config->val( $source_section, 'name' );
  my $order    = $config->val( $source_section, 'order' );
  my $priority = $config->val( $source_section, 'priority' );
  my $status   = $config->val( $source_section, 'status', 'NOIDEA' );

  # Empty string when no 'prio_descr' is configured.
  my $priority_description =
    $config->val( $source_section, 'prio_descr', '' );

  printf( "# Source '%s' (id = %d)\n", $source_name, $source_id );

  print( "INSERT INTO source "
           . "(name, source_release, ordered, "
           . "priority, priority_description, status)\n" );

  printf( "VALUES ('%s', '1', %d, %d, '%s', '%s');\n",
          $name, $order, $priority, $priority_description, $status );

  # 'dependent_on' is a comma-separated list of sources that must be
  # loaded before this one; it defaults to no dependencies.
  my @dependents =
    split( /\,/, $config->val( $source_section, 'dependent_on', '' ) );

  foreach my $dep (@dependents) {
    print
      "# adding source dependency that $source_section needs $dep loaded first\n";
    print
      "INSERT IGNORE INTO dependent_source (master_source_id, dependent_name)\n";
    printf( "VALUES (%d, '%s');\n\n", $source_ids{$source_section}, $dep );
  }
} ## end foreach my $source_section ...
# ----------------------------------------------------------------------
# Data files: for every '[species ...]' section (in sorted order) emit
# one 'INSERT INTO source_url' statement per source listed in the
# section's 'source' parameter, using the ids stored in %source_ids.
# ----------------------------------------------------------------------
print( '#' x 80, "\n" );
print("# DATA FILES\n");

# Horizontal rule printed above and below each species heading.
my $rule = '#' . ( '-' x 79 ) . "\n";

for my $species_section ( sort( $config->GroupMembers('species') ) ) {
  my ( $gap, $species ) = $species_section =~ /^species(\s+)(\S+)\s*$/;

  # Exactly one whitespace character is allowed between 'species' and
  # the species name.
  if ( length($gap) > 1 ) {
    die sprintf(
      "Too many spaces between the words 'species' and '%s'\n"
        . "while reading species section '[%s]'\n",
      $species, $species_section );
  }

  # The first configured taxonomy id serves as the species id.
  my @tax_ids =
    split( /\n/, $config->val( $species_section, 'taxonomy_id' ) );
  my $species_id = $tax_ids[0];

  print($rule);
  printf( "# Data for species '%s' (id = %d)\n", $species, $species_id );
  print($rule);

  # 'source' holds one source name per line.
  my @wanted_sources =
    sort( split( /\n/, $config->val( $species_section, 'source' ) ) );

  for my $wanted (@wanted_sources) {
    # Rebuild the section name of the source and strip one trailing
    # whitespace character, if present.
    my $section = "source " . $wanted;
    $section =~ s/\s$//;

    exists( $source_ids{$section} )
      or die sprintf(
      "Can not find source section '[%s]'\n"
        . "while reading species section '[%s]'\n",
      $section, $species_section );

    my $id = $source_ids{$section};

    printf( "# Data from source '%s' (id = %d)\n", $wanted, $id );
    print("INSERT INTO source_url (source_id, species_id, parser)\n");

    # Use 'old_parser' only when it is configured and $preparse is
    # false; otherwise use 'parser'.
    my $parser;
    if ( defined( $config->val( $section, 'old_parser' ) ) && !$preparse ) {
      $parser = $config->val( $section, 'old_parser' );
    }
    else {
      $parser = $config->val( $section, 'parser' );
    }

    printf( "VALUES (%d, %d, '%s') ;\n", $id, $species_id, $parser );
  } ## end for my $wanted ...
} ## end for my $species_section ...

print "# FINISHED SUCCESSFULLY\n";