ensembl-hive  2.7.0
AnalysisStatsAdaptor.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
6 
7 =head1 SYNOPSIS
8 
9  $analysisStatsAdaptor = $db_adaptor->get_AnalysisStatsAdaptor;
10  $analysisStatsAdaptor = $analysisStats->adaptor;
11 
12 =head1 DESCRIPTION
13 
14  Module to encapsulate all db access for persistent class AnalysisStats.
15  There should be just one per application and database connection.
16 
17 =head1 LICENSE
18 
19  See the NOTICE file distributed with this work for additional information
20  regarding copyright ownership.
21 
22  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
23  You may obtain a copy of the License at
24 
25  http://www.apache.org/licenses/LICENSE-2.0
26 
27  Unless required by applicable law or agreed to in writing, software distributed under the License
28  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29  See the License for the specific language governing permissions and limitations under the License.
30 
31 =head1 CONTACT
32 
33  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
34 
35 =head1 APPENDIX
36 
37  The rest of the documentation details each of the object methods.
38  Internal methods are usually preceded with a _
39 
40 =cut
41 
42 
43 package Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor;
44 
45 use strict;
46 use warnings;
47 
48 use base ('Bio::EnsEMBL::Hive::DBSQL::ObjectAdaptor');
49 
50 
# Name of the database table this adaptor reads from and writes to.
sub default_table_name {
    my $table_name = 'analysis_stats';

    return $table_name;
}
54 
55 
# Returns a hashref that maps the 'when_updated' column to the SQL expression
# used when selecting it: the connection-specific "interval in seconds"
# expression, aliased as 'seconds_since_when_updated'.
sub default_input_column_mapping {
    my ($self) = @_;

    my $seconds_expression = $self->dbc->_interval_seconds_sql('when_updated');

    my %column_mapping = (
        'when_updated' => $seconds_expression . ' seconds_since_when_updated',
    );

    return \%column_mapping;
}
62 
63 
# Returns an arrayref of column names reported as "do not update";
# currently only 'when_updated'.
sub do_not_update_columns {
    my @excluded_columns = ('when_updated');

    return \@excluded_columns;
}
67 
68 
# Class name of the objects this adaptor works with.
sub object_class {
    my $class_name = 'Bio::EnsEMBL::Hive::AnalysisStats';

    return $class_name;
}
72 
73 
# Turn the hashref into an object: delegates to the parent class's
# objectify() and then sets seconds_since_last_fetch to 0 on the result.
sub objectify {
    my $self      = shift @_;
    my ($hashref) = @_;

    my $stats = $self->SUPER::objectify( $hashref );
    $stats->seconds_since_last_fetch(0);

    return $stats;
}
83 
84 
85 ################
86 #
87 # UPDATE METHODS
88 #
89 ################
90 
91 
# Writes the current state of $stats back to its analysis_stats row and then
# copies that row, stamped with CURRENT_TIMESTAMP, into analysis_stats_monitor.
#
#   $stats : object providing the accessors called below (status, avg_*_msec_per_job,
#            is_excluded, the *_job_count counters, analysis_id, hive_pipeline, ...)
#
# The job counters and num_running_workers are written only when the pipeline
# reports that it does not use triggers. The update also resets sync_lock to '0'.
sub update_stats_and_monitor {
    my ($self, $stats) = @_;

    # The first element keeps its trailing space so the generated SQL text is unchanged.
    my @assignments = (
        "status='" . $stats->status . "' ",
        'avg_msec_per_job='        . $stats->avg_msec_per_job(),
        'avg_input_msec_per_job='  . $stats->avg_input_msec_per_job(),
        'avg_run_msec_per_job='    . $stats->avg_run_msec_per_job(),
        'avg_output_msec_per_job=' . $stats->avg_output_msec_per_job(),
        'is_excluded='             . $stats->is_excluded(),
    );

    if ( !$stats->hive_pipeline->hive_use_triggers() ) {
        foreach my $counter (qw(total_job_count semaphored_job_count ready_job_count done_job_count failed_job_count)) {
            push @assignments, $counter . '=' . $stats->$counter();
        }

        # Refresh the worker count from the active roles before storing it.
        $stats->num_running_workers( $self->db->get_RoleAdaptor->count_active_roles( $stats->analysis_id() ) );
        push @assignments, 'num_running_workers=' . $stats->num_running_workers();
    }

    push @assignments, 'when_updated=CURRENT_TIMESTAMP', "sync_lock='0'";

    my $update_sql = 'UPDATE analysis_stats SET '
                   . join(',', @assignments)
                   . " WHERE analysis_id='" . $stats->analysis_id . "' ";

    my $update_sth = $self->prepare($update_sql);
    $update_sth->execute();
    $update_sth->finish;

    my $monitor_sth = $self->prepare("INSERT INTO analysis_stats_monitor SELECT CURRENT_TIMESTAMP, analysis_stats.* from analysis_stats WHERE analysis_id = ".$stats->analysis_id);
    $monitor_sth->execute();
    $monitor_sth->finish;

    $stats->seconds_since_when_updated(0); #not exact but good enough :)
}
126 
127 
# Sets the status column of one analysis_stats row.
#
#   $analysis_id : identifies the row to update
#   $status      : new value for the status column
#
# Both values are passed as bind parameters rather than being interpolated
# into the SQL text, so quotes or other special characters in either of them
# can neither break the statement nor inject additional SQL.
sub update_status {
    my ($self, $analysis_id, $status) = @_;

    my $sth = $self->prepare('UPDATE analysis_stats SET status = ? WHERE analysis_id = ?');
    $sth->execute($status, $analysis_id);
    $sth->finish;
}
137 
138 
139 =head2 interval_update_work_done
140 
141  Arg [1] : int $analysis_id
142  Arg [2] : int $jobs_done_in_interval
143  Arg [3] : int $interval_msec
144  Arg [4] : int $fetching_msec
145  Arg [5] : int $running_msec
146  Arg [6] : int $writing_msec
147  Arg [7] : real $weight_factor [optional]
148  Example : $statsDBA->interval_update_work_done($analysis_id, $jobs_done, $interval_msec, $fetching_msec, $running_msec, $writing_msec);
149  Description : Runs a database update that recalculates avg_msec_per_job and its input/run/output breakdowns.
150  Each new average combines the previous average, weighted by done_job_count/$weight_factor,
151  with the totals measured over the latest interval, which gives a better (more responsive) average.
152  Unless the pipeline uses triggers, done_job_count is also incremented by $jobs_done_in_interval.
153 
154 =cut
155 
# Recalculates the per-job timing averages of one analysis_stats row from the
# figures measured over the latest interval and, unless the pipeline uses
# triggers, adds $job_count to done_job_count.
#
#   $analysis_id    : identifies the row to update
#   $job_count      : number of jobs done in the interval
#   $interval_msec  : total time of the interval     -> avg_msec_per_job
#   $fetching_msec  : time spent fetching input      -> avg_input_msec_per_job
#   $running_msec   : time spent running             -> avg_run_msec_per_job
#   $writing_msec   : time spent writing output      -> avg_output_msec_per_job
#   $weight_factor  : optional; any false value (undef, 0, '') is replaced by 3
#
# The statement is assembled once from a table of columns instead of being
# spelled out twice in two nearly identical branches. All values are
# interpolated into the SQL text as they were before, so callers are still
# expected to pass plain numbers.
sub interval_update_work_done {
    my ($self, $analysis_id, $job_count, $interval_msec, $fetching_msec, $running_msec, $writing_msec, $weight_factor) = @_;

    $weight_factor ||= 3; # makes it more sensitive to the dynamics of the farm

    # Each average column paired with the interval total that feeds into it.
    my @averaged_columns = (
        [ 'avg_msec_per_job',        $interval_msec ],
        [ 'avg_input_msec_per_job',  $fetching_msec ],
        [ 'avg_run_msec_per_job',    $running_msec  ],
        [ 'avg_output_msec_per_job', $writing_msec  ],
    );

    my @assignments = map {
        my ($column, $msec) = @$_;
        "$column = ROUND(((done_job_count*$column)/$weight_factor + $msec) / (done_job_count/$weight_factor + $job_count))"
    } @averaged_columns;

    # done_job_count must remain the LAST assignment: a backend that evaluates
    # SET assignments left to right would otherwise feed the already-incremented
    # counter into the averages above.
    unless( $self->db->hive_pipeline->hive_use_triggers() ) {
        push @assignments, "done_job_count = done_job_count + $job_count";
    }

    my $sql = "UPDATE analysis_stats SET\n    "
            . join(",\n    ", @assignments)
            . "\n WHERE analysis_id= $analysis_id\n";

    $self->dbc->do( $sql );
}
182 
183 
# Adds $increment to the column named $counter in one analysis_stats row.
#
#   $counter     : name of the column to change (interpolated into the SQL as-is)
#   $increment   : amount to add; can either be positive or negative
#   $analysis_id : identifies the row to update
#
# Does nothing when the pipeline uses triggers or when $increment is false
# (zero/undef). The row is only touched while its sync_lock is 0.
sub increment_a_counter {
    my ($self, $counter, $increment, $analysis_id) = @_;

    return if $self->db->hive_pipeline->hive_use_triggers();
    return unless $increment;

## ToDo: does it make sense to update the timestamp as well, to signal to the sync-allowed workers that they should wait?
#   $self->dbc->do( "UPDATE analysis_stats SET $counter = $counter + ($increment), when_updated=CURRENT_TIMESTAMP WHERE sync_lock=0 AND analysis_id='$analysis_id'" );
    my $sql = "UPDATE analysis_stats SET $counter = $counter + ($increment) WHERE sync_lock=0 AND analysis_id='$analysis_id'";

    $self->dbc->do( $sql );
}
195 
196 1;
197 
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
Definition: AnalysisStatsAdaptor.pm:18
Bio::EnsEMBL::Hive::Worker
Definition: Worker.pm:53
Bio::EnsEMBL::Hive::Cacheable::hive_pipeline
public hive_pipeline()
Bio::EnsEMBL::Hive
Definition: Hive.pm:38