Module timeseries
[hide private]
[frames | no frames]

Source Code for Module timeseries

  1  #!/usr/bin/env python 
  2   
  3   
  4  import re 
  5  import time 
  6  import sys 
  7  import corestats 
  8  from pylab import *  # you need Matplotlib installed (which needs Numeric) 
  9   
 10   
 11   
 12  # the regex gets compiled on module import so it's only done once 
 13  parse_time_re = re.compile( 
 14      '([0-9]{4})-([0-9]{2})-([0-9]{2}) ' 
 15      '([0-9]{2}):([0-9]{2}):([0-9]{2})') 
 16   
 17   
def parse_time(timestamp, time_format='timestamp'):
    """Return a time value as integer seconds since the epoch.

    timestamp   -- the value to convert
    time_format -- 'epoch' if timestamp is already an epoch value, or
                   'timestamp' if it is a 'YYYY-MM-DD HH:MM:SS' string
                   (matched against the module-level parse_time_re)

    On a timestamp string that does not match, or an unknown time_format,
    an error is written to stderr and the process exits with status 1.
    """
    if time_format == 'epoch':
        # already seconds since the epoch; just normalise to int
        return int(timestamp)

    if time_format != 'timestamp':
        # TODO: should throw an exception here instead
        sys.stderr.write('ERROR: bad time_format supplied to time_series.parse_time() \n')
        sys.exit(1)

    # convert an exported TimeGenerated timestamp into seconds since the epoch
    try:
        # a failed match returns None, so .groups() raises AttributeError
        fields = parse_time_re.match(timestamp).groups()
        # year, month, day, hour, minute, second, then weekday/yearday
        # placeholders and -1 so mktime works out DST itself
        epoch = time.mktime(tuple(int(field) for field in fields) + (0, 0, -1))
    except AttributeError:
        sys.stderr.write('ERROR: bad time value: %s\n'% timestamp)
        sys.exit(1)
    return int(epoch)
42 43 44 45
class TimeSeries:
    """Slice timestamped values into fixed-width time slots for stats and graphs.

    The raw samples are kept in self.datapoints as [elapsed seconds, value]
    pairs, and self.series holds one list of values per time slot.
    calc_series() reduces each slot to a single number, and the graph_*
    methods plot either the raw datapoints or the calculated series using
    the plotting functions pulled in by the module's `from pylab import *`.
    """

    def __init__(self, interval, data_set, time_format='timestamp'):
        """Parse data_set and slice it into slots of `interval` seconds.

        interval    -- duration in seconds of each slice of the time-series
        data_set    -- sequence of 'timestamp,value' csv strings; the first
                       and last rows are taken as the series start and end
        time_format -- 'timestamp' or 'epoch', passed through to parse_time()

        Raises ValueError if interval is not positive. An empty data_set
        raises IndexError, as there is no first row to read.
        """
        # a zero or negative interval would make the slicing loop below
        # run forever, so reject it before doing any work
        if interval <= 0:
            raise ValueError(
                'interval must be a positive number of seconds: %r' % (interval,))
        self.interval = interval

        # convert the 'timestamp, value' csv strings into [timestamp, value] pairs
        rows = [re.split(',', row) for row in data_set]

        # store a timestamp of the series start and end
        self.starttime = rows[0][0]
        self.finishtime = rows[-1][0]

        # epoch time of the series start
        self.offset = parse_time(self.starttime, time_format)
        # duration of the entire series in seconds
        self.timespan = parse_time(self.finishtime, time_format) - self.offset

        # convert timestamps to epoch seconds and subtract the start offset so
        # we have a sequence of [int elapsed seconds, float value] pairs
        self.datapoints = [
            [parse_time(row[0], time_format) - self.offset, float(row[1])]
            for row in rows
        ]

        # number of samples in data set
        self.length = len(self.datapoints)

        # slice the datapoints into slots based on the interval size;
        # slots with no datapoints contain an empty list
        # NOTE(review): the loop stops once next_marker reaches timespan, so
        # datapoints at or after the last whole interval boundary (including
        # the final sample) are not placed in any slot -- confirm intended
        self.series = []
        current_marker = 0
        next_marker = self.interval
        while next_marker < self.timespan:
            self.series.append(
                [value for elapsed, value in self.datapoints
                 if current_marker <= elapsed < next_marker])
            current_marker = next_marker
            next_marker += self.interval

        # used to store a time series that has been processed
        self.calced_series = []

        # set options for graphing
        self.screen_output = True
        self.image_output = True
        self.graph_type = 'LINE'  # 'LINE' or 'SCATTER'
        self.output_name = 'timeseries.png'
        self.graph_xlabel = 'Elapsed Time (s)'
        self.graph_ylabel = 'Value'
        self.graph_title = ''

    def dump_datapoints(self):
        """Print every [elapsed seconds, value] datapoint to stdout."""
        for row in self.datapoints:
            print(row)

    def dump_series(self):
        """Print every time slot (a list of values) to stdout."""
        for slot in self.series:
            print(slot)

    def dump_calced_series(self):
        """Print every entry of the calculated series to stdout."""
        for slot in self.calced_series:
            print(slot)

    def calc_series(self, calc_type):
        """Reduce each slot of self.series to one number and return the list.

        calc_type -- one of 'SUM', 'COUNT', 'COUNT_PERSEC', 'MIN', 'MAX',
                     'AVG', 'MEDIAN', 'STDEV', 'PERCENT50', 'PERCENT90',
                     'PERCENT95', 'PERCENT99'

        Empty slots yield 0. The result is also stored in self.calced_series.
        An unrecognised calc_type contributes nothing for non-empty slots, so
        the result can then be shorter than self.series.
        """
        # NOTE(review): COUNT_PERSEC divides count() by the interval; under
        # Python 2 that truncates if both are ints -- confirm intended
        calculations = {
            'SUM': lambda stats: stats.sum(),
            'COUNT': lambda stats: stats.count(),
            'COUNT_PERSEC': lambda stats: stats.count() / self.interval,
            'MIN': lambda stats: stats.min(),
            'MAX': lambda stats: stats.max(),
            'AVG': lambda stats: stats.avg(),
            'MEDIAN': lambda stats: stats.median(),
            'STDEV': lambda stats: stats.stdev(),
            'PERCENT50': lambda stats: stats.percentile(50),
            'PERCENT90': lambda stats: stats.percentile(90),
            'PERCENT95': lambda stats: stats.percentile(95),
            'PERCENT99': lambda stats: stats.percentile(99),
        }
        calculate = calculations.get(calc_type)

        calced_series = []
        for slot in self.series:
            if not slot:
                # don't run calcs on null slots; put a zero in their place
                calced_series.append(0)
            elif calculate is not None:
                # build the Stats object for every calc type, so the
                # PERCENT95/PERCENT99 cases no longer hit an unbound name
                calced_series.append(calculate(corestats.Stats(slot)))
        self.calced_series = calced_series
        return self.calced_series

    def graph_all_tk(self):
        """Plot all raw datapoints in an on-screen window only."""
        self.screen_output = True
        self.image_output = False
        self.__graph_all()

    def graph_all_image(self):
        """Plot all raw datapoints to the image file self.output_name only."""
        self.screen_output = False
        self.image_output = True
        self.__graph_all()

    def __graph_all(self):
        """Plot every datapoint value against its elapsed time."""
        timeslots = [elapsed for elapsed, _value in self.datapoints]
        sequence = [value for _elapsed, value in self.datapoints]
        self.__graph(timeslots, sequence)

    def graph_series_tk(self):
        """Plot the calculated series in an on-screen window only."""
        self.screen_output = True
        self.image_output = False
        self.__graph_series()

    def graph_series_image(self):
        """Plot the calculated series to the image file self.output_name only."""
        self.screen_output = False
        self.image_output = True
        self.__graph_series()

    def __graph_series(self):
        """Plot self.calced_series against the start time of each slot.

        Writes an error to stderr and exits with status 1 if calc_series()
        has not produced a series yet.
        """
        if len(self.calced_series) < 1:
            sys.stderr.write('ERROR: there is no calculated series to graph. create one first.\n')
            sys.exit(1)
        # create a list of slot start times from the series length and interval
        timeslots = []
        marker = 0
        for _slot in self.series:
            timeslots.append(marker)
            marker += self.interval
        self.__graph(timeslots, self.calced_series)

    def __graph(self, timeslots, sequence):
        """Draw sequence against timeslots using the current graph options.

        Saves to self.output_name when self.image_output is set and shows a
        window when self.screen_output is set. An unknown self.graph_type
        writes an error to stderr and exits with status 1.
        """
        xlabel(self.graph_xlabel)
        ylabel(self.graph_ylabel)
        title(self.graph_title)
        if self.graph_type == 'LINE':
            lines = plot(timeslots, sequence, 'r-')  # line plot
            setp(lines, color='r', linewidth=.5)
        elif self.graph_type == 'SCATTER':
            lines = plot(timeslots, sequence, 'r,')  # pixel scatter plot
        else:
            sys.stderr.write('ERROR: invalid graph type.\n')
            sys.exit(1)
        grid(True)
        if self.image_output:
            savefig(self.output_name)
        if self.screen_output:
            show()
225