[perf-web] Error report uploads
- From: Owen Taylor <otaylor src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [perf-web] Error report uploads
- Date: Tue, 23 Sep 2014 14:02:38 +0000 (UTC)
commit 9968259345038ec83e30db10aa3f78a07abdcd47
Author: Owen W. Taylor <otaylor fishsoup net>
Date: Tue Sep 23 09:55:39 2014 -0400
Error report uploads
Allow controller machines to post error reports, which have, instead of
metrics, a error message, and a JSON-formatted syslog from the target
machine.
Display the error message and a link to the log on the target web page.
logs/.gitignore | 1 +
metrics/models.py | 1 +
metrics/views.py | 248 +++++++++++++++++++++++++++++++++++++----
perf/urls.py | 1 +
static/main.js | 83 +++++++++-----
static/perf.css | 5 +
templates/metrics/target.html | 13 ++-
7 files changed, 299 insertions(+), 53 deletions(-)
---
diff --git a/logs/.gitignore b/logs/.gitignore
new file mode 100644
index 0000000..72e8ffc
--- /dev/null
+++ b/logs/.gitignore
@@ -0,0 +1 @@
+*
diff --git a/metrics/models.py b/metrics/models.py
index d56f660..6c0ce90 100644
--- a/metrics/models.py
+++ b/metrics/models.py
@@ -17,6 +17,7 @@ class Report(models.Model):
target = models.ForeignKey(Target)
revision = models.CharField(max_length=64)
pull_time = models.DateTimeField()
+ error = models.CharField(max_length=255)
class Value(models.Model):
report = models.ForeignKey(Report)
diff --git a/metrics/views.py b/metrics/views.py
index cd33ef9..3ad040b 100644
--- a/metrics/views.py
+++ b/metrics/views.py
@@ -6,6 +6,7 @@ import sys
from django.conf import settings
from django.db.models import Min, Max
+from django.core.exceptions import ObjectDoesNotExist
from django.template import Context, loader
from django.http import HttpResponse, HttpResponseNotFound, HttpResponseBadRequest
from django.utils import timezone
@@ -30,7 +31,9 @@ else:
def home(request):
t = loader.get_template('metrics/home.html')
- time_range = Report.objects.aggregate(min=Min('pull_time'), max=Max('pull_time'));
+ time_range = Report.objects \
+ .filter(error='') \
+ .aggregate(min=Min('pull_time'), max=Max('pull_time'));
if time_range['min'] is None:
time_range['min'] = time_range['max'] = timezone.now()
@@ -54,7 +57,8 @@ def metric(request, metric_name):
t = loader.get_template('metrics/metric.html')
time_range = Report.objects \
- .aggregate(min=Min('pull_time'), max=Max('pull_time'));
+ .filter(error='') \
+ .aggregate(min=Min('pull_time'), max=Max('pull_time'));
if time_range['min'] is None:
time_range['min'] = time_range['max'] = timezone.now()
@@ -76,11 +80,19 @@ def target(request, machine_name, partition_name, tree_name, testset_name):
t = loader.get_template('metrics/target.html')
time_range = Report.objects \
- .filter(target__name=target.name) \
+ .filter(target__name=target.name, error='') \
.aggregate(min=Min('pull_time'), max=Max('pull_time'));
if time_range['min'] is None:
time_range['min'] = time_range['max'] = timezone.now()
+ error_report = None
+ error_time = None
+ try:
+ error_report = Report.objects.filter(target__name=target_name).exclude(error='').latest('pull_time')
+ error_time = unix_time(error_report.pull_time)
+ except ObjectDoesNotExist:
+ pass
+
c = Context({
'page_name': 'target',
'target': target,
@@ -88,6 +100,8 @@ def target(request, machine_name, partition_name, tree_name, testset_name):
'partition': target.partition,
'tree': target.tree,
'testset': target.testset,
+ 'error_report': error_report,
+ 'error_time': error_time,
'metrics': config.Metric.all(),
'min_time': unix_time(time_range['min']),
'max_time': unix_time(time_range['max']),
@@ -344,18 +358,25 @@ def process_report(data, machine_name):
except ValueError:
raise ValidationError("Can't parse property 'pullTime'")
- metrics_data = child_array(data, 'metrics')
- for metric_data in metrics_data:
- if not isinstance(metric_data, dict):
- raise ValidationError("metric is not an object")
+ error = None
+ log = None
+ metrics_data = None
+ if 'error' in data:
+ error = child_string(data, 'error')
+ log = child_array(data, 'log')
+ else:
+ metrics_data = child_array(data, 'metrics')
+ for metric_data in metrics_data:
+ if not isinstance(metric_data, dict):
+ raise ValidationError("metric is not an object")
- metric_name = child_string(metric_data, 'name')
- try:
- metric = config.Metric.get(metric_name)
- except KeyError:
- raise ValidationError("unknown metric '%s'" % metric_name)
+ metric_name = child_string(metric_data, 'name')
+ try:
+ metric = config.Metric.get(metric_name)
+ except KeyError:
+ raise ValidationError("unknown metric '%s'" % metric_name)
- metric_value = child_number(metric_data, 'value')
+ metric_value = child_number(metric_data, 'value')
# Now actually store into the database
@@ -368,20 +389,30 @@ def process_report(data, machine_name):
})
report = Report(target=target_dbobj,
revision=revision,
- pull_time=pull_time)
+ pull_time=pull_time,
+ error=error if error is not None else '')
report.save()
+ if log is not None:
+ log_dir = os.path.join(settings.LOG_ROOT, target_name.replace('/', '-'))
+ if not os.path.exists(log_dir):
+ os.makedirs(log_dir)
+ log_path = os.path.join(log_dir,
+ pull_time.strftime('%Y-%m-%d-%H:%M:%S') + '-' + revision + '.json')
+ with open(log_path, 'w') as fp:
+ json.dump(log, fp)
+
+ if metrics_data is not None:
+ for metric_data in metrics_data:
+ metric_name = child_string(metric_data, 'name')
+ metric = config.Metric.get(metric_name)
+ metric_value = child_number(metric_data, 'value')
- for metric_data in metrics_data:
- metric_name = child_string(metric_data, 'name')
- metric = config.Metric.get(metric_name)
- metric_value = child_number(metric_data, 'value')
-
- (metric_dbobj, _) = Metric.objects.get_or_create(name=metric_name)
+ (metric_dbobj, _) = Metric.objects.get_or_create(name=metric_name)
- value = Value(report=report,
- metric=metric_dbobj,
- value=metric_value)
- value.save()
+ value = Value(report=report,
+ metric=metric_dbobj,
+ value=metric_value)
+ value.save()
@require_POST
@csrf_exempt
@@ -414,3 +445,172 @@ def upload(request):
return HttpResponseBadRequest(e.message)
return HttpResponse("OK\n")
+
+_PRIORITY_RE = re.compile('^[0-7]$')
+_LINE_RE = re.compile('^([^\n]*(?:\n[^\n]*){2})\n')
+_ESCAPE_RE = re.compile(r'\0x1b[0-9;]+m')
+# C0 and C1 control characters, DEL, except for \t\n
+_UNICODE_CONTROL_RE = re.compile(r'[\x00-\x08\x0A-\x1F\x7F-\x9F]')
+
+def escape_character(m):
+ c = m.group(0)
+ if c == '\n':
+ return '\\n'
+ elif c == '\r':
+ return '\\r'
+ else:
+ return '\\x%02x' % ord(c)
+
+def log(request, machine_name, partition_name, tree_name, testset_name, report_id, format):
+ target_name = machine_name + '/' + partition_name + '/' + tree_name + '/' + testset_name
+ try:
+ target = config.Target.get(target_name)
+ except KeyError:
+ return HttpResponseNotFound("No such target")
+
+ try:
+ report = Report.objects.get(id=report_id,
+ target__name=target.name)
+ except ObjectDoesNotExist:
+ return HttpResponseNotFound("No such log")
+
+ log_path = os.path.join(settings.LOG_ROOT,
+ target.name.replace('/', '-'),
+ report.pull_time.strftime('%Y-%m-%d-%H:%M:%S') + '-' + report.revision + '.json')
+ try:
+ fp = open(log_path, 'r')
+ except IOError, e:
+ return HttpResponseNotFound("Log has been purged")
+
+ if format == 'json':
+ # Could using StreamingHttpResponse; as below Django will just
+ # pile the chunks up in an array
+ response = HttpResponse(content_type="application/json")
+ while True:
+ buf = fp.read(32*1024)
+ if buf == '':
+ break
+ response.write(buf)
+ else:
+ if format == 'html':
+ response = HttpResponse(content_type="text/html; charset=utf-8")
+ response.write('''<!DOCTYPE html><html>
+<head>
+ <title>%s - %s</title>
+ <style type="text/css">
+ .error { color: red; }
+ .notice { font-weight: bold; }
+ </style>
+</head>
+<body><pre>''' % (report.pull_time.strftime('%Y-%m-%d %H:%M:%S'), target_name))
+ else:
+ response = HttpResponse(content_type="text/plain; charset=utf-8")
+ log = json.load(fp)
+ for record in log:
+ # We write out log records in roughly the same format as
+ # the journalctl -o iso-short format
+ try:
+ time = long(record['_SOURCE_REALTIME_TIMESTAMP'])
+ except KeyError:
+ time = 0
+ if time == 0:
+ for f in record['__CURSOR'].split(';'):
+ if f.startswith('t='):
+ time = long(f[2:], 16)
+ dt = datetime.utcfromtimestamp(time/1000000)
+ # This is like journalctl -o short-iso; default is '%b %d %H:%M:%S'
+ response.write(dt.strftime('%Y-%m-%dT%H:%M:%S%z '))
+
+ # journalctl shows the hostname here - that isn't interesting for us
+ #
+ # try:
+ # response.write(record['_HOSTNAME'])
+ # except KeyError:
+ # pass
+ #
+ # response.write(' ')
+
+ try:
+ response.write(record['SYSLOG_IDENTIFIER'])
+ except KeyError:
+ response.write(record['_COMM'])
+
+ pid = None
+ try:
+ pid = record['_PID']
+ except KeyError:
+ try:
+ pid = record['_SYSLOG_PID']
+ except KeyError:
+ pass
+ if pid != None:
+ response.write('[')
+ response.write(pid)
+ response.write(']')
+
+ response.write(': ')
+
+ message = record['MESSAGE']
+
+ # If the message was an escaped blob, try to convert it to text
+ if type(message) == list:
+ message = ''.join((chr(x) for x in message))
+ try:
+ message = unicode(message, 'UTF-8')
+ except UnicodeDecodeError:
+ # journalctl falls back to printing the bytes, this seems more useful
+ message = repr(message)[1:-1]
+
+ # 300 characters or 3 lines, whichever is less; journalctl does
+ # a more sophisticated approach to ellipsization where it ellipsizes
+ # middle lines
+ if len(message) > 300:
+ message = message[0:300] + "..."
+ m = _LINE_RE.match(message)
+ if m:
+ message = m.group(1) + "..."
+
+ # For multi-line log messages, journalctl indents subsequent lines
+ # with spaces to have the same indent as the first line has
+ # pre-message text, but multi-line log messages are quite rare
+ # so we skip that
+
+ # Remove ANSI color escapes and replace tabs with 8 spaces
+ message = _ESCAPE_RE.sub('', message)
+ message = message.replace("\t", " ")
+
+ # Escape control characters; again journalctl would just print
+ # the message as bytes
+ message = _UNICODE_CONTROL_RE.sub(escape_character, message)
+
+ # For HTML mode, use bold/red to mark different priorities
+ if format == 'html':
+ priority = 6 # Info
+ try:
+ priority_str = record['PRIORITY']
+ if _PRIORITY_RE.match(priority_str):
+ priority = int(priority_str)
+ except KeyError:
+ pass
+
+ if priority <= 3: # error
+ response.write('<span class="error">');
+ response.write(message)
+ response.write('</span>')
+ elif priority <= 5: #notice
+ response.write('<span class="notice">');
+ response.write(message)
+ response.write('</span>')
+ else:
+ response.write(message)
+ else:
+ response.write(message)
+
+ response.write('\n')
+
+ if format == 'html':
+ response.write('</pre></body')
+
+ fp.close()
+
+ return response
diff --git a/perf/urls.py b/perf/urls.py
index 6c1de22..2da9654 100644
--- a/perf/urls.py
+++ b/perf/urls.py
@@ -9,6 +9,7 @@ urlpatterns = patterns('',
url(r'^machines$', 'metrics.views.machines'),
url(r'^metric/(?P<metric_name>[^/]+)$', 'metrics.views.metric'),
url(r'^target/(?P<machine_name>[^/]+)/(?P<partition_name>[^/]+)/(?P<tree_name>[^/]+)/(?P<testset_name>[^/]+)$',
'metrics.views.target'),
+
url(r'^log/(?P<machine_name>[^/]+)/(?P<partition_name>[^/]+)/(?P<tree_name>[^/]+)/(?P<testset_name>[^/]+)/(?P<report_id>\d+).(?P<format>txt|json|html)$',
'metrics.views.log'),
# target=MACHINE/PARTITION/TREE/TESTSET, metric=METRIC, start=YYYY-MM-YY, end=YYYY-MM-YY,
group=none|hour6|day|week|month
url(r'^api/values$', 'metrics.views.values'),
url(r'^api/upload$', 'metrics.views.upload')
diff --git a/static/main.js b/static/main.js
index 029b829..d6ae9b4 100644
--- a/static/main.js
+++ b/static/main.js
@@ -1633,6 +1633,13 @@ PerfDisplay.prototype.onWindowLoaded = function() {
this.refresh();
}.bind(this));
+ var failureReference = $( "#failureReference" ).get(0);
+ if (failureReference != null) {
+ var failureTime = Number(failureReference.getAttribute("data-time"));
+ this.failureRevision = failureReference.getAttribute("data-revision");
+ this.fetchRevisions(failureTime, failureTime);
+ }
+
this.updateHints();
if (!this.loadedRanges.isEmpty())
@@ -1774,31 +1781,10 @@ PerfDisplay.prototype.refreshDetails = function() {
// In order to map revisions to builds and figure out what builds were
// between detailsInfo.lastRevision and detailsInfo.revision, we need to
- // have the index.json loaded for each day between the times, with a
- // bit of extra padding added on the sides. (We especially need padding
- // before since the build might have a date of the day before the
- // test controller pulled the result.)
-
- var startDate;
- if (this.detailsInfo.lastTime != null)
- startDate = new Date(this.detailsInfo.lastTime * 1000.);
- else
- startDate = new Date(this.detailsInfo.time * 1000.);
-
- var endDate = new Date(this.detailsInfo.time * 1000.);
-
- if (startDate.getUTCHours() == 0)
- startDate.setTime(startDate.getTime() - DAY_MSECS);
- if (endDate.getUTCHours() == 23)
- endDate.setTime(startDate.getTime() + DAY_MSECS);
-
- TIME_OPS['day'].truncate(startDate);
- TIME_OPS['day'].truncate(endDate);
+ // load all index.json's that could affect the time.
- while (startDate.getTime() <= endDate.getTime()) {
- this.fetchRevisions(startDate);
- TIME_OPS['day'].next(startDate);
- }
+ this.fetchRevisions(this.detailsInfo.lastTime != null ? this.detailsInfo.lastTime :
this.detailsInfo.time,
+ this.detailsInfo.time);
}
// Find all the build IDs starting from the one after lastBuild up to build.
@@ -1845,6 +1831,11 @@ PerfDisplay.prototype.buildsInRange = function(lastBuild, build) {
return builds;
}
+function buildPathToBuildId(path) {
+ var parts = path.split("/");
+ return parts[0] + parts[1] + parts[2] + "." + parts[3];
+}
+
// Fills in the commits into the details area.
// If we haven't loaded the necessary index.json from build.gnome.org, returns false
PerfDisplay.prototype.updateDetails = function() {
@@ -1867,8 +1858,7 @@ PerfDisplay.prototype.updateDetails = function() {
if (builds == null)
return false;
- var parts = build.split("/");
- var id = parts[0] + parts[1] + parts[2] + "." + parts[3];
+ var id = buildPathToBuildId(build);
$( "#detailsLink" )
.attr('href', 'https://build.gnome.org/#/build/' + id)
.text(id);
@@ -1966,7 +1956,23 @@ PerfDisplay.prototype.fillDetails = function(modifiedArrays) {
}
}
-PerfDisplay.prototype.fetchRevisions = function(date) {
+PerfDisplay.prototype.updateFailure = function() {
+ if (this.failureRevision == null)
+ return;
+
+ var build = this.revisions[this.failureRevision];
+ if (build == null)
+ return;
+
+ var buildId = buildPathToBuildId(build);
+
+ $( "<a target='buildDetails'/>" )
+ .text( buildId )
+ .attr('href', 'https://build.gnome.org/#/build/' + buildId)
+ .appendTo($( "#failureReference" ).empty());
+}
+
+PerfDisplay.prototype._fetchRevisionsForDay = function(date) {
var datePath = date.getUTCFullYear() + '/' + pad(date.getUTCMonth() + 1) + '/' + pad(date.getUTCDate());
if (datePath in this.loadedRevisions)
return;
@@ -1989,9 +1995,32 @@ PerfDisplay.prototype.fetchRevisions = function(date) {
}
this.buildCount[datePath] = buildCount;
this.updateDetails();
+ this.updateFailure();
}.bind(this)});
}
+// Fetch all the index.json information for the specified range of times,
+// with bit of extra padding added on the sides. (We especially need
+// padding before since the build might have a date of the day before the
+// test controller pulled the result.)
+PerfDisplay.prototype.fetchRevisions = function(startTime, endTime) {
+ var startDate = new Date(startTime * 1000.);
+ var endDate = new Date(endTime * 1000.);
+
+ if (startDate.getUTCHours() == 0)
+ startDate.setTime(startDate.getTime() - DAY_MSECS);
+ if (endDate.getUTCHours() == 23)
+ endDate.setTime(startDate.getTime() + DAY_MSECS);
+
+ TIME_OPS['day'].truncate(startDate);
+ TIME_OPS['day'].truncate(endDate);
+
+ while (startDate.getTime() <= endDate.getTime()) {
+ this._fetchRevisionsForDay(startDate);
+ TIME_OPS['day'].next(startDate);
+ }
+}
+
PerfDisplay.prototype.updateHints = function() {
$( "#hintPlus" ).toggleClass('hint-disabled', this.rangeType == 'day');
$( "#hintMinus" ).toggleClass('hint-disabled', this.rangeType == 'year');
diff --git a/static/perf.css b/static/perf.css
index f992b76..4e0822f 100644
--- a/static/perf.css
+++ b/static/perf.css
@@ -282,6 +282,7 @@ body.details-showing #mainRight {
#targetDetails {
border-collapse: collapse;
margin-top: 0.5em;
+ font-size: 13px;
}
#targetDetails td, #targetDetails th {
@@ -297,6 +298,10 @@ body.details-showing #mainRight {
font-weight: bold;
}
+#targetDetails th.target-failed {
+ color: #aa0000;
+}
+
#homePage #aboveMainLeft {
top: 90px;
}
diff --git a/templates/metrics/target.html b/templates/metrics/target.html
index 362f89e..a9be963 100644
--- a/templates/metrics/target.html
+++ b/templates/metrics/target.html
@@ -34,10 +34,19 @@
<td class="target-detail">{{ testset.name }}</td>
<td class="target-extra">{{ testset.description }}</td>
</tr>
+ {% if error_report %}
+ <tr>
+ {% if error_time > max_time %}
+ <th class="target-failed">Failed</th>
+ {% else %}
+ <th>Last failure</th>
+ {% endif %}
+ <td id="failureReference" class="target-detail target-failed" data-time="{{error_time}}"
data-revision="{{error_report.revision}}">{{ error_report.pull_time|date:"Y-m-d H:i" }}</td>
+ <td class="target-extra target-failed">{{ error_report.error }} (<a
href="/log/{{error_report.target.name}}/{{error_report.id}}.html">Log</a>)</td>
+ </tr>
+ {% endif %}
</table>
-
</div>
-
<div id="aboveMainLeft">
<span id="rangeLabels">
<a id="dayLink" href="#" onclick="setRange(event, this, 'day');">Day</a>
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]