File: //sbin/ras-mc-ctl
#!/usr/bin/perl -w
#******************************************************************************
# Copyright (c) 2013 Mauro Carvalho Chehab <[email protected]>
#
# This tool is a modification of the edac-ctl, written as part of the
# edac-utils:
# Copyright (C) 2003-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Mark Grondona <[email protected]>
# UCRL-CODE-230739.
#
# This version uses the new EDAC v 3.0.0 and upper API, with adds proper
# representation for the memory controllers found on Intel designs after
# 2002. It requires Linux Kernel 3.5 or upper to work.
#
# This is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#****************************************************************************/
use strict;
use File::Basename;
use File::Find;
use Getopt::Long;
use POSIX;
my $dbname = "/var/lib/rasdaemon/ras-mc_event.db";
my $prefix = "/usr";
my $sysconfdir = "/etc";
my $dmidecode = find_prog ("dmidecode");
my %conf = ();
my %bus = ();
my %dimm_size = ();
my %dimm_node = ();
my %dimm_label_file = ();
my %dimm_location = ();
my %csrow_size = ();
my %rank_size = ();
my %csrow_ranks = ();
my %dimm_ce_count = ();
my %dimm_ue_count = ();
my @layers;
my @max_pos;
my @max_csrow;
my $item_size;
my $prog = basename $0;
$conf{labeldb} = "$sysconfdir/ras/dimm_labels.db";
$conf{labeldir} = "$sysconfdir/ras/dimm_labels.d";
$conf{mbconfig} = "$sysconfdir/ras/mainboard";
my $status = 0;
my $usage = <<EOF;
Usage: $prog [OPTIONS...]
--quiet Quiet operation.
--mainboard Print mainboard vendor and model for this hardware.
--status Print status of EDAC drivers.
--print-labels Print Motherboard DIMM labels to stdout.
--guess-labels Print DMI labels, when bank locator is available.
--register-labels Load Motherboard DIMM labels into EDAC driver.
--delay=N Delay N seconds before writing DIMM labels.
--labeldb=DB Load label database from file DB.
--layout Display the memory layout.
--summary Presents a summary of the logged errors.
--errors Shows the errors stored at the error database.
--error-count Shows the corrected and uncorrected error counts using sysfs.
--help This help message.
EOF
parse_cmdline();
if ( $conf{opt}{mainboard} || $conf{opt}{print_labels}
|| $conf{opt}{register_labels} || $conf{opt}{display_memory_layout}
|| $conf{opt}{guess_dimm_label} || $conf{opt}{error_count}) {
get_mainboard_info();
if ($conf{opt}{mainboard} eq "report") {
print "$prog: mainboard: ",
"$conf{mainboard}{vendor} model $conf{mainboard}{model}\n";
}
if ($conf{opt}{print_labels}) {
print_dimm_labels ();
}
if ($conf{opt}{register_labels}) {
register_dimm_labels ();
}
if ($conf{opt}{display_memory_layout}) {
display_memory_layout ();
}
if ($conf{opt}{guess_dimm_label}) {
guess_dimm_label ();
}
if ($conf{opt}{error_count}) {
display_error_count ();
}
}
if ($conf{opt}{status}) {
$status = print_status ();
exit ($status ? 0 : 1);
}
if ($conf{opt}{summary}) {
summary ();
}
if ($conf{opt}{errors}) {
errors ();
}
exit (0);
sub parse_cmdline
{
$conf{opt}{mainboard} = '';
$conf{opt}{print_labels} = 0;
$conf{opt}{register_labels} = 0;
$conf{opt}{status} = 0;
$conf{opt}{quiet} = 0;
$conf{opt}{delay} = 0;
$conf{opt}{display_memory_layout} = 0;
$conf{opt}{guess_dimm_label} = 0;
$conf{opt}{summary} = 0;
$conf{opt}{errors} = 0;
$conf{opt}{error_count} = 0;
my $rref = \$conf{opt}{report};
my $mref = \$conf{opt}{mainboard};
Getopt::Long::Configure ("bundling");
my $rc = GetOptions ("mainboard:s" => sub { $$mref = $_[1]||"report" },
"help" => sub {usage (0)},
"quiet" => \$conf{opt}{quiet},
"print-labels" => \$conf{opt}{print_labels},
"guess-labels" => \$conf{opt}{guess_dimm_label},
"register-labels" => \$conf{opt}{register_labels},
"delay:s" => \$conf{opt}{delay},
"labeldb=s" => \$conf{labeldb},
"status" => \$conf{opt}{status},
"layout" => \$conf{opt}{display_memory_layout},
"summary" => \$conf{opt}{summary},
"errors" => \$conf{opt}{errors},
"error-count" => \$conf{opt}{error_count}
);
usage(1) if !$rc;
usage (0) if !grep $conf{opt}{$_}, keys %{$conf{opt}};
if ($conf{opt}{delay} && !$conf{opt}{register_labels}) {
log_error ("Only use --delay with --register-labels\n");
exit (1);
}
}
sub usage
{
my ($rc) = @_;
print "$usage\n";
exit ($rc);
}
sub run_cmd
{
my @args = @_;
system ("@args");
return ($?>>8);
}
sub print_status
{
my $status = 0;
open (MODULES, "/proc/modules")
or die "Unable to open /proc/modules: $!\n";
while (<MODULES>) {
$status = 1 if /_edac/;
}
print "$prog: drivers ", ($status ? "are" : "not"), " loaded.\n"
unless $conf{opt}{quiet};
return ($status);
}
sub parse_dimm_nodes
{
my $file = $File::Find::name;
if (($file =~ /max_location$/)) {
open IN, $file;
my $location = <IN>;
close IN;
my @temp = split(/ /, $location);
$layers[0] = "mc";
if (m,/mc/mc(\d+),) {
$max_pos[0] = $1 if (!exists($max_pos[0]) || $1 > $max_pos[0]);
} else {
$max_pos[0] = 0 if (!exists($max_pos[0]));
}
for (my $i = 0; $i < scalar(@temp); $i += 2) {
$layers[$i / 2 + 1] = $temp[$i];
$max_pos[$i / 2 + 1] = $temp[$i + 1];
}
return;
}
if ($file =~ /size_mb$/) {
my $mc = $file;
$mc =~ s,.*mc(\d+).*,$1,;
my $csrow = $file;
$csrow =~ s,.*csrow(\d+).*,$1,;
open IN, $file;
my $size = <IN>;
close IN;
my $str_loc = join(':', $mc, $csrow);
$csrow_size{$str_loc} = $size;
return;
}
if ($file =~ /location$/) {
my $mc = $file;
$mc =~ s,.*mc(\d+).*,$1,;
my $dimm = $file;
$dimm =~ s,.*dimm(\d+).*,$1,;
open IN, $file;
my $location = <IN>;
close IN;
my @pos;
# Get the name of the hierarchy labels
if (!@layers) {
my @temp = split(/ /, $location);
$max_pos[0] = 0;
$layers[0] = "mc";
for (my $i = 0; $i < scalar(@temp); $i += 2) {
$layers[$i / 2 + 1] = $temp[$i];
$max_pos[$i / 2 + 1] = 0;
}
}
my @temp = split(/ /, $location);
for (my $i = 1; $i < scalar(@temp); $i += 2) {
$pos[$i / 2] = $temp[$i];
if ($pos[$i / 2] > $max_pos[$i / 2]) {
$max_pos[$i / 2 + 1] = $pos[$i / 2];
}
}
if ($mc > $max_pos[0]) {
$max_pos[0] = $mc;
}
# Get DIMM size
$file =~ s/dimm_location/size/;
open IN, $file;
my $size = <IN>;
close IN;
my $str_loc = join(':', $mc, @pos);
$dimm_size{$str_loc} = $size;
$dimm_node{$str_loc} = $dimm;
$file =~ s/size/dimm_label/;
$dimm_label_file{$str_loc} = $file;
$dimm_location{$str_loc} = $location;
my $count;
$file =~s/dimm_label/dimm_ce_count/;
if (-e $file) {
open IN, $file;
chomp($count = <IN>);
close IN;
} else {
log_error ("dimm_ce_count not found in sysfs. Old kernel?\n");
exit -1;
}
$dimm_ce_count{$str_loc} = $count;
$file =~s/dimm_ce_count/dimm_ue_count/;
if (-e $file) {
open IN, $file;
chomp($count = <IN>);
close IN;
} else {
log_error ("dimm_ue_count not found in sysfs. Old kernel?\n");
exit -1;
}
$dimm_ue_count{$str_loc} = $count;
return;
}
}
sub guess_product {
my $pvendor = undef;
my $pname = undef;
if (open (VENDOR, "/sys/class/dmi/id/product_vendor")) {
$pvendor = <VENDOR>;
close VENDOR;
chomp($pvendor);
}
if (open (NAME, "/sys/class/dmi/id/product_name")) {
$pname = <NAME>;
close NAME;
chomp($pname);
}
return ($pvendor, $pname);
}
sub get_mainboard_info {
my ($vendor, $model);
my ($pvendor, $pname);
if ($conf{opt}{mainboard} && $conf{opt}{mainboard} ne "report") {
($vendor, $model) = split (/[: ]/, $conf{opt}{mainboard}, 2);
}
if (!$vendor || !$model) {
($vendor, $model) = guess_vendor_model ();
}
$conf{mainboard}{vendor} = $vendor;
$conf{mainboard}{model} = $model;
($pvendor, $pname) = guess_product ();
# since product vendor is rare, use mainboard's vendor
if ($pvendor) {
$conf{mainboard}{product_vendor} = $pvendor;
} else {
$conf{mainboard}{product_vendor} = $vendor;
}
$conf{mainboard}{product_name} = $pname if $pname;
}
sub guess_vendor_model_dmidecode {
my ($vendor, $model);
my ($system_vendor, $system_model);
my $line = 0;
$< == 0 || die "Must be root to run dmidecode\n";
open (DMI, "$dmidecode |") or die "failed to run $dmidecode: $!\n";
$vendor = $model = "";
LINE:
while (<DMI>) {
$line++;
/^(\s*)(board|base board|system) information/i || next LINE;
my $indent = $1;
my $type = $2;
while ( <DMI> ) {
/^(\s*)/;
$1 lt $indent && last LINE;
$indent = $1;
if ($type eq "system") {
/(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $system_vendor = $1 );
/product(?: name)?:\s*(.*\S)\s*/i && ( $system_model = $1 );
} else {
/(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $vendor = $1 );
/product(?: name)?:\s*(.*\S)\s*/i && ( $model = $1 );
}
last LINE if ($vendor && $model);
}
}
close (DMI);
$vendor = $system_vendor if ($vendor eq "");
$model = $system_model if ($model eq "");
return ($vendor, $model);
}
sub guess_vendor_model_sysfs {
#
# Try to look up DMI information in sysfs
#
open (VENDOR, "/sys/class/dmi/id/board_vendor") or return undef;
open (MODEL, "/sys/class/dmi/id/board_name") or return undef;
my ($vendor, $model) = (<VENDOR>, <MODEL>);
close (VENDOR);
close (MODEL);
return undef unless ($vendor && $model);
chomp ($vendor, $model);
return ($vendor, $model);
}
sub parse_mainboard_config
{
my ($file) = @_;
my %hash = ();
my $line = 0;
open (CFG, "$file") or die "Failed to read mainboard config: $file: $!\n";
while (<CFG>) {
$line++;
chomp; # remove newline
s/^((?:[^'"#]*(?:(['"])[^\2]*\2)*)*)#.*/$1/; # remove comments
s/^\s+//; # remove leading space
s/\s+$//; # remove trailing space
next unless length; # skip blank lines
if (my ($key, $val) = /^\s*([-\w]+)\s*=\s*(.*)/) {
$hash{$key}{val} = $val;
$hash{$key}{line} = $line;
next;
}
return undef;
}
close (CFG) or &log_error ("close $file: $!\n");
return \%hash;
}
sub guess_vendor_model {
my ($vendor, $model);
#
# If mainboard config file exists then parse it
# to get the vendor and model information.
#
if (-f $conf{mbconfig} ) {
my $cfg = &parse_mainboard_config ($conf{mbconfig});
# If mainboard config file specified a script, then try to
# run the specified script or executable:
#
if ($cfg->{"script"}) {
$cfg = &parse_mainboard_config ("$cfg->{script}{val} |");
die "Failed to run mainboard script\n" if (!$cfg);
}
return ($cfg->{vendor}{val}, $cfg->{model}{val});
}
($vendor, $model) = &guess_vendor_model_sysfs ();
return ($vendor, $model) if ($vendor && $model);
return (&guess_vendor_model_dmidecode ());
}
sub guess_dimm_label {
open (DMI, "$dmidecode |") or die "failed to run $dmidecode: $!\n";
LINE:
while (<DMI>) {
/^(\s*)memory device$/i || next LINE;
my ($dimm_label, $dimm_addr);
while (<DMI>) {
if (/^\s*(locator|bank locator)/i) {
my $indent = $1;
$indent =~ tr/A-Z/a-z/;
if ($indent eq "locator") {
/(?:locator):\s*(.*\S)\s*/i && ( $dimm_label = $1 );
}
if ($indent eq "bank locator") {
/(?:bank locator):\s*(.*\S)\s*/i && ( $dimm_addr = $1 );
}
}
if ($dimm_label && $dimm_addr) {
printf "memory stick '%s' is located at '%s'\n",
$dimm_label, $dimm_addr;
next LINE;
}
next LINE if (/^\s*\n/);
}
}
close (DMI);
}
sub parse_dimm_labels_file
{
my ($lh, $num_layers, $lh_prod, $num_layers_prod, $file) = (@_);
my $line = -1;
my $vendor = "";
my @models = ();
my @products = ();
my $num;
open (LABELS, "$file")
or die "Unable to open label database: $file: $!\n";
while (<LABELS>) {
$line++;
next if /^#/;
chomp;
s/^\s+//;
s/\s+$//;
next unless length;
if (/vendor\s*:\s*(.*\S)\s*/i) {
$vendor = lc $1;
@models = ();
@products = ();
$num = 0;
next;
}
if (/(model|board)\s*:\s*(.*)$/i) {
!$vendor && die "$file: line $line: MB model without vendor\n";
@models = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2);
@products = ();
$num = 0;
next;
}
if (/(product)\s*:\s*(.*)$/i) {
!$vendor && die "$file: line $line: product without vendor\n";
@models = ();
@products = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2);
$num = 0;
next;
}
# Allow multiple labels to be specified on a single line,
# separated by ;
for my $str (split /;/) {
$str =~ s/^\s*(.*)\s*$/$1/;
next unless (my ($label, $info) = ($str =~ /^(.*)\s*:\s*(.*)$/i));
unless ($info =~ /\d+(?:[\.\:]\d+)*/) {
log_error ("$file: $line: Invalid syntax, ignoring: \"$_\"\n");
next;
}
for my $target (split (/[, ]+/, $info)) {
my $n;
my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}/);
if (defined($extra)) {
die ("Error: Only up to 3 layers are currently supported on label db \"$file\"\n");
return;
} elsif (!defined($top)) {
die ("Error: The label db \"$file\" is defining a zero-layers machine\n");
return;
} else {
$n = 3;
if (!defined($low)) {
$low = 0;
$n--;
}
if (!defined($mid)) {
$mid = 0;
$n--;
}
map { $lh->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label }
@models;
map { $lh_prod->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label }
@products;
}
if (!$num) {
$num = $n;
map { $num_layers->{$vendor}{lc $_} = $num } @models;
map { $num_layers_prod->{$vendor}{lc $_} = $num } @products;
} elsif ($num != $n) {
die ("Error: Inconsistent number of layers at label db \"$file\"\n");
}
}
}
}
close (LABELS) or die "Error from label db \"$file\" : $!\n";
}
sub parse_dimm_labels
{
my %labels = ();
my %num_layers = ();
my %labels_prod = ();
my %num_layers_prod = ();
#
# Accrue all DIMM labels from the labels.db file, as
# well as any files under the labels dir
#
for my $file ($conf{labeldb}, <$conf{labeldir}/*>) {
next unless -r $file;
parse_dimm_labels_file (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod, $file);
}
return (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod);
}
sub read_dimm_label
{
my ($num_layers, $mc, $top, $mid, $low) = @_;
my $sysfs = "/sys/devices/system/edac/mc";
my $pos;
$pos = "$mc:$top:$mid:$low" if ($num_layers == 3);
$pos = "$mc:$top:$mid" if ($num_layers == 2);
$pos = "$mc:$top" if ($num_layers == 1);
if (!defined($dimm_node{$pos})) {
my $label = "$pos missing";
$pos = "";
return ($label, $pos);
}
my $dimm = $dimm_node{$pos};
my $dimm_label_file = $dimm_label_file{$pos};
my $location = $dimm_location{$pos};
return ("label missing", "$pos missing") unless -f $dimm_label_file;
if (!open (LABEL, "$dimm_label_file")) {
warn "Failed to open $dimm_label_file: $!\n";
return ("Error");
}
chomp (my $label = <LABEL> || "");
close (LABEL);
$pos = "mc$mc $location";
return ($label, $pos);
}
sub get_dimm_label_node
{
my ($num_layers, $mc, $top, $mid, $low) = @_;
my $sysfs = "/sys/devices/system/edac/mc";
my $pos = "$mc:$top:$mid:$low";
$pos = "$mc:$top:$mid:$low" if ($num_layers == 3);
$pos = "$mc:$top:$mid" if ($num_layers == 2);
$pos = "$mc:$top" if ($num_layers == 1);
return "" if (!defined($dimm_node{$pos}));
return "$dimm_label_file{$pos}";
}
sub _print_dimm_labels
{
my ($lref, $num_layers, $vendor, $model, $fh, $format) = @_;
for my $mc (sort keys %{$$lref{$vendor}{$model}}) {
for my $top (sort keys %{$$lref{$vendor}{$model}{$mc}}) {
for my $mid (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}}) {
for my $low (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}{$mid}}) {
my $label = $$lref{$vendor}{$model}{$mc}{$top}{$mid}{$low};
my ($rlabel,$loc) = read_dimm_label ($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low);
printf $fh $format, $loc, $label, $rlabel;
}
}
}
}
print $fh "\n";
}
sub print_dimm_labels
{
my $fh = shift || *STDOUT;
my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels ();
my $vendor = lc $conf{mainboard}{vendor};
my $model = lc $conf{mainboard}{model};
my $pvendor = lc $conf{mainboard}{product_vendor};
my $pname = lc $conf{mainboard}{product_name};
my $format = "%-35s %-20s %-20s\n";
if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) {
log_error ("No dimm labels for $conf{mainboard}{vendor} " .
"model $conf{mainboard}{model}\n");
return;
}
my $sysfs_dir = "/sys/devices/system/edac/mc";
find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);
printf $fh $format, "LOCATION", "CONFIGURED LABEL", "SYSFS CONTENTS";
if (exists $$lref{$vendor}{$model}) {
_print_dimm_labels($lref, $num_layers, $vendor, $model, $fh, $format);
} elsif (exists $$lref_prod{$pvendor}{$pname}) {
_print_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname, $fh, $format);
}
}
sub write_dimm_labels
{
my ($lref, $num_layers, $vendor, $model) = @_;
for my $mc (sort keys %{$$lref{$vendor}{$model}}) {
for my $top (sort keys %{$$lref{$vendor}{$model}{$mc}}) {
for my $mid (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}}) {
for my $low (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}{$mid}}) {
my $file = get_dimm_label_node($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low);
# Ignore sysfs files that don't exist. Might just be
# unpopulated bank.
next unless -f $file;
if (!open (DL, ">$file")) {
warn ("Unable to open $file\n");
next;
}
syswrite DL, $$lref{$vendor}{$model}{$mc}{$top}{$mid}{$low};
close (DL);
}
}
}
}
}
sub register_dimm_labels
{
my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels ();
my $vendor = lc $conf{mainboard}{vendor};
my $model = lc $conf{mainboard}{model};
my $pvendor = lc $conf{mainboard}{product_vendor};
my $pname = lc $conf{mainboard}{product_name};
my $sysfs = "/sys/devices/system/edac/mc";
if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) {
log_error ("No dimm labels for $conf{mainboard}{vendor} " .
"model $conf{mainboard}{model}\n");
return 0;
}
my $sysfs_dir = "/sys/devices/system/edac/mc";
find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);
select (undef, undef, undef, $conf{opt}{delay});
if (exists $$lref{$vendor}{$model}) {
write_dimm_labels($lref, $num_layers, $vendor, $model);
} else {
write_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname);
}
return 1;
}
sub dimm_display_layer_rev($@);
sub dimm_display_layer_rev($@)
{
my $layer = shift;
my @pos = @_;
$layer++;
if ($layer >= scalar(@pos) - 1) {
my $str_loc = join(':', @pos);
my $size = $dimm_size{$str_loc};
if (!$size) {
$size = 0;
}
my $s = sprintf " %4i MB |", $size;
$item_size = length($s);
return $s;
}
my $s;
for (my $i = 0; $i <= $max_pos[$layer]; $i++) {
$pos[$layer] = $i;
$s .= dimm_display_layer_rev($layer, @pos);
}
return $s;
}
sub dimm_display_layer(@)
{
my @pos = @_;
my $s;
for (my $i = 0; $i <= $max_pos[0]; $i++) {
$pos[0] = $i;
$s .= dimm_display_layer_rev(0, @pos);
}
return $s;
}
sub dimm_display_layer_header($$)
{
my $n_items = 1;
my $scale;
my $layer = shift;
my $tot_items = shift;
my $s;
for (my $i = 0; $i <= $layer; $i++) {
$n_items *= $max_pos[$i] + 1;
}
$scale = $tot_items / $n_items;
my $d = 0;
for (my $i = 0; $i < $n_items; $i++) {
my $val = sprintf("%s%d", $layers[$layer], $d);
$val = substr($val, 0, $scale * $item_size - 2);
my $fillsize = $scale * $item_size - 1 - length($val);
$s .= "|";
$s .= " " x ($fillsize / 2);
$s .= $val;
$s .= " " x ($fillsize - floor($fillsize / 2));
$d++;
if ($d > $max_pos[$layer]) {
$d = 0;
}
}
$s .= "|";
return $s;
}
sub dimm_display_mem()
{
my @pos = @max_pos;
my $sep = "";
my $tot_items = 1;
my $first = 1;
for (my $i = 0; $i < scalar(@pos) - 1; $i++) {
$pos[$i] = 0;
$tot_items *= $max_pos[$i] + 1;
}
my $is_even = $max_pos[scalar(@max_pos) - 1] % 2;
for (my $d = $max_pos[scalar(@max_pos) - 1]; $d >= 0; $d--) {
my $len;
my $s = sprintf("%s%d: |", $layers[scalar(@max_pos) - 1], $d);
my $p1 = length($s) - 1;
$pos[scalar(@pos) - 1] = $d;
$s .= dimm_display_layer(@pos);
$len += length($s);
$sep = "-" x $p1;
$sep .= "+";
$sep .= "-" x ($len - $p1 - 2);
$sep .= "+";
if ($first) {
my $sep1 = " " x $p1;
$sep1 .= "+";
$sep1 .= "-" x ($len - $p1 - 2);
$sep1 .= "+";
printf "$sep1\n";
for (my $layer = 0; $layer < scalar(@pos) - 1; $layer++) {
my $s = sprintf("%s%d: |", $layers[scalar(@max_pos) - 1], 0);
my $p1 = length($s) - 1;
my $msg = " " x $p1;
$msg .= dimm_display_layer_header($layer, $tot_items);
printf "$msg\n";
}
printf "$sep\n" if (!$is_even);
$first = 0;
}
if ($is_even && (($max_pos[scalar(@max_pos) - 1] - $d) % 2 == 0)) {
printf "$sep\n";
}
printf "$s\n";
}
printf "$sep\n";
}
sub fill_csrow_size()
{
foreach my $str_loc (keys %rank_size) {
my @temp = split(/:/, $str_loc);
my $csrow = join(':', $temp[0], $temp[1]);
if ($csrow_ranks{$csrow}) {
$rank_size{$str_loc} = $csrow_size{$csrow} / $csrow_ranks{$csrow};
}
}
}
sub display_memory_layout
{
my $sysfs_dir = "/sys/devices/system/edac/mc";
find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);
if (!scalar(%csrow_size)) {
log_error ("No memories found at via edac.\n");
exit -1;
} elsif (!scalar(%dimm_size)) {
fill_csrow_size;
$layers[0] = "mc";
$layers[1] = "csrow";
$layers[2] = "channel";
@max_pos = @max_csrow;
%dimm_size = %rank_size;
}
dimm_display_mem();
}
sub display_error_count
{
my $sysfs_dir = "/sys/devices/system/edac/mc";
my $key;
my $max_width = 0;
my %dimm_labels = ();
find ({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);
if (!scalar(keys %dimm_node)) {
log_error ("No DIMMs found in /sys or new sysfs EDAC interface not found.\n");
exit -1;
}
foreach $key (keys %dimm_node) {
my $label_width;
open IN, $dimm_label_file{$key};
chomp(my $label = <IN>);
close IN;
$label_width = length $label;
if ($label_width > $max_width) {
$max_width = $label_width;
}
$dimm_labels{$key} = $label;
}
my $string = "Label";
$string .= " " x ($max_width - length $string);
print($string . "\tCE\tUE\n");
foreach $key (keys %dimm_node) {
my $ce_count = $dimm_ce_count{$key};
my $ue_count = $dimm_ue_count{$key};
print("$dimm_labels{$key}\t$ce_count\t$ue_count\n");
}
}
sub find_prog
{
my ($file) = @_;
for my $dir ("/sbin", "/usr/sbin", split ':', $ENV{PATH}) {
return "$dir/$file" if -x "$dir/$file";
}
# log_error ("Failed to find $file in PATH\n");
return "";
}
sub get_extlog_type
{
my @types;
if ($_[0] < 0 || $_[0] > 15) {
return "unknown-type";
}
@types = ("unknown",
"no error",
"single-bit ECC",
"multi-bit ECC",
"single-symbol chipkill ECC",
"multi-symbol chipkill ECC",
"master abort",
"target abort",
"parity error",
"watchdog timeout",
"invalid address",
"mirror Broken",
"memory sparing",
"scrub corrected error",
"scrub uncorrected error",
"physical memory map-out event",
"unknown-type");
return $types[$_[0]];
}
sub get_extlog_severity
{
my @sev;
if ($_[0] < 0 || $_[0] > 3) {
return "unknown-severity";
}
@sev = ("recoverable",
"fatal",
"corrected",
"informational",
"unknown-severity");
return $sev[$_[0]];
}
use constant {
CPER_MEM_VALID_NODE => 0x0008,
CPER_MEM_VALID_CARD => 0x0010,
CPER_MEM_VALID_MODULE => 0x0020,
CPER_MEM_VALID_BANK => 0x0040,
CPER_MEM_VALID_DEVICE => 0x0080,
CPER_MEM_VALID_ROW => 0x0100,
CPER_MEM_VALID_COLUMN => 0x0200,
CPER_MEM_VALID_BIT_POSITION => 0x0400,
CPER_MEM_VALID_REQUESTOR_ID => 0x0800,
CPER_MEM_VALID_RESPONDER_ID => 0x1000,
CPER_MEM_VALID_TARGET_ID => 0x2000,
CPER_MEM_VALID_ERROR_TYPE => 0x4000,
CPER_MEM_VALID_RANK_NUMBER => 0x8000,
CPER_MEM_VALID_CARD_HANDLE => 0x10000,
CPER_MEM_VALID_MODULE_HANDLE => 0x20000,
};
sub get_cper_data_text
{
my $cper_data = $_[0];
my ($validation_bits, $node, $card, $module, $bank, $device, $row, $column, $bit_pos, $requestor_id, $responder_id, $target_id, $rank, $mem_array_handle, $mem_dev_handle) = unpack 'QSSSSSSSSQQQSSS', $cper_data;
my @out;
if ($validation_bits & CPER_MEM_VALID_NODE) {
push @out, (sprintf "node=%d", $node);
}
if ($validation_bits & CPER_MEM_VALID_CARD) {
push @out, (sprintf "card=%d", $card);
}
if ($validation_bits & CPER_MEM_VALID_MODULE) {
push @out, (sprintf "module=%d", $module);
}
if ($validation_bits & CPER_MEM_VALID_BANK) {
push @out, (sprintf "bank=%d", $bank);
}
if ($validation_bits & CPER_MEM_VALID_DEVICE) {
push @out, (sprintf "device=%d", $device);
}
if ($validation_bits & CPER_MEM_VALID_ROW) {
push @out, (sprintf "row=%d", $row);
}
if ($validation_bits & CPER_MEM_VALID_COLUMN) {
push @out, (sprintf "column=%d", $column);
}
if ($validation_bits & CPER_MEM_VALID_BIT_POSITION) {
push @out, (sprintf "bit_position=%d", $bit_pos);
}
if ($validation_bits & CPER_MEM_VALID_REQUESTOR_ID) {
push @out, (sprintf "0x%08x", $requestor_id);
}
if ($validation_bits & CPER_MEM_VALID_RESPONDER_ID) {
push @out, (sprintf "0x%08x", $responder_id);
}
if ($validation_bits & CPER_MEM_VALID_TARGET_ID) {
push @out, (sprintf "0x%08x", $target_id);
}
if ($validation_bits & CPER_MEM_VALID_RANK_NUMBER) {
push @out, (sprintf "rank=%d", $rank);
}
if ($validation_bits & CPER_MEM_VALID_CARD_HANDLE) {
push @out, (sprintf "mem_array_handle=%d", $mem_array_handle);
}
if ($validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
push @out, (sprintf "mem_dev_handle=%d", $mem_dev_handle);
}
return join (", ", @out);
}
sub get_uuid_le
{
my $out = "";
my @bytes = unpack "C*", $_[0];
my @le16_table = (3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15);
for (my $i = 0; $i < 16; $i++) {
$out .= sprintf "%.2x", $bytes[$le16_table[$i]];
if ($i == 3 or $i == 5 or $i == 7 or $i == 9) {
$out .= "-";
}
}
return $out;
}
sub summary
{
require DBI;
my ($query, $query_handle, $out);
my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg);
my ($etype, $severity, $etype_string, $severity_string);
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
# Memory controller mc_event errors
$query = "select err_type, label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by err_type, label, mc, top_layer, middle_layer, lower_layer";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($err_type, $label, $mc, $top, $mid, $low, $count));
$out = "";
while($query_handle->fetch()) {
$out .= "\t$err_type on DIMM Label(s): '$label' location: $mc:$top:$mid:$low errors: $count\n";
}
if ($out ne "") {
print "Memory controller events summary:\n$out\n";
} else {
print "No Memory errors.\n\n";
}
$query_handle->finish;
# PCIe AER aer_event errors
$query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($err_type, $msg, $count));
$out = "";
while($query_handle->fetch()) {
$out .= "\t$count $err_type errors: $msg\n";
}
if ($out ne "") {
print "PCIe AER events summary:\n$out\n";
} else {
print "No PCIe AER errors.\n\n";
}
$query_handle->finish;
# extlog errors
$query = "select etype, severity, count(*) from extlog_event group by etype, severity";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($etype, $severity, $count));
$out = "";
while($query_handle->fetch()) {
$etype_string = get_extlog_type($etype);
$severity_string = get_extlog_severity($severity);
$out .= "\t$count $etype_string $severity_string errors\n";
}
if ($out ne "") {
print "Extlog records summary:\n$out";
} else {
print "No Extlog errors.\n";
}
$query_handle->finish;
# MCE mce_record errors
$query = "select error_msg, count(*) from mce_record group by error_msg";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($msg, $count));
$out = "";
while($query_handle->fetch()) {
$out .= "\t$count $msg errors\n";
}
if ($out ne "") {
print "MCE records summary:\n$out";
} else {
print "No MCE errors.\n";
}
$query_handle->finish;
undef($dbh);
}
sub errors
{
require DBI;
my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out);
my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location);
my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data);
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
# Memory controller mc_event errors
$query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id";
$query_handle = $dbh->prepare($query);
if (!$query_handle) {
log_error ("mc_event table missing from $dbname. Run 'rasdaemon --record'.\n");
exit -1
}
$query_handle->execute();
$query_handle->bind_columns(\($id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $time $count $type error(s): $msg at $label location: $mc:$top:$mid:$low, addr $addr, grain $grain, syndrome $syndrome $detail\n";
}
if ($out ne "") {
print "Memory controller events:\n$out\n";
} else {
print "No Memory errors.\n\n";
}
$query_handle->finish;
# PCIe AER aer_event errors
$query = "select id, timestamp, err_type, err_msg from aer_event order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $time, $type, $msg));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $time $type error: $msg\n";
}
if ($out ne "") {
print "PCIe AER events:\n$out\n";
} else {
print "No PCIe AER errors.\n\n";
}
$query_handle->finish;
# Extlog errors
$query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data));
$out = "";
while($query_handle->fetch()) {
$etype_string = get_extlog_type($etype);
$severity_string = get_extlog_severity($severity);
$out .= "$id $timestamp error: ";
$out .= "type=$etype_string, ";
$out .= "severity=$severity_string, ";
$out .= sprintf "address=0x%08x, ", $addr;
$out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id);
$out .= "fru_text='$fru_text', ";
$out .= get_cper_data_text($cper_data) if ($cper_data);
$out .= "\n";
}
if ($out ne "") {
print "Extlog events:\n$out\n";
} else {
print "No Extlog errors.\n\n";
}
$query_handle->finish;
# MCE mce_record errors
$query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $time error: $msg";
$out .= ", CPU $cpuvendor" if ($cpuvendor);
$out .= ", bank $bank_name" if ($bank_name);
$out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg);
$out .= ", mci $mcistatus_msg" if ($mcistatus_msg);
$out .= ", $mc_location" if ($mc_location);
$out .= ", $user_action" if ($user_action);
$out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap);
$out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus);
$out .= sprintf ", status=0x%08x", $status if ($status);
$out .= sprintf ", addr=0x%08x", $addr if ($addr);
$out .= sprintf ", misc=0x%08x", $misc if ($misc);
$out .= sprintf ", ip=0x%08x", $ip if ($ip);
$out .= sprintf ", tsc=0x%08x", $tsc if ($tsc);
$out .= sprintf ", walltime=0x%08x", $walltime if ($walltime);
$out .= sprintf ", cpu=0x%08x", $cpu if ($cpu);
$out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid);
$out .= sprintf ", apicid=0x%08x", $apicid if ($apicid);
$out .= sprintf ", socketid=0x%08x", $socketid if ($socketid);
$out .= sprintf ", cs=0x%08x", $cs if ($cs);
$out .= sprintf ", bank=0x%08x", $bank if ($bank);
$out .= "\n";
}
if ($out ne "") {
print "MCE events:\n$out\n";
} else {
print "No MCE errors.\n\n";
}
$query_handle->finish;
undef($dbh);
}
sub log_msg { print STDERR "$prog: ", @_ unless $conf{opt}{quiet}; }
sub log_error { log_msg ("Error: @_"); }
# vi: ts=4 sw=4 expandtab