#!/usr/bin/env perl
#
# Program: Website content check
#
# Author: Matty < matty91 at gmail dot com >
#
# Current Version: 1.0
#
# Revision History:
#
#   Version 1.0
#     Initial Release
#
# Last Updated: 01-05-2007
#
# Purpose:
#   The content-check script can be used to verify that a web server is
#   returning valid content to clients. This is accomplished by comparing a
#   precomputed checksum with a current checksum, and can help locate problems
#   with servers that are accepting connections, but failing to return valid
#   content (e.g., when a bad code deployment occurs).
#
# License:
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Installation:
#   Copy the perl script to a suitable location
#
# Usage:
#   To use content-check, you will need to first generate a checksum for the
#   site you would like to monitor. You can generate a checksum for a site by
#   invoking content-check.pl with the "-g" option and a URL to checksum:
#
#   $ content-check.pl -g http://prefetch.net
#   Checksum for http://prefetch.net is: c3feb8bcbffff321f7db227a1c11dc7794e2fa70
#
#   Once you generate a hash, you will need to create a configuration file
#   with one or more site definitions. Each site definition should take the
#   following form:
#
#   # This is a comment
#   [site prefetch.net]
#   url = http://prefetch.net/index.html
#   checksum = c3feb8bcbffff321f7db227a1c11dc7794e2fa70
#   header = "Host: prefetch.net"
#   syslog = "yes"
#   email = "foo@prefetch.net"
#   logfile = "/var/log/prefetch.net"
#
#   The site keyword is used to start a site definition, and to provide a
#   logical description for the site (in the example above, prefetch.net is
#   used for the description). Each site definition contains one or more key
#   value pairs, which define the url to use, a precomputed checksum for that
#   URL, an optional header to include, and one or more actions (e.g., write
#   an entry to syslog, send an email, or write an entry to a logfile) to take
#   when a checksum fails. Values may optionally be wrapped in quotes; the
#   quotes are not part of the value. Once the site definition file is
#   created, content-check.pl can be run with the "-c" option and the site
#   definition file:
#
#   $ content-check.pl -c site-definitions.cfg
#

### Required modules
use strict;
use warnings;
use Getopt::Std;
use Sys::Syslog;

### Globals
my $program  = "Content-check";
my $wget     = "/usr/sfw/bin/wget";
my $openssl  = "/usr/sfw/bin/openssl";
my $sendmail = "/usr/sbin/sendmail";
my $verbose  = 0;

#############################################################################
# Purpose: Parse the arguments passed to the script
#
# Arguments:
#   None (reads @ARGV)
#
# Returns:
#   The path of the configuration file passed with "-c". Does not return
#   when "-g" is used (prints the checksum and exits 0) or when the
#   arguments are unusable (prints the usage message and exits 1).
#############################################################################
sub parseoptions {
    my %options = ();

    ### Bail out on options getopts does not understand
    if ( !getopts("c:g:v", \%options) ) {
        usage();
        exit(1);
    }

    my $config   = $options{c} || "";
    my $generate = $options{g};

    ### Enable verbose output if requested
    if ( defined $options{v} ) {
        $verbose = 1;
    }

    ### Print the checksum on the console. The first remaining argument,
    ### if any, is used as an additional request header.
    if ( defined $generate ) {
        my $header = $ARGV[0];
        print "Checksum for $generate is: "
            . computechecksum($generate, $header) . "\n";
        exit(0);
    }

    ### If no configuration file was named, let the user know
    if ( !$config ) {
        print "ERROR: No configuration file passed on the command line\n";
        usage();
        exit(1);
    }

    return $config;
}

#############################################################################
# Purpose: Parses the configuration file passed as arg0
#
# Arguments:
#   $_[0] -> Configuration file to parse
#   $_[1] -> Reference to a hash to store the site information; filled in
#            as $$sitesref{site name}{key} = value
#
# Notes: Dies if the configuration file cannot be opened. Key value pairs
#        that appear before the first [site ...] line are ignored.
#############################################################################
sub parseconfig {
    my $config   = $_[0];
    my $sitesref = $_[1];
    my $site     = "";

    open(my $fh, '<', $config)
        or die "ERROR: Cannot open $config: $!\n";

    print "Creating site definitions array for entries in $config\n"
        if ($verbose);

    ### TO DO: Harden the processing to deal with broken site definition files
    while ( my $line = <$fh> ) {
        chomp($line);

        ### Get rid of blank lines and lines with comments
        if ( $line =~ /^\s*(?:\#|$)/ ) {
            next;

        ### Check for a site that looks like [site foo.com]
        } elsif ( $line =~ /^\s*\[\s*site\s*(.*?)\s*\]/ ) {
            $site = $1;

        ### Split key value pairs and stuff them into an associative array
        } elsif ( ($site ne "")
                  && ( $line =~ /^\s*(\w+)\s*=\s*(.*?)\s*$/ ) ) {
            my ($key, $value) = ($1, $2);

            ### Quotes around a value are decoration, not part of the value
            $value =~ s/^(["'])(.*)\1$/$2/;

            $$sitesref{$site}{$key} = $value;
        }
    }

    close($fh);
    return;
}

#############################################################################
# Purpose: Decide whether an action key (e.g., syslog) is switched on
#
# Arguments:
#   $_[0] -> Value read from the site definition (may be undefined)
#
# Returns:
#   1 if the value is set to something other than no/false/off/0,
#   0 otherwise
#############################################################################
sub enabled {
    my $value = $_[0];

    return 0 if ( !defined $value || $value eq "" );
    return 0 if ( $value =~ /^(?:no|false|off|0)$/i );
    return 1;
}

#############################################################################
# Purpose: Checksums each site passed in the sites associative array
#
# Arguments:
#   $_[0] -> Contains a reference to the sites hash built by parseconfig
#
# Notes: For each site whose current checksum differs from the precomputed
#        one, the actions configured for that site (syslog, email, logfile)
#        are carried out. Sites without a url or checksum are skipped.
#############################################################################
sub checksites {
    my $sitesref = $_[0];

    foreach my $site (sort keys %$sitesref) {
        print "Processing site definition entry for $site\n" if ($verbose);

        ### Store the URL, the expected checksum and any custom headers
        my $url      = $$sitesref{$site}{url};
        my $expected = $$sitesref{$site}{checksum};
        my $header   = $$sitesref{$site}{header} || "";

        ### A definition without a url or checksum cannot be verified
        if ( !$url || !$expected ) {
            print "ERROR: Site $site is missing a url or checksum, skipping\n";
            next;
        }

        ### Calculate a checksum for the URI
        my $checksum = computechecksum($url, $header);

        ### Compare the checksum with the precomputed checksum
        next if ( $checksum eq $expected );

        print "Site $site failed checksum:\n"
            . " Current checksum: $checksum:\n"
            . " Precomputed checksum: $expected\n" if ($verbose);

        ### Create a message that can be sent to a logfile or emailed
        my $dt  = scalar localtime time;
        my $msg = "$program detected a problem with $url:\n"
                . " Site definition: $site\n"
                . " Date: $dt\n"
                . " URL: $url\n"
                . " Precomputed checksum: $expected\n"
                . " Current checksum: $checksum\n\n";

        ### If syslog is defined, write a message to the system log
        if ( enabled($$sitesref{$site}{syslog}) ) {
            writesyslog("$program detected problems with content on $site");
        }

        ### If an email address is defined, send an email
        if ( $$sitesref{$site}{email} ) {
            my $addr    = $$sitesref{$site}{email};
            my $subject = "$program detected problems with content on $site";
            sendemail($addr, $addr, $subject, $msg);
        }

        ### If a logfile is defined, write an entry to it
        if ( $$sitesref{$site}{logfile} ) {
            writelogfile($$sitesref{$site}{logfile}, $msg);
        }
    }

    return;
}

#############################################################################
# Purpose: Quote a string so the shell passes it on as a single argument
#
# Arguments:
#   $_[0] -> String to quote
#
# Returns:
#   The string wrapped in single quotes, with embedded single quotes escaped
#############################################################################
sub shellquote {
    my $text = $_[0];

    $text =~ s/'/'\\''/g;
    return "'$text'";
}

#############################################################################
# Purpose: Compute a checksum for a URI
#
# Arguments:
#   $_[0] -> Site to checksum
#   $_[1] -> Additional header to include in request (optional)
#
# Returns:
#   Whatever "openssl sha1" prints for the content wget retrieved, with the
#   trailing newline removed
#############################################################################
sub computechecksum {
    my $site   = $_[0] || "";
    my $header = $_[1] || "";

    ### Build the wget argument list; only pass --header when one was given
    my @fetch = ($wget, '-t', '1');
    push(@fetch, "--header=$header") if ($header ne "");
    push(@fetch, '-q', '-O', '-', $site);

    ### Every argument is quoted so that spaces (e.g., "Host: foo") and
    ### shell metacharacters in the URL or header reach wget untouched
    my $command = join(' ', map { shellquote($_) } @fetch)
                . ' | ' . shellquote($openssl) . ' sha1';

    ### Generate a checksum for the site in $site ($_[0]) and nix the newline
    my $checksum = `$command`;
    $checksum = "" if ( !defined $checksum );
    chomp($checksum);

    print "Checksum for $site is $checksum (optional header: $header)\n"
        if ($verbose);

    ### Return the checksum to the caller
    return($checksum);
}

#############################################################################
# Purpose: Log a message to the system log
#
# Notes: The solaris Sys::Syslog module will only work if the
#        daemon is configured to accept messages on UDP port 514 :(
#
# Arguments:
#   $_[0] -> Error message to write
#############################################################################
sub writesyslog {
    my $msg = $_[0];

    ### TO DO: Need to clean this up to allow more flexible logging options
    openlog($program, 'pid', 'daemon');

    ### Pass the text as an argument so a "%" in it is not read as a format
    syslog('notice', '%s', $msg);
    closelog();
    return;
}

#############################################################################
# Purpose: Append a message to a logfile
#
# Arguments:
#   $_[0] -> Logfile to append to
#   $_[1] -> Message to write
#############################################################################
sub writelogfile {
    my $logfile = $_[0];
    my $msg     = $_[1];

    ### Open logfile and append the msg passed as arg1 ($_[1])
    if ( open(my $log, '>>', $logfile) ) {
        print {$log} $msg;

        ### Buffered write errors only show up when the file is closed
        close($log)
            or print "ERROR: Unable to write to logfile $logfile: $!\n";
    } else {
        print "ERROR: Unable to open logfile $logfile\n";
    }

    return;
}

#############################################################################
# Purpose: Send an email to alert personnel that a site is down
#
# Arguments:
#   $_[0] -> Email address to send to
#   $_[1] -> Email address the message is from
#   $_[2] -> Subject to use
#   $_[3] -> Body of the email message
#############################################################################
sub sendemail {
    my $toaddress   = $_[0];
    my $fromaddress = $_[1];
    my $subject     = $_[2];
    my $body        = $_[3];

    ### A line break inside a header field would start a new header
    foreach my $field ($toaddress, $fromaddress, $subject) {
        $field =~ s/[\r\n]+/ /g;
    }

    ### Open sendmail binary and send an email to address passed as arg0
    ### ($_[0]); the list form runs the binary without a shell
    if ( open(my $mail, '|-', $sendmail, '-t') ) {
        print {$mail} "From: $fromaddress\n";
        print {$mail} "To: $toaddress\n";
        print {$mail} "Subject: $subject\n\n";
        print {$mail} "$body\n";

        ### close() on a pipe reports a failure of the command itself
        close($mail)
            or print "ERROR: $sendmail failed to send the message\n";

    ### Odds are the binary doesn't exist, or the permissions are wrong
    } else {
        print "ERROR: Cannot find sendmail utility at $sendmail\n";
        print "ERROR: Please adjust the \$sendmail variable in the program header\n";
    }

    return;
}

#############################################################################
# Purpose: Write a usage message to the console
#
# Arguments:
#   No arguments
#############################################################################
sub usage {
    print "Usage: content-check [-c configuration file] || [ -g URL \"Header: Value\"]\n";
    print "  -c config : configuration file with site definitions\n";
    print "  -g site   : generate a checksum for the site passed as an argument\n";
    print "  -v        : produce verbose output on the console\n";
    return;
}

#############################################################################
# Purpose: Make sure the binaries exist
#
# Arguments:
#   No arguments
#
# Notes: Exits with status 1 if wget or openssl is missing
#############################################################################
sub validateenvironment {
    ### TO DO: Look for ways to dynamically locate binaries
    if ( ! -e $wget ) {
        print "ERROR: Cannot find wget utility at $wget\n";
        print "ERROR: Please adjust the \$wget variable in the program header\n";
        exit(1);
    }

    if ( ! -e $openssl ) {
        print "ERROR: Cannot find openssl utility at $openssl\n";
        print "ERROR: Please adjust the \$openssl variable in the program header\n";
        exit(1);
    }

    return;
}

##################################
# Start of the main program
##################################

### Check to make sure dependencies are met
validateenvironment();

### Parse the command line options if there are any
my $config = parseoptions();

### Parse the configuration file if there is one
my %sites;
parseconfig($config, \%sites);

### Check to make sure sites hash correctly
checksites(\%sites);