checkhtml.pl
#!/usr/bin/perl -w
##
## This program is free software; you can redistribute it
## and/or modify it under the terms of the GNU General
## Public License as published by the Free Software Foundation
## (http://www.gnu.org/); either version 2 of the License, or
## (at your option) any later version.
##
## Author : Dirk Steinchen (dirk@atelier89.de)
## Webpage: http://www.atelier89.de/users/dirk/html/validator.html
## Version: 1.0
use strict;
use LWP::UserAgent;
use HTTP::Request::Common;
## edit this to your needs
## upload (post) files or get validation results
## via uri? 0=uri, 1=upload
my $upload = 0;
## absolute path to your local html files
## e.g. '/exports/www/testsite/' or 'c:/www/testsite/'
## don't forget the "/"
my $releasedir = '/exports/www/testsite/';
## absolute path to your files on local www-server
## e.g. '/testsite/' don't forget the "/"
my $localw3dir = '/testsite/';
## your local www-server
## don't insert the "/"
my $localserv = 'http://localhost';
## the validator listens here
my $validator = 'http://validator.w3.org/check';
## use "\n\r" for DOS/Win and "\n" for unix-like OS, "\r" for MAC
my $cr = "\n";
## Generate Output: 0=yes, 1=no
my $quiet = 0;
## mask for files we test, actually *.htm and *.html
my $html_regex = '^([a-z0-9\-\_\.]*?)\.html?$';
## name of the user-agent for testing, libwww version is appended later
my $useragent = "W3C_Validator/local ";
## edit lines below this line shouldn't be needed
## error-counter
my $errors = 0;
## file-counter
my $files = 0;
## start-time
my $time = time;
print $cr, $cr, "Beginn processing", $cr, "-----", $cr if $quiet == 0;
## process (and recurse) the given directory
process_directory($releasedir);
## statistics
$time = time - $time;
print "----", $cr, "Finished: $files files in $time sec" if $quiet == 0;
print " - $errors errors found.", $cr, $cr if $quiet == 0;
## return 0 if no errors otherwise the error-count
exit $errors;
sub process_directory {
my $directory = $_[0];
## internal var's
my (@subdir, @verzeichnis,
$key, $content, $ua, $req, $data, $file);
## read current directory
opendir (VERZEICHNIS, $directory) || die "$directory: $!".$cr;
@verzeichnis = readdir (VERZEICHNIS);
closedir (VERZEICHNIS);
print $cr, "Processing directory $directory", $cr if $quiet == 0;
## process each entry in directory list
foreach $key (@verzeichnis) {
## is current entry a html-file?
if ($key =~ /$html_regex/i) {
## if so, process this entry
print " processing\t$key\t" if $quiet == 0;
$data = $directory;
## if errors found, they go in this file
$file = $key.".err.html";
## create user-agent
$ua = new LWP::UserAgent;
## user agent's name
$ua->agent($useragent.$ua->agent);
## validation by uri oder upload?
if ($upload == 0) {
## get the resource name on local www-server
$data =~ s/$releasedir/$localw3dir/i;
## request for validation (get via uri)
$req = new HTTP::Request (
GET=>"$validator?uri=$localserv$data$key");
} else {
## request for validation (upload file)
$req = POST "$validator",
Content_Type => 'multipart/form-data',
Content => [
uploaded_file => [ "$data$key" ]
];
}
## result of validation
$content = $ua->request($req)->content;
## one more file...
$files++;
## write result to file
open (DATEI, "> $directory$file") || die $!;
print DATEI $content;
close (DATEI);
## valid/wellformed document?
if ($content =~ /No errors found\!/) {
## if so, clean up
print "o.k.\t" if $quiet == 0;
unlink "$directory$file";
}
else {
## if document is not valid, inc error-counter
## and leave file with results on disk
$errors++;
print "Error\t" if $quiet == 0;
}
## that's it for this page
print " done.$cr" if $quiet == 0;
} else {
## if directory entry is a subdir
if (-d "$directory$key") {
## save this entry for later processing
push (@subdir, $key)
}
}
}
## process subdir's in current directory
foreach $key (@subdir) {
## process only entrys that not refer to
## current (.) or upper (..) directory
if (!($key =~ /^\.*$/)) {
## process subdirectory (and recursivly it's subdirectorys)
process_directory ("$directory$key/");
}
}
}
zum Download · zur Funktionsweise