Listing checkhtml.pl

checkhtml.pl



#!/usr/bin/perl -w 
##
## This program is free software; you can redistribute it
## and/or modify it under the terms of the GNU General
## Public License as published by the Free Software Foundation
## (http://www.gnu.org/); either version 2 of the License, or
## (at your option) any later version.
##
## Author : Dirk Steinchen (dirk@atelier89.de)
## Webpage: http://www.atelier89.de/users/dirk/html/validator.html
## Version: 1.0

use strict;
use LWP::UserAgent;
use HTTP::Request::Common;

## edit this to your needs
    ## upload (post) files or get validation results
    ##  via uri? 0=uri, 1=upload
    my $upload = 0;
    ## absolute path to your local html files
    ##  e.g. '/exports/www/testsite/' or 'c:/www/testsite/'
    ##  don't forget the "/"
    my $releasedir = '/exports/www/testsite/';
    ## absolute path to your files on local www-server
    ##  e.g. '/testsite/' don't forget the "/"
    my $localw3dir = '/testsite/';
    ## your local www-server
    ##  don't insert the "/"
    my $localserv = 'http://localhost';
    ## the validator listens here
    my $validator = 'http://validator.w3.org/check';
    ## use "\n\r" for DOS/Win and "\n" for unix-like OS, "\r" for MAC
    my $cr = "\n";
    ## Generate Output: 0=yes, 1=no
    my $quiet = 0;
    ## mask for files we test, actually *.htm and *.html
    my $html_regex = '^([a-z0-9\-\_\.]*?)\.html?$';
    ## name of the user-agent for testing, libwww version is appended later
    my $useragent = "W3C_Validator/local ";

## edit lines below this line shouldn't be needed
    ## error-counter
    my $errors = 0;
    ## file-counter
    my $files = 0;
    ## start-time
    my $time = time;

    print $cr, $cr, "Beginn processing", $cr, "-----", $cr if $quiet == 0;

    ## process (and recurse) the given directory
    process_directory($releasedir);
    ## statistics
    $time = time - $time;
    print "----", $cr, "Finished: $files files in $time sec" if $quiet == 0;
    print " - $errors errors found.", $cr, $cr if $quiet == 0;
    ## return 0 if no errors otherwise the error-count
    exit $errors;

sub process_directory {
    my $directory = $_[0];
    ## internal var's
    my (@subdir, @verzeichnis,
     $key, $content, $ua, $req, $data, $file);

    ## read current directory
    opendir (VERZEICHNIS, $directory) || die "$directory: $!".$cr;
	@verzeichnis = readdir (VERZEICHNIS);
    closedir (VERZEICHNIS);

    print $cr, "Processing directory $directory", $cr if $quiet == 0;

    ## process each entry in directory list
    foreach $key (@verzeichnis) {
	## is current entry a html-file?
	if ($key =~ /$html_regex/i) {
	    ## if so, process this entry
	    print " processing\t$key\t" if $quiet == 0;
	    $data = $directory;
	    ## if errors found, they go in this file
	    $file = $key.".err.html";
    	    ## create user-agent
	    $ua = new LWP::UserAgent;
	    ## user agent's name
	    $ua->agent($useragent.$ua->agent);
	    ## validation by uri oder upload?
	    if ($upload == 0) {
		## get the resource name on local www-server
		$data =~ s/$releasedir/$localw3dir/i;
		## request for validation (get via uri)
		$req = new HTTP::Request (
                    GET=>"$validator?uri=$localserv$data$key");
	    } else {
		## request for validation (upload file)
		$req = POST "$validator",
		    Content_Type => 'multipart/form-data',
		    Content => [
			uploaded_file => [ "$data$key" ]
		    ];
	    }
	    ## result of validation
	    $content = $ua->request($req)->content;
	    ## one more file...
	    $files++;
	    ## write result to file
	    open (DATEI, "> $directory$file") || die $!;
		print DATEI $content;
	    close (DATEI);
	    ## valid/wellformed document?
	    if ($content =~ /No errors found\!/) {
		## if so, clean up
		print "o.k.\t" if $quiet == 0;
		unlink "$directory$file";
	    }
	    else {
		## if document is not valid, inc error-counter
		##  and leave file with results on disk
		$errors++;
		print "Error\t" if $quiet == 0;
	    }
	    ## that's it for this page
	    print " done.$cr" if $quiet == 0;
	} else {
	    ## if directory entry is a subdir
	    if (-d "$directory$key") {
		## save this entry for later processing
		push (@subdir, $key)
	    }
	}
    }
    ## process subdir's in current directory
    foreach $key (@subdir) {
	## process only entrys that not refer to
	##  current (.) or upper (..) directory
	if (!($key =~ /^\.*$/)) {
	    ## process subdirectory (and recursivly it's subdirectorys)
    	    process_directory ("$directory$key/");
	}
    }
}