Perl script: 'authenticate.cgi'
----
NOTE: This is a cut-down version of the Shibboleth authentication script used for ProQuest's Chadwyck-Healey products. It omits details of how to generate the encrypted parameter used for the secure redirection back into the product, following a successful authentication. It also omits details of how Shibboleth attributes are spoofed when technical support staff need to simulate a login by a particular customer.
#!/usr/local/bin/perl -w # Shibboleth authentication and redirection script for ProQuest's # Chadwyck-Healey products. # # ------------------------------------------------------------------------- # Script: /opt/shibboleth/apache2.2/htdocs/secure/authenticate.cgi # Author: Geoff Leach # Date: November 2007 # Copyright: (c) 2007 ProQuest LLC # # This software is available for use, modification and distribution under # the Apache License, Version 2.0, a copy of which may be obtained at: # http://www.apache.org/licenses/LICENSE-2.0 # ------------------------------------------------------------------------- # # This script is invoked via a URL of the form: # # https://shibboleth.chadwyck.co.uk/secure/authenticate.cgi? # product=HCPP& # location=UK& # returnpage=http://parlipapers.chadwyck.co.uk/shibbolethLogin.do& # forward=/search/search.jsp # # This will authenticate the user via Shibboleth; then try to match the user # to a customer account in Webtools; and return the user to a page in the # product which will either display an error message, or forward the user # to the page that they were originally attempting to access. # # The Shibboleth authentication sequence is performed by the Shibboleth module # that is loaded by Apache 2.2, and the associated 'shibd' daemon, which # place the Shibboleth attributes in environment variables before this # script is invoked. # # The job of this script is to match the values in these attributes to a # particular institution's account in the 'Webtools' customer account # database, and then redirect back to a URL in the product which either # logs the user in under this institution's account (if found), or else # displays a suitable error message. # # As a special case this script may be invoked with parameter 'testmode=Y' # which causes it to display a page containing diagnostic information from # the Shibboleth authentication. This page may then be emailed to ProQuest # Technical Support in order to diagnose problems with authentications. # # Parameters to this script are: # # returnpage The URL of a page in the product which will process # the results of the Shibboleth authentication. This # page will either display an error message (e.g. that # the user cannot be matched to any customer account) # or, after a successful login, will forward the user # to the original URL that the user was attempting to # access in the product. # # The 'returnPage' URL will have the following CGI # parameters appended: # # shibbolethToken Encrypted value identifying the # customer account following a # successful authentication. This # needs to be validated to prevent # the user bookmarking the return # page URL and reusing it in an # attempt to bypass authentication. # # shibbolethError Error message to be displayed # if authentication failed. # # shibbolethForward URL to which the page should # forward after validating the # 'shibbolethToken' parameter. # This is the value from any # 'forward' parameter supplied # to this script. # # product Product to which the user is trying to authenticate, # as the 'product code' in Webtools (ignoring case). # # Knowing which product the user is trying to access may # help in certain cases where the user matches more than # one account in Webtools. # # location Location of the product as one of 'UK', 'US' or 'DEV'. # This determines the Webtools database which is searched # for a customer account that matches the user's # Shibboleth attributes. # # If this parameter is omitted then the value will be # inferred from the 'returnpage' parameter, according to # whether this specifies a URL '.chadwyck.co.uk', # '.chadwyck.com' or '.private.chadwyck.co.uk' # # forward URL that the user was originally trying to access in # the product, to which the user should be forwarded # following a successful authentication. # # testmode=Y Add this parameter to display the Shibboleth test page, # instead of returning the user back to a product. When # this option is used the 'returnpage' parameter can be # omitted. # # The test page displays a diagnostic report of the # Shibboleth attributes retrieved from the identity # provider # use strict; use CGI; main(); sub main { # Obtain the CGI parameters my $retpage = CGI::param( "returnpage" ) || ""; my $product = CGI::param( "product" ) || ""; my $location = CGI::param( "location" ) || ""; my $forward = CGI::param( "forward" ) || ""; my $testmode = CGI::param( "testmode" ) || ""; # Convert product and location to uppercase $product = uc( $product ); $location = uc( $location ); # Check that the return page has been specified, and is a fully # qualified URL (i.e. beginning 'http://') # # (Not required for test mode) # if ( ! $testmode ) { if ( ! $retpage ) { fatal_error( "Mandatory parameter 'returnpage' is missing" ); } elsif ( $retpage !~ m|^http://|i ) { fatal_error( "Parameter 'returnpage' has an invalid value " . "'$retpage'. Should be a fully-qualified URL " . "beginning with 'http://'" ); } } # Check that the product code has been specified if ( ! $product ) { fatal_error( "Mandatory parameter 'product' is missing" ); } # Check that the location has been specified, or can be inferred # from the return page URL # if ( $location eq "" ) { # Determine location from the form of the return page URL if ( $retpage =~ m|^http://[^/]+\.private\.chadwyck\.co\.uk|i ) { $location = "DEV"; } elsif ( $retpage =~ m|^http://[^/]+\.chadwyck\.co\.uk|i ) { $location = "UK"; } elsif ( $retpage =~ m|^http://[^/]+\.chadwyck\.com|i ) { $location = "US"; } else { fatal_error( "Parameter 'location' has not been specified " . "and the location cannot be determined from the " . "return page URL '$retpage'" ); } } elsif ( $location !~ /^(UK|US|DEV)$/ ) { fatal_error( "Parameter 'location' has an invalid value '$location'. " . "Should be one of 'UK', 'US' or 'DEV'." ); } # Diagnostic report that is generated for test mode my $report = "SHIBBOLETH AUTHENTICATION - DIAGNOSTIC REPORT\n" . "\n" . "Authentication performed at: " . localtime() . "\n" . "\n" . "Parameters supplied to authentication script:\n"; foreach my $paramname ( sort( CGI::param() ) ) { $report .= " $paramname = '" . CGI::param( $paramname ) . "'\n"; } $report .= "\n"; # Get the user's Shibboleth attributes # Report on which Shibboleth attributes are present $report .= "Environment variables containing Shibboleth attributes are:\n"; foreach my $envvar ( sort keys( %ENV ) ) { my $envvalue = $ENV{ $envvar }; if ( $envvar =~ /^HTTP_SHIB/ && $envvalue ne "" ) { $report .= " $envvar = '$envvalue'\n"; } } $report .= "\n"; # Identity provider my $identityprovider = $ENV{ HTTP_SHIB_IDENTITY_PROVIDER }; $report .= "Identity Provider is: \n" . " $identityprovider\n\n"; # Scoped affiliations, e.g. 'staff@ProQuest.co.uk' my @scopedaffiliations = ( ); if ( $ENV{ HTTP_SHIB_EP_AFFILIATION } ) { my $scopedaffiliation = $ENV{ HTTP_SHIB_EP_AFFILIATION }; # Trim leading and trailing spaces and semicolons $scopedaffiliation =~ s/^[\s;]+//; $scopedaffiliation =~ s/[\s;]+$//; # Split into multiple values at semicolons @scopedaffiliations = split( /\s*;\s*/, $scopedaffiliation ); } # Entitlements my @entitlements = ( ); if ( $ENV{ HTTP_SHIB_EP_ENTITLEMENT } ) { my $entitlement = $ENV{ HTTP_SHIB_EP_ENTITLEMENT }; # Trim leading and trailing spaces and semicolons $entitlement =~ s/^[\s;]+//; $entitlement =~ s/[\s;]+$//; # Split into multiple values at semicolons @entitlements = split( /\s*;\s*/, $entitlement ); } # ================================================================== # Details of how attribute spoofing is implemented have been omitted # ================================================================== my $numaffiliations = scalar( @scopedaffiliations ); my $numentitlements = scalar( @entitlements ); $report .= "Obtained $numaffiliations value(s) for " . "scoped affiliation: \n " . join( "\n ", @scopedaffiliations ) . "\n\n"; $report .= "Obtained $numentitlements value(s) for " . "entitlement: \n " . join( "\n ", @entitlements ) . "\n\n"; # Load the details used to map Shibboleth attributes to customer # accounts in the Webtools database at the specified location # my $mappings = load_mapping_rules( $location ); # Find any customer accounts that match the Shibboleth attributes, # based on either the scoped affiliations or the entitlements # my $matchedcustomers = { }; foreach my $scopedaffiliation ( @scopedaffiliations ) { # Split the value into affiliation and scope my ( $affiliation, $scope ) = split( /@/, $scopedaffiliation ); # Look for a customer matching the affiliation and scope find_matching_customer( "affiliation=\"$affiliation\" && " . "scope=\"$scope\"", $mappings, $matchedcustomers ); } foreach my $entitlement ( @entitlements ) { # Look for a customer matching the identity provider and entitlement. # # This may be a general entitlement that just asserts that the user # is a bona-fide member of an institution, in which case the # rule is not specific to particular products. # find_matching_customer( "entitlement=\"$entitlement\" && " . "identityprovider=\"$identityprovider\"", $mappings, $matchedcustomers ); # Otherwise, the entitlement is to allow particular users to access # particular products. # find_matching_customer( "entitlement=\"$entitlement\" && " . "identityprovider=\"$identityprovider\" && " . "product=\"$product\"", $mappings, $matchedcustomers ); } # Report on whether the user was matched to any customer accounts # my $nummatches = scalar( keys %$matchedcustomers ); if ( $nummatches == 0 ) { $report .= "These Shibboleth attributes do not match any " . "customer accounts in the $location database.\n\n"; } elsif ( $nummatches == 1 ) { $report .= "These Shibboleth attributes match exactly one " . "customer account in the $location database:\n\n"; } elsif ( $nummatches > 1 ) { $report .= "These Shibboleth attributes match $nummatches " . "customer accounts in the $location database:\n\n"; } if ( $nummatches > 0 ) { # Display details of the matched customer accounts foreach my $clientcode ( sort( keys %$matchedcustomers ) ) { my $customer = $matchedcustomers->{ $clientcode }; my $clientname = $customer->{ NAME }; my $rulelist = $customer->{ RULELIST }; my $matchedrules = $customer->{ MATCHEDRULES }; my $productcodes = $customer->{ PRODUCTS }; $report .= ( "-" x 72 ) . "\n"; $report .= "Client code : $clientcode\n"; $report .= "Client name : $clientname\n"; $report .= "Shibboleth rule list : $rulelist\n"; $report .= "Subscribed products : " . join( ", ", @$productcodes ) . "\n"; foreach my $rule ( @$matchedrules ) { $report .= "Matches user via rule : $rule\n"; } } $report .= ( "-" x 72 ) . "\n\n"; } # If the rules matched more than one customer, then discard those # customers who are not subscribed to the requested product. # if ( $nummatches > 1 ) { foreach my $clientcode ( keys %$matchedcustomers ) { # Get list of product codes for this customer my $productcodes = $matchedcustomers->{ $clientcode }->{ PRODUCTS }; # Discard this customer if not subscribed to requested product if ( ! grep { uc( $_ ) eq uc( $product ) } @$productcodes ) { delete( $matchedcustomers->{ $clientcode } ); } } # Update the number of matched customers my $oldnummatches = $nummatches; $nummatches = scalar( keys %$matchedcustomers ); # Report the customers subscribed to the requested product $report .= "$nummatches out of $oldnummatches of these customer " . "accounts are subscribed to the '$product' product:\n\n"; foreach my $clientcode ( sort( keys %$matchedcustomers ) ) { my $clientname = $matchedcustomers->{ $clientcode }->{ NAME }; $report .= " $clientcode ($clientname) \n"; } } # Generate the redirection back to the return page with the appropriate # parameters: either specifying the Webtools account that the user has # been authenticated as; or an error message. # my $redirecturl; if ( $nummatches == 1 ) { # Get the single client code to which the user has been matched my $clientcode = ( keys( %$matchedcustomers ) )[ 0 ]; $report .= "\n\nAUTHENTICATION SUCCEEDED - " . "USER AUTHENTICATED AS: $clientcode\n\n"; # Form the authentication token my $token = format_authentication_token( $clientcode, $product, $location ); # Form the return URL for a successful authentication $redirecturl = $retpage . "?shibbolethToken=" . CGI::escape( $token ) . "&shibbolethForward=" . CGI::escape( $forward ); } elsif ( $nummatches > 1 ) { # Form the list of matching customer accounts my @customers; foreach my $clientcode ( sort keys( %$matchedcustomers ) ) { my $clientname = $matchedcustomers->{ $clientcode }->{ NAME }; push( @customers, "'$clientcode' ($clientname)" ); } # Error message for ambiguous mapping my $errmsg = "The details obtained from your login cannot be " . "matched to a single ProQuest customer account. " . "The details were ambiguous and matched $nummatches " . "accounts: " . join( ", ", @customers ); $report .= "\n\nAUTHENTICATION FAILED - " . "ERROR MESSAGE RETURNED TO PRODUCT IS: $errmsg\n\n"; # Form the return URL containing this error message $redirecturl = $retpage . "?shibbolethError=" . CGI::escape( $errmsg ); } else { # Error message for no matching customer my $errmsg = "The details obtained from your login cannot be " . "matched to any ProQuest customer account"; $report .= "\n\nAUTHENTICATION FAILED - " . "ERROR MESSAGE RETURNED TO PRODUCT IS: $errmsg\n\n"; # Form the return URL containing this error message $redirecturl = $retpage . "?shibbolethError=" . CGI::escape( $errmsg ); } # In test mode, output the diagnostic page with the report on # what Shibboleth attributes were present and how these matched # against customer accounts. # if ( $testmode ) { output_test_page( $location, $report ); } # Redirect to the return page print CGI::redirect( $redirecturl ); exit( 0 ); } # Given a matching rule containing some combination of attributes, check # whether the 'mappings' hash contains a matching entry and, if so, # copy the matched customer's details to the hash of matched customers. # sub find_matching_customer { my ( $rule, $mappings, $matchedcustomers ) = @_; # Look for any matching customer $rule = lc( $rule ); if ( exists( $mappings->{ $rule } ) ) { # Get the details of the matched customer(s) my $customerarray = $mappings->{ lc( $rule ) }; foreach my $customer ( @$customerarray ) { my $clientcode = $customer->{ CODE }; my $clientname = $customer->{ NAME }; my $products = $customer->{ PRODUCTS }; my $rulelist = $customer->{ RULELIST }; # Copy these into the hash of matched customers if ( ! exists( $matchedcustomers->{ $clientcode } ) ) { # Create the entry for this customer $matchedcustomers->{ $clientcode } = { CODE => $clientcode, NAME => $clientname, RULELIST => $rulelist, PRODUCTS => [ @$products ], MATCHEDRULES => [ $rule ] }; } else { # The entry for the customer already exists (as a result # of a successful match on another rule). Just add the # current rule to the list for the customer my $matchedrules = $matchedcustomers->{ $clientcode }->{ MATCHEDRULES }; push( @$matchedrules, $rule ); } } } } # Load the information for mapping Shibboleth attributes to customer accounts # in the Webtools customer database (from either the UK, US or Development # server). # # The extract of customer information is supplied to this script as a flat # file with four tab-separated fields on each line: # # 1. The customer identifier (or 'client code'), e.g. 'ucambridge' # # 2. The customer name, e.g. 'University of Cambridge' # # 3. The Shibboleth mapping rules for this account, e.g. # # 'affiliation="member|staff|student|employee" && scope="cam.ac.uk"' # # This is a single long string containing a boolean expression which # can be evaluated against terms extracted from the Shibboleth # attributes (such as scope and affiliation, or entitlement). # # 4. The list of products for this customer, as a list of product codes # separated by spaces, e.g. 'COLLECTIONS HCPP NEW_LION WELLESLEY' # # Note that the list of products is optional. This field is only used when # a user matches more than one customer account, in which case this script # will select only the account(s) subscribed to the requested product. # # This function returns a 'mappings' hash whose keys are the different # combinations of possible attributes, and whose values are the details of # the corresponding customer(s), e.g. # # $mappings{ 'affiliation="member" && scope="cam.ac.uk"' } = # [ # { # CODE => "ucambridge", # NAME => "University of Cambridge", # PRODUCTS => [ "COLLECTIONS", "HCPP", ... ] # } # ] # # This mapping is then used to lookup the customer(s) matching a retrieved # attribute value 'eduPersonScopedAffiliation = member@cam.ac.uk' # sub load_mapping_rules { # Product location (one of DEV, UK or US) my ( $location ) = @_; # Read the file of mappings for this location if ( ! open( MAPPINGS, "< Webtools_$location.txt" ) ) { fatal_error( "Cannot open file containing the customer details " . "for the $location server. \n" ); } # Load the details for each customer account, consisting of the list # of rules to evaluate, and the list of products for the customer # my $mappings = { }; while ( my $line = <MAPPINGS> ) { # Ignore the start and end lines beginning with a hash if ( $line =~ /^#/ ) { next; } # Get the fields from the next line chomp( $line ); my ( $clientcode, $clientname, $rules, $products ) = split( /\t/, $line ); # Split the rules into a list at the '||' separators my @rules = split( /\s*\|\|\s*/, $rules ); # Split the product codes at spaces my @products = split( /\s+/, $products ); # Create a hash containing this customer's details my $details = { CODE => $clientcode, NAME => $clientname, RULELIST => $rules, PRODUCTS => [ @products ] }; # Expand the rules into their different possible # combinations of attributes my @expansions = expand_rules( @rules ); # For each rule, create an entry in the mappings hash, to map # the combination of attributes to the customer details foreach my $expansion ( @expansions ) { # Create the array of customers for this rule if ( ! exists( $mappings->{ $expansion } ) ) { $mappings->{ $expansion } = [ ]; } # Add this customer to the array my $arrayref = $mappings->{ $expansion }; push( @$arrayref, $details ); } } close( MAPPINGS ); return $mappings; } # Expand a set of mapping rules into a set of alternatives that are used as # keys in the '%mappings' hash, e.g. the pair of rules: # # 'scope="ProQuest.co.uk|ProQuest.com" && affiliation="STAFF|EMPLOYEE"' # 'entitlement="http://url/of/contract" && product="PIO|PAO|HCPP"' # # is expanded into the list: # # 'affiliation="staff" && scope="ProQuest.co.uk"' # 'affiliation="employee" && scope="ProQuest.co.uk"' # 'affiliation="staff" && scope="ProQuest.com"' # 'affiliation="employee" && scope="ProQuest.com"' # 'entitlement="http://url/of/contract" && product="pio"' # 'entitlement="http://url/of/contract" && product="pao"' # 'entitlement="http://url/of/contract" && product="hcpp"' # # The expansion process involves converting the rule into a canonical format: # # - Removing leading and trailing spaces, and squeezing embedded spaces # to single spaces. The '&&' separators are surrounded by single spaces # # - Converting names and values to lowercase # # - Reordering the components into alphabetical order by name # # Thus the process of finding a match for a particular combination of # attribute values is reduced to a lookup in a hash of mappings, instead # of searching through a list of rules and testing each in turn. # sub expand_rules { my @rules = @_; # Expand each rule and concatenate the results into a single array my @expansions; foreach my $rule ( @rules ) { push( @expansions, expand_rule( $rule ) ); } return @expansions; } # Expand a single rule # sub expand_rule { my ( $rule ) = @_; # Convert the rule to lowercase $rule = lc( $rule ); # Remove leading and trailing spaces, and squeeze embedded spaces $rule =~ s/^\s+//; $rule =~ s/\s+$//; $rule =~ s/\s+/ /g; # Split the rule into a hash of its components, e.g. the rule: # # 'scope="ProQuest.co.uk|ProQuest.com" && affiliation="staff|employee"' # # becomes the hash: # # { # affiliation => [ "staff", "employee" ], # scope => [ "ProQuest.co.uk", "ProQuest.com" ] # } # my $rulehash = { }; my @components = split( /\s*&&\s*/, $rule ); foreach my $component ( @components ) { if ( $component =~ /\s*([A-Za-z][A-Za-z0-9]*)\s*=\s*"([^"]*)"\s*$/ ) { my $name = $1; my $valuelist = $2; my @values = split( /\|/, $valuelist ); $rulehash->{ $name } = [ split( /\|/, $valuelist ) ]; } else { print STDERR "WARNING - Unrecognised rule '$component' \n"; } } # Expand this hash into all the alternative rules # my @expanded = expand_rule_hash( $rulehash ); ### print STDERR "Expanded rule: '$rule' into " . ### scalar( @expanded ) . " items: \n" . ### join( "\n", @expanded ) . "\n"; return @expanded; } # Recursive function to expand a rule provided as a hash # sub expand_rule_hash { my ( $rulehash ) = @_; # Extract the first component from the hash my @sortednames = sort( keys %$rulehash ); my $firstname = $sortednames[ 0 ]; my $firstvalues = $rulehash->{ $firstname }; # Expand this into its list of alternatives my @firstexpanded; foreach my $value ( @$firstvalues ) { push( @firstexpanded, "$firstname=\"$value\"" ); } # If this was the only component, return these expansions if ( @sortednames == 1 ) { return @firstexpanded; } # Otherwise, form a hash containing all the remaining components my $remainderhash = { }; for ( my $i = 1; $i < @sortednames; $i ++ ) { my $name = $sortednames[ $i ]; my $values = $rulehash->{ $name }; $remainderhash->{ $name } = [ @$values ]; } # Expand this hash my @remainderexpanded = expand_rule_hash( $remainderhash ); # Combine the expansions of the first component with the # expansions of the remaining components # my @expanded; foreach my $firstexpansion ( @firstexpanded ) { foreach my $remainderexpansion ( @remainderexpanded ) { push( @expanded, "$firstexpansion && $remainderexpansion" ); } } # Return all the expansions return @expanded; } # Output an error page for a fatal error that prevents this script from # running. # # Such an error only occurs when this script is called incorrectly, such # as the programmer forgetting to include a mandatory parameter, or is # configured incorrectly, such as a required file being missing. # # Errors which occur during the authentication process are handle by # returning the user to the product with an error message. # sub fatal_error { my ( $errmsg ) = @_; print CGI->header( -type => "text/html" ); my $title = "Shibboleth Authentication Error Page"; print "<head>\n" . "<title>$title</title>\n" . "</head>\n"; print "<body>\n" . "<h2>$title</h2>\n"; print "<p><font color=\"red\"><b>$errmsg</b></font></p>\n"; # Display the environment variables print "<p>Note: environment variables are:</p>\n"; print "<table>\n"; foreach my $varname ( sort keys %ENV ) { my $value = $ENV{ $varname }; print "<tr><td>$varname</td><td>$value</td>\n"; } print "</table>\n"; print "</body>\n" . "</html>\n"; exit( 0 ); } # Output the test page containing a diagnostic report of the Shibboleth # authentication and a form to email this to Technical Support. # # This needs the HTML template 'test_page_template.html' and an associated # script 'cgi-bin/test_page_send_email.cgi' # sub output_test_page { my ( $location, $report ) = @_; # Read the HTML template for the test page my $templatefile = "test_page_template.html"; if ( ! open( TEMPLATE, "< $templatefile" ) ) { fatal_error( "Unable to open the template file '$templatefile' " . "for reading: $!" ); } local $/ = undef; my $templatehtml = <TEMPLATE>; close( TEMPLATE ); # Form the HTML version of the report text my $reporthtml = $report; # Convert newlines to <br> tags $reporthtml =~ s/\n/\n<br>/sg; # Preserve leading spaces $reporthtml =~ s/<br>( +)/ "<br>" . ( " " x length( $1 ) ) /sge; # Convert multiple spaces (used for alignment) into entities $reporthtml =~ s/( +)/ " " x length( $1 ) /sge; # Form a plain-text version of the report for insertion in a hidden field my $reporttext = $report; # Escape ampersands, quotes and newlines as entities $reporttext =~ s/&/&/g; $reporttext =~ s/"/"/g; $reporttext =~ s/\n/ /g; # Substitute in the supplied values $templatehtml =~ s/XXX_REPORTTEXT_XXX/$reporttext/g; $templatehtml =~ s/XXX_REPORTHTML_XXX/$reporthtml/g; $templatehtml =~ s/XXX_LOCATION_XXX/$location/g; # Output the test page print CGI::header( -type => "text/html" ) . $templatehtml; exit( 0 ); } # Construct the encrypted token that indicates successful authentication # as a particular customer (identified by their 'client code') # sub format_authentication_token { my ( $clientcode, $productcode, $location ) = @_; my $token = ""; # =================================================================== # Details of how the encrypted token is constructed have been omitted # =================================================================== return $token; }