#!/usr/bin/perl
#
# Filename: biblelinkXML_v06.pl
# Author: Eric Pement
my $version = "0.6"; # Biblelink for XML (based on javascript version)
my $verdate = "2005-05-19"; # Last revised date
# Requires: perl v5.6 or better
#
# Purpose: Given 1 or more input files with Bible references, create HTML
# hyperlinks wherever possible. Where the link reference is
# ambiguous, prompt the user. Execute with no arguments for a
# brief help menu, or with argument "-h" for full documentation.
# Changelog (reverse chronological order):
# 2005/05/16 : began work on recognizing &ndash; and &#8211; (en dash) as hyphen :
# (0) Search for \x02 (Ctrl-B), and abort if Ctrl-B is already found;
# (1) Change html entities to \x02 (Ctrl-B);
# (2) Embed char into [char-set]
# (3) selective replace
# 2004/05/25 : Added -r to recurse down subdirectories.
# 2004/03/25 : Skip the pseudo-link generated by "XMLSPY v5 rel." (string)
# Added $squeeze_spaces to prettify display.
# Changed my $count_3 to our $count_3; perl complained of uninitialized values with no links.
# 2004/02/11 : Biblegateway significantly changed their way of passing the English translations
# in the URL. Updates to script to fix that. Find "KJV" to locate typical changes.
# 2004/02/10 : Replace UTF-8 en dashes on Bible verses. If this is done, display on console.
# 2004/02/04 : If filename ~ /syllabus.xml/, only link if // ... m''
# If filename ~ /lesson\d+\.xml/i, do not link if // ... m''
# Added ? quantifier to \s (space) on line 457 to support "Romans 10:9-10,13"
# Grep for "Unicode" below to find the process used to handle Unicode input.
# 2003/03/12 : Changed "javascript:bible(...)" references to XML output forms
# Also had to fix about 6 references to }}" which were changed to }}.
# 2003/03/12 : Bugfix of spaces being inserted in strings like "This12" or "That34". Added
# -m switch to make links even in pre-linked files; regex optimizations; some
# varnames and comments made easier to understand.
# 2002/10/30 : Clarified comments in source code and minor cosmetic changes
# 2002/10/08 : Changed references for "javascript:bible(..)" to "javascript:bible('..')"
# 2002/10/04 : Fixed skipped bare-refs; fixed hrefs with spaces which should have been +
# 2002/09/26 : Added 'Dt' to the list of valid Bible abbreviations
#
# --------------------------------------------------
# BEGIN - Settings you may want to change
# --------------------------------------------------
# use warnings; # uncomment for diags
# use diagnostics; # uncomment for diags
my $book = "Genesis"; # default book, but changeable via -b switch
my $chap = "1"; # default chapter, changeable via -c switch
my $m = "\x10\x10"; # $m for "marker"; shows up nicely on Win2K.
my $sp = "\x04 "; # $sp is the string printed at the beginning
# of each line when the -s switch is used. It
# is a graphic character and single space.
#
# Quote-Left and Quote-Right chars to highlight text when prompting the
# user for input. Set to guillemets (European double-quote marks) for Win32.
my $ql = "\xAE"; # Left guillemet
my $qr = "\xAF"; # Right guillemet
# Biblelink creates hyperlinks that look like this:
#
# 1 John 2:1
#
# Our 'bible' function creates a popup window of a specified size, passing
# the reference to Biblegateway to display the passage. To link directly to
# the Bible Gateway site, change the $link variable below:
# OLD: my $link = 'Lk 1:1
# You _ALSO_ need to change "Trick 3" and "Trick 4" near line 594 to remove
# the (' ... ') which enclose the scripture references for the parser.
# --------------------------------------------------
# END - Settings you may want to change
# --------------------------------------------------
# --------------------------------------------------
# Potential pitfalls in this script
# --------------------------------------------------
# (1) Some input files contain very long lines, such as Microsoft Wordpad
# files, in which a newline is only used at the end of PARAGRAPHS.
# With such input files, the display may be longer than we expect.
# We may need to test the length of the 3-line buffer and truncate the
# head of the buffer if it exceeds a certain value. I will implement
# this if we get enough input files to require it.
#
# (2) The L (skip rest of line) command may confuse some users. This command
# only skips the rest of the current TYPE of matches. If we're matching
# on verses at this point, L skips all other verse references, but will
# return for chapter references or bare references, if they exist.
#
# (3) This script presumes a Win32 environment, especially the parts that
# look for the COMSPEC and try to set the $pager. I'd like to modify it
# to run under Linux, if time permits.
# --------------------------------------------------
# main program begins here
# --------------------------------------------------
use strict;
use Getopt::Std;
our($opt_a, $opt_d, $opt_h, $opt_l, $opt_m, $opt_p, $opt_r, $opt_s, $opt_b, $opt_c);
getopts('adhlmprsb:c:');
# Foreach of (a,d,h,l,m,p,r,s), sets $opt_* = 1.
# Sets $opt_b, $opt_c to strings.
# -a = automatically link "chapter" and "verse" references
# -d = turn on diagnostic prompts
# -h = display additional help or info
# -l = corrects most ignorant use of 'l' instead of '1'
# -m = make links even though file is linked already
# -p = page display to STDOUT (implies -s)
# -r = recurse down subdirectories
# -s = send results to STDOUT instead of creating an output file
# -b Exod. = change default book to "Exodus"
# -c 5 = change default chapt to 5
my %books; # hash of books of the Bible
my $count_1 = 0; # count files successfully processed
my $count_2 = 0; # count files skipped (already marked up)
our $count_3 = 0; # count links created in any file
my $skipfile = 0; # will be set to 1 if file should be skipped
my $n = "[1-9][0-9]{0,2}"; # numeric value, resolves from 1-999.
#----not currently activated; save for later----
# my $local_book = ""; # for books w/in parens "(Jn 1:1 and 14:6)"
# my $local_chap = ""; # likewise: "(Jn 1:1, but compare verse 14)"
# Yes, the $pager variable is created and clobbered 2 times. The first loop
# looks for "less" or "more" on the DOS/Win path. But if 4DOS is installed,
# use LIST instead. And if PAGER environment variable exists, use it instead.
my $pager;
# Candidate pager programs, in order of preference, mapped to the command
# name that is actually run.
my @cmds = ("less.exe", "more.exe");
my %cmdh = ("less.exe" => "less", "more.exe" => "more");
# An unset PATH is treated as an empty search path.
my @paths = split(';', defined $ENV{'PATH'} ? $ENV{'PATH'} : '');
# Take the FIRST candidate found anywhere on the path and stop looking, so
# that "less" wins over "more" when both are installed.
STOP_LOOKING: foreach my $cmd (@cmds) {
    foreach my $dir (@paths) {
        if ( -e "$dir/$cmd" ) {
            $pager = $cmdh{$cmd};
            last STOP_LOOKING;
        }
    }
} # end foreach
# A 4DOS/4NT/TCMD command shell overrides the choice above with LIST.
$pager = "$ENV{COMSPEC} /C list /s"
    if defined $ENV{'COMSPEC'} && $ENV{'COMSPEC'} =~ /4DOS|4NT|TCMD/;
# An explicit PAGER environment variable overrides everything.
$pager = $ENV{"PAGER"} || $pager; # stays undefined if no pager was found
# Get the abbreviation names at the EOF before processing files.
# DATA is a special filehandle that points to what is after '__END__'.
# Fill %books from the DATA section. Lines that start with '#' or that
# contain no colon are ignored; every other line is split on ':' and its
# first field (the abbreviation) is mapped to its second field (the full
# book name). A named loop variable is used so the global $_ is untouched.
while ( my $entry = <DATA> ) {
    chomp $entry;
    next if $entry =~ /^#/;
    next unless $entry =~ /:/;
    my ($abbrev, $full) = split /:/, $entry;
    $books{$abbrev} = $full;
}
close(DATA) or die "Could not close the perl script: $!\n";
# Act on the -h, -b and -c switches.
#   -h       : show the embedded documentation through perldoc, then exit.
#   -b NAME  : NAME is normalized with fixbookname() and looked up in %books;
#              an unknown name ends the run through bad_book().
#   -c NUM   : NUM must be a whole number from 1 to 150; anything else
#              (including 0 and non-numeric text) ends the run through
#              bad_chap().
if ( $opt_h ) { # user request to see docs
    system("perldoc", $0);
    exit;
} else {
    if ( $opt_b && $opt_b =~ m/^([123] ?)?[A-Za-z]+\.*/ ) {
        # Looks like a valid Bible book name.
        $opt_b = fixbookname($opt_b); # Convert to proper case
        if ( exists($books{$opt_b}) ) { # Final test for valid book
            $book = $books{$opt_b};
        } else {
            bad_book(); # Invalid book name
        }
    } elsif ( $opt_b ) {
        bad_book(); # Not even potentially valid!!
    }
    # Both bounds must hold at once (the test used to join them with ||,
    # which is true for every number).
    if ( defined $opt_c && $opt_c =~ /^\d+$/ && $opt_c >= 1 && $opt_c <= 150 ) {
        $chap = $opt_c + 0; # Set default chapter; "+ 0" drops leading zeros.
    } elsif ( defined $opt_c ) {
        bad_chap(); # Invalid chapter number
    }
}
# Recursive switch
if ( $opt_r ) {
use File::Find;
}
# Valid switches are removed from @ARGV before processing.
# Invalid switches generate an error message, halting the perl script.
my @filelist;
my @tempcopy = @ARGV; # untouched copy of the command tail, for messages
# Build the candidate list: with -r, File::Find walks the tree below "."
# and &wanted collects the matches; otherwise each argument is expanded
# with glob() and consumed from @ARGV.
if ( !$opt_r ) {
    while (@ARGV) {
        push @filelist, glob( shift @ARGV );
    }
} else {
    find(\&wanted, "."); # from File::Find
}
# A command tail such as (s*.* *.htm *.h*) can name the same file more than
# once, so names are collected as hash keys to drop the duplicates.
my %uniquefiles = ();
# Keep only names that exist on disk; complain about the others.
for my $candidate ( @filelist ) {
    unless ( -e $candidate ) {
        # The "\a" rings the bell at the console.
        print STDERR "\a>> Diskfile \"$candidate\" does not exist! \n";
        next;
    }
    $uniquefiles{$candidate}++;
}
@filelist = sort keys %uniquefiles;
no_args() if scalar(@tempcopy) < 1; # syntax message if no fileargs
bad_names() if ( scalar(@tempcopy) >= 1 && scalar(@filelist) < 1 );
# Do it all here ...
go_thru_files();
liststats(); # cleanup
#----end of main----
# ------------------------------------------------------------------------
# Subroutines go at the end.
# ------------------------------------------------------------------------
############################################################################
sub wanted #05/25/2004 10:24AM
# Callback for File::Find. For the entry currently in $_, tests every
# filespec left in @ARGV (converted to a regex by glob2pattern) and
# appends $File::Find::name to @filelist once for each filespec that
# matches. Prints diagnostics when -d is active.
############################################################################
{
    for my $spec (@ARGV) {
        if ($opt_d) {
            print "DIAG wanted: ambig spec is [$spec]\n";
        }
        my $pattern = glob2pattern($spec);
        if ($opt_d) {
            print "DIAG wanted: regex is [$pattern]\n\n";
        }
        if ( $_ =~ /$pattern/ ) {
            push( @filelist, $File::Find::name );
        }
    }
} ##wanted
############################################################################
sub glob2pattern #05/25/2004 10:31AM
# called by &wanted
# Converts a DOS ambig filespec like "*.xml" into an anchored regex
# string like '^.*\.xml$'.
#   *        becomes  .*
#   ?        becomes  .
#   [...]    is kept as a character class; a '-' inside it stays unescaped
#            so ranges such as [a-c] keep working, every other character
#            inside it is escaped
#   a '[' with no closing ']' is treated as a literal bracket
#   any other character is escaped with quotemeta
# Takes the filespec as its only argument and returns the regex string.
############################################################################
{
    my $globstr = shift;
    my %patmap = (
        '*' => '.*',
        '?' => '.',
    );
    my $regex = '';
    # Tokenize into either a complete bracket expression or a single char.
    while ( $globstr =~ m{ \G (?: (\[) ([^\]]+) \] | (.) ) }gsx ) {
        # Copy the captures before any other pattern match can disturb them.
        my ($open, $body, $char) = ($1, $2, $3);
        if ( defined $open ) {
            my $class = join '',
                map { $_ eq '-' ? '-' : quotemeta($_) } split( //, $body );
            $regex .= '[' . $class . ']';
        } elsif ( exists $patmap{$char} ) {
            $regex .= $patmap{$char};
        } else {
            $regex .= quotemeta($char);
        }
    }
    return '^' . $regex . '$';
} ##glob2pattern
############################################################################
sub go_thru_files #05/25/2004 11:12AM
# Called by the main: block
# Runs &process over every name in @filelist, skipping names with a binary
# or script extension. Output goes to STDOUT (-s, one file only), to the
# pager (-p, one file only) or to "<file>.NEW". In the last case the
# original is renamed to "<file>.BAK" and the .NEW file takes its place,
# unless $skipfile was set to 1, in which case the .NEW file is deleted.
# Updates $count_1 and resets $::count_3 after each file.
############################################################################
{
    # Process each file, one at a time.
    foreach my $file ( @filelist ) {
        next if $file =~ m/\.
            (BAK|EXE|DLL|LNK|COM|SYS|BAT|PL|JS|DOC|PDF|RTF|JPG|PNG|GIF|SWF|FLA|ZIP|TAR|GZ)$/ix;
        my $old = "$file";
        # Per-file flags, read by &process as $::b_IsLesson / $::b_IsSyllabus.
        # They are assigned unconditionally so that a flag raised for one
        # file cannot leak into the files processed after it.
        $::b_IsLesson   = ( $file =~ m/lesson\d+\.xml/ ) ? 1 : 0; # So, do not link quizzes
        $::b_IsSyllabus = ( $file =~ m/syllabus\.xml/ )  ? 1 : 0; # So, only link assignments
        # Three-argument open: the file name is never parsed for a mode.
        open(OLD, '<', $old) or die "Cannot open input file, $old: $!";
        my $rename_files = ""; # used later
        my $new = "$file.NEW";
        # Now we check -s and -p switches! This is a list of all possible options
        # 1. files==1 && -s print to STDOUT
        # 2. files==1 && -p print to %PAGER
        # 3. files>1 && (-s || -p) issue ERROR message
        # 4. neither -s nor -p print to diskfile.NEW
        if ( scalar(@filelist) == 1 && $opt_s ) {
            open(NEW, ">-") or die "Could not open standard output: $!";
        } elsif ( scalar(@filelist) == 1 && $opt_p ) {
            die "No pager available: set PAGER or put 'less' or 'more' on the PATH.\n"
                unless defined $pager && length $pager;
            # $pager may carry its own arguments, so it is run via the shell.
            open(NEW, "| $pager") or die "Could not open 'more' or %pager utility: $!";
        } elsif ( scalar(@filelist) > 1 && ($opt_s || $opt_p ) ) {
            &option_conflict; # issue error message and exit
        } else {
            # We want to write changes to the disk.
            open(NEW, '>', $new) or die "Cannot open output file, $new: $!";
            $rename_files = "yes";
        } # end if-scalar test
        select(NEW); # 'print' now goes to NEW if not specified
        &process('OLD'); # single quoted 'OLD' required
        $count_1++;
        close(OLD) or die "Cannot close input file, $old: $!";
        print STDERR "$file - $::count_3 links created.\n" unless $skipfile==1;
        if ( $rename_files eq "yes" ) {
            if ( $skipfile == 1 ) { # The file was already processed
                $count_1--; # Remove one processing count
                print STDERR "Skipping the file $old ... it was already done!\n";
                close(NEW) or die "Cannot close output file, $new: $!";
                # Silently delete the temporary file.
                unlink($new) or warn "\a The file $new could not be deleted.\n";
                $skipfile = 0;
            } else {
                close(NEW) or die "Cannot close output file, $new: $!";
                rename($old, "$old.BAK") or die "Cannot rename $old to $old.BAK: $!";
                rename($new, $old) or die "Cannot rename $new to $old: $!";
            }
        }
        $::count_3 = 0; # reset counter for next file.
    } # end foreach
    # Do not leave a possibly closed NEW handle as the default for 'print'.
    select(STDOUT);
}
############################################################################
sub process # created: 07/19/02 3:47:PM
# modified: Thu, 25 Mar 2004, 17:54:16 -0600
# Parse each line of an input file
############################################################################
{
my $fh = shift @_; # Takes the filehandle "OLD"
my ($third, $prev, $this);
LINE: while (<$fh>) {
# step 0 of en-dash hack
die "\a\a Forbidden Ctrl-B found on line $. of input file $fh \n" if /\x02/;
if (/-- edited with XMLSPY v/) {
print $_;
next LINE;
}
if ($::b_IsLesson && m'' ... m'') {
print $_;
next LINE;
}
if ($::b_IsSyllabus) {
unless (m'' ... m'') {
print $_;
next LINE;
}
}
chomp;
s/\s+$//; # remove trailing whitespace from each line
# Sliding window. Some lines need to be appended to the previous line.
# Example: Given an input line that begins like this:
#
# Tim 3:16
# or
# 3:22-23, 4:12,
#
# we need to append such lines to the previous line to determine the
# full reference (e.g., "2 Tim 3:16" or "Acts 3:22-23, 4:12").
$third = $prev; # We'll use $third later, not now
$prev = $this; # Keep previous line
$this = $_; # Assign current line to $this
# Maybe "die" is too strong. Suppose there are multiple files...?
die "\a\n\n Input file already contains [$m] char on line $. - Aborting.\n\n" if /$m/;
# This test is skipped if $opt_m (-m switch) was true
if ( /); # .. get another line
$_ =~ s/^\s+//; # .. remove leading whitespace,
$this = $this . " " . $_; # .. reassign $this line,
$_ = $this; # .. reassign $_ .
} else {
print STDOUT "! DIAG 1b: partial match bkname is not valid\n" if $opt_d;
last PARTIAL;
}
} ## end while
while ( m/(:\d+,)$/ ) { # Second routine for partial matches. Different rule.
print STDOUT "! DIAG 1c: found trailing chars /$1/ at line $.\n" if $opt_d;
chomp($_ = <$fh>); # .. get another line
$_ =~ s/^\s+//; # .. remove leading whitespace,
$this = $this . " " . $_; # .. reassign $this line,
$_ = $this; # .. reassign $_.
} ## end while
# This position is important, because if the block occurs above the PARTIAL
# loop, some lines will be read in and not corrected.
if ( $opt_l ) {
# Try to correct most mistaken uses of l instead of 1, though this
# sometimes comes from OCR scanned documents, not always human error.
print STDOUT "! DIAG x1 - matched /$&/ \n" if ( $opt_d && m/ l(\d+[:]?)/ );
s/ l(\d+[:]?)/ 1$1/g;
print STDOUT "! DIAG x2 - matched /$&/ \n" if ( $opt_d && m/\d+l[-:\d ]/ );
s/(\d+)l([-:\d ])/$1 . "1" . $2/ge;
print STDOUT "! DIAG x3 - matched /$&/ \n" if ( $opt_d && m/\d[-:]l/ );
s/(\d[-:])l/$1 . "1"/ge;
s/ l / 1 /g;
}
# Correct problems with Unicode (UTF-8) input. How Unicode is handled in
# this script: Rather than searching for "John 3:16-u-18" and all the
# possible permutations, we just replace the 3-byte UTF-8 dash "-u-"
# with a standard hyphen. But we don't want to change things like "years
# 1991-u-1995", so we limit it to verse differences by requiring a
# leading colon, followed by 1 to 3 digits (never 4!). There is still
# the possibility of changing wrong refs, e.g., vols 1:23-u-45. I'm not
# happy with this solution, and I may need to change it later. For the
# time being, I'm adding a diagnostic routine to show me how often this
# occurs.
if ( m/:[1-9]\d{0,2}\xE2\x80\x93[1-9]/ ) {
print STDERR "\a\a\n" . "\xCD" x 67 . "\n"; # \xCD is an IBM graphics double-bar
print STDERR "Found UTF-8 en dashes between digits, here. They will be replaced.\n";
my $tmp_diag = $_;
$tmp_diag =~ s/ +/ /g;
print STDERR "[$.] $tmp_diag\n"; # $. display the current line number.
print STDERR "\xCD" x 67 . "\n\n";
s/(:[1-9]\d{0,2})\xE2\x80\x93([1-9])/$1-$2/g;
}
# step 1 of en-dash problem corrections.
if ( m/:[1-9]\d{0,2}(\–|\–)[1-9]/ ) {
print STDERR "Found HTML char entities for en-dash at probable location $. Replaced.\n";
s/(:[1-9]\d{0,2})(\–|\–)([1-9])/\1\x02\3/g;
}
# Correct Microsoft Word-to-HTML problems with inclusive numbers. In
# Microsoft Word, the en dash is not converted to a hyphen properly,
# resulting in "Gen. 1:26?28" instead of "Gen. 1:26-28" when using
# Word's "save-as-HTML" feature. NOTE: if the input file is *supposed*
# to have strings like "2?3" or "10?11", this will create problems.
s/(\d)\?(\d)/$1-$2/g;
# Convert Roman numbers in Bible refs to Arabic numerals. Side effect:
# This will affect embedded quotations from older books.
s/\b(II{0,2}) ?(Sam|Kin|Chron|Cor|Thess|Tim|Pet|John)/length($1) . " $2"/ge;
# Redefine global $book and global $chap
# --------------------------------------
# The "class=.subheading" string redefines the global chapter, but
# sometimes the closing '' tag is on the next line. The tag may look
# like:
# Ezekiel's Temple (40-48)
# The Word Became Flesh (John 1:1-18)
# Abraham's Call (12)
#
# with the title being long enough to span two lines. The text is usually
# marked with the CLASS tags before, which is why we do this extra step.
if ( m|class=['"]subheading|i && $_ !~ m|| ) {
# We cannot find a closing '' tag on the same line, so...
print STDOUT "! DIAG 02: found subheading, no closing\n" if $opt_d;
$this = $_; # .. save this line,
chomp($_ = <$fh>); # .. get the next,
s/^\s+//; # .. remove leading whitespace from the next,
$this = $this . " " . $_; # .. combine both into this,
$_ = $this; # .. and set the current line equal to $this.
}
# NOTE: Do not put a '~' in any comment, since it's the m~~ delimiter.
if ( m~
class=['"]subheading.*? # Required string
\( # Literal paren, required
( # Group 1 - optional bookname
([123]\s?)? # Group 2 - 1 Sam, 2Tim, etc.
[A-Z][a-z]\.? # Gen., Ex., Lev., Numb., etc.
)? # End optional Group 1
\s? # Optional space
(\d+) # Required Group 3 - may be (16):31-32
.* # The :31-32 including close paren
# Required closing tag, case-insensitive
~ix ) {
$book = $1 if $1; # reset the global book
$chap = $3; # reset the global chapter
print STDOUT "! DIAG 03: reset default chapter as $chap\n" if $opt_d;
# Match on bare num (12) or span (12-15), less than bare references.
$::count_3 += ($_ =~ s|(?<=\()$n([-\x02]($n))?(?=\))|$link\{{$1}}>$1|);
} ## end if-match
# Verse/chapt substitutions - may occur multiple times on a line.
if ($opt_a) { # If told to generate links automatically...
# Verse substitutions
# Count substitutions with $count_3
$::count_3 += ($_ =~ s~
( # Group 1
(?$1$3~gx );
print STDOUT "! DIAG 50: vv link\n" if $opt_d;
# Chapter substitutions
# NB: Chapter NN must occur before hash refs to permit it. Else,
# the hash ref will forbid the string "Chapter 12" or 3, etc.!
$::count_3 += ( $_ =~ s~
[Cc]hapter
\s
(\d+)(?![:<])
~$link\{{$book\+$1}}>$&~gx );
print STDOUT "! DIAG 51: chapt link\n" if $opt_d;
} else { # Do interactive verse/chapter substitutions
# Verse substitutions
s/(?$bk+!+$u_tail$m~;
print STDOUT "! DIAG 07: hashref exists, line after sub:\n -$_\n" if $opt_d;
&move_the_marker('hash'); # move marker forward to next match
print STDOUT "! DIAG 08: hashref exists, after markrmove:\n -$_\n" if $opt_d;
$::count_3++; # increment the counter
} else {
# What looked like a Bible book was invalid, e.g., Sura 4:157 or Mosiah 3:10.
# We will prevent it from being matched as a Bare Block by using '==skip=='.
print STDOUT "! DIAG 09: Bible book $tbk not found\n" if $opt_d;
s~$bk$m$cv$xlat~$bk==skip==$cv$xlat$m~;
&move_the_marker('hash'); # and forward to the next match
} ## end if-else
} # end while /Book Chap:verse/ search
# Bare references (no bookname, no prefixes).
# Examples of bare references which should be linked:
#
# The statement of the headwaiter in 2:10 was prophetic ...
# Jesus said, "Go, call your husband and come back" (4:16).
# ... the second day is in 1:29-34, the third in 1:35-43, ...
#
# The subroutine must avoid rematching:
# 1. internal A HREF links: bible('John+1:1,+8:58,+14:6')
# 2. external A HREF links: ')">John 1:1, 8:58, 14:6
#
# Bare references should always be interactive because of:
# - Multivolume bibliographic notes: ISBE, 5:15-37 (Zondervan, 1975)
# - Nonbiblical references: Moroni 9:9
#
# The task of this s/// cmd is to put a $m (marker) in front of the first
# probable bare reference it finds. We don't need to look for commas, extra
# chapters, etc., since they're not relevant to setting the marker.
#
# After setting of the marker here (only once), each time a hyperlink is
# successfully created, the marker is moved two more times. (1) by the
# function make_some_links, which moves it from the front of the reference
# to the end of a hyperlink, and by (2) move_the_marker, which moves it
# from the end of a hyperlink to the beginning of the next probable
# reference.
#
# The /x on s/// permits whitespace and #comments in the search expression.
#
# 4 types of lookaround. All are nonmatching. Lookbehind must be fixed-len.
# ------------------------------------------------------------------------
# Pos lookbehind (?<=abc) Succeeds if 'abc' CAN match to the LEFT
# Neg lookbehind (?....
} # end process() subroutine
############################################################################
sub fixbookname # convert potential book names to proper case
# Takes one string, which may carry a leading digit and a reference,
# e.g. "1JN. 5:12" or "2cor. 8:36", and returns a normalized copy:
# surrounding whitespace trimmed, everything lowercased, the first period
# that follows a letter removed, and the first run of two letters
# capitalized ("1JN. 5:12" becomes "1Jn 5:12").
############################################################################
{
    my ($name) = @_;
    for ($name) {
        s/^\s*//; # remove leading spaces
        s/\s*$//; # remove trailing spaces
    }
    $name = lc $name; # lowercase everything
    $name =~ s/([a-z])\./$1/; # remove period after a word
    $name =~ s/([a-z][a-z])/\u$1/; # capitalize the first letter
    return $name;
};
############################################################################
sub deldot #07/16/02 10:14:AM
# Returns a copy of its argument with one trailing period removed, if
# there is one: "Gen." becomes "Gen", "Rom" is returned unchanged.
# Periods elsewhere in the string are left alone.
############################################################################
{
    my ($trimmed) = @_;
    $trimmed =~ s/\.$//; # only a period at the very end is dropped
    return $trimmed;
} ##deldot
############################################################################
sub make_some_links # 08/28/02 10:40:AM
# make links for verses, chapters, bare refs
#
# Interactive step for the reference that currently follows the marker $m
# in $_. Shows the context, proposes a link target and reads one answer
# from STDIN:
#   Y            create the suggested link
#   (empty)      skip this reference, after confirmation
#   L            skip this and all other references of this type on the
#                line, after confirmation
#   B name       change the default book, then ask again
#   C num        change the default chapter (1-150), then ask again
#   N            print a hint, then ask again
#   a reference  create a link to that reference instead
# Anything else prints an error and asks again. If STDIN is exhausted the
# markers are removed from $_ and the sub returns, so the caller's loop ends
# instead of prompting forever.
#
# Arguments: the matched text, its digits, the type ('verse', 'chapt' or
# 'bare'), and up to three context lines of which the 2nd and 3rd are shown.
# Works on $_ and increments $::count_3 for every link made.
############################################################################
{
    my $pattern = $_[0]; # string: 'vv. 33-34' 'chapter 21' '12:4-10'
    my $digits = $_[1]; # string: '3' '11-12' '12:4-10'
    my $type = $_[2]; # string: 'verse' 'chapt' 'bare'
    my $one = $_[3];
    my $two = $_[4] || "" ;
    my $thr = $_[5] || "" ;
    my $ref = "";
    # Don't omit the STDOUT filehandle; SELECT has changed it ...
    print STDOUT "\n\nLast 3 lines of context:\n";
    my $squeeze_spaces = "$thr\n$two\n$_\n";
    $squeeze_spaces =~ s/[ \t]+/ /g;
    print STDOUT "$squeeze_spaces";
    # Every "ask again" path below jumps back here, so the suggestion is
    # rebuilt with the current default book and chapter.
    LOOP_1:
    if ($type eq "verse") { $ref = "$chap:$digits" }
    elsif ($type eq "chapt") { $ref = "$digits" }
    elsif ($type eq "bare" ) { $ref = "$digits" }
    print STDOUT <<"ENDBLOCK";
Should I link $ql$pattern$qr as $ql$book $ref$qr?
Press Y to accept the link we suggest,
<Enter> to skip it and go to the next item,
L to skip it and all other $type references on this line
B book to change the default bookname,
C num to change the default chapter,
ENDBLOCK
    print STDOUT " Or enter a substitute reference (eg, Acts 4:12): ";
    my $change = <STDIN>;
    unless ( defined $change ) {
        # End of input: nobody is left to answer, so stop asking.
        print STDERR "\a>> End of input on STDIN; remaining $type references left unlinked.\n";
        s/$m//g; # Removing the marker ends the caller's loop
        return;
    }
    chomp($change);
    # If $change is Y, make the hyperlink we suggested.
    # If $change is null, do not hyperlink at all.
    # If $change is L, skip the rest of this line.
    # If $change is B str set a new default bookname.
    # If $change is C str set a new default chapter.
    # If $change is N, assume that he didn't read.
    # If $change is a valid Bible reference, make the link.
    # If $change is anything else, issue error message and
    # return the user to the prompt above.
    if ( $change =~ /^[yY]$/ ) {
        # Make the hyperlink we suggested
        print STDOUT "! DIAG 60: y, make the link\n" if $opt_d;
        $ref =~ s/\s+/+/g; # replace spaces in ref with literal plus
        s!$m$pattern!$link\{{$book\+$ref}}>$pattern$m!;
        $::count_3++;
        &move_the_marker($type);
        &update_line_one;
    } elsif ( $change eq "") {
        # Do not hyperlink at all,
        # but first confirm that the user wants to skip this reference.
        if ( &confirm_skip("$pattern") ) {
            # User entered 'Y, I'm sure I want to skip' when prompted to confirm
            s/$m($pattern)/$1$m/; # moves marker to end of this pattern
            &move_the_marker($type); # moves marker to beginning of next pattern
        } else {
            # User entered 'N, I made a mistake' when prompted for confirmation
            goto LOOP_1;
        }
    } elsif ( $change =~ /^[lL]$/ ) {
        # Skip the rest of this line, but ask for confirmation first.
        if (&confirm_skip2($pattern)) {
            # User entered 'Y, I'm sure I want to skip' when prompted to confirm
            # but note: this only skips the rest of the category (verse/chapter),
            # but does not absolutely skip of the rest of the line.
            s/$m//g; # This action will exit the while() loop
        } else {
            # User entered 'N, I made a mistake' when prompted to confirm.
            goto LOOP_1;
        }
    } elsif ( $change =~ /^[bB]\s+(\w.+)$/ ) {
        # Set a new default bookname
        my $testname = $1;
        $testname = &fixbookname($testname);
        if (exists $books{$testname}) {
            $book = $books{$testname}; # The bookname is valid. Change default book.
            print STDOUT " Default book is now $book\n";
            goto LOOP_1;
        } else {
            # Whatever the user entered is not a valid book of the Bible.
            print STDOUT "\a\a"; # beep twice (\a for 'alert')
            print STDOUT
                " $ql$testname$qr is not a valid name/abbreviation for a book of the Bible.\n";
            goto LOOP_1;
        }
    } elsif ( $change =~ /^[cC]\s+($n)$/ ) {
        # Set a new default chapter
        my $testchap = $1;
        if ($testchap <= 150) {
            $chap = $testchap;
            print STDOUT " Default chapter is now $chap\n";
            goto LOOP_1;
        } else {
            # The number the user entered is greater than 150
            print STDOUT "\a\a"; # beep twice (\a for 'alert')
            print STDOUT " The number $testchap is too large.\n";
            goto LOOP_1;
        }
    } elsif ( $change =~ /^[nN]$/ ) {
        # The user entered N in response to: Should I link "$pattern" as "$book $ref" ?
        # Though N is not a valid response, we assume he wants to choose another reference.
        # This is a silent bit of robust programming.
        print STDOUT
            "\a\n---If the link we suggest is wrong, type in the replacement yourself.---\n";
        goto LOOP_1;
    } elsif ( &reference_is_valid($change) ) {
        # Create the link
        $change = &fixbookname($change);
        print STDOUT "! DIAG 63: after fixbookname: /$change/\n" if $opt_d;
        $change =~ s/^(([123]\s?)?[A-Z][a-z]+)\.?/$books{$1}/e; # expand the book names
        $change =~ s/\s+/+/g; # replace spaces with '+'
        s!$m$pattern!$link\{{$change}}>$pattern$m!;
        print STDOUT "! DIAG 64: y, ref is valid, change link\n" if $opt_d;
        $::count_3++;
        &move_the_marker($type);
        &update_line_one;
    } else {
        # Issue an error message and return the user to the prompt above.
        print STDOUT << "ENDERR";
\a
=======================================================================
>>> $ql$change$qr is not a valid Bible reference.
>>> Please try again.
ENDERR
        goto LOOP_1;
    } # end of 4-part test
} ## make_some_links
############################################################################
sub move_the_marker # Wed, 04 Sep 2002 16:26:39 -0500
# Move the marker forward to the next verse, chapt,
# bare reference or hash reference. Works on $_.
############################################################################
{
my $typec = $_[0]; # strings: "verse chapt bare hash"
# At this point, the marker is ALREADY positioned after the last match.
# This subroutine moves the marker forward to the next match, or else
# deletes the marker if no match can be found.
if ($typec eq "verse") {
# If the first s/// cannot match, the 2nd one will be executed.
# Resolves to "either move the marker or delete the marker"
s/$m(.*?)(?);
if ($inp =~ m/^Y$/i ) {
return 1;
} else {
return 0;
}
} ##confirm_skip
############################################################################
sub confirm_skip2 #08/30/02
# Asks on STDOUT whether the user really wants to skip the reference given
# as the only argument together with the rest of the line, then reads one
# line from STDIN. Returns 1 if the answer is exactly "Y" or "y", and 0
# for anything else, including end of input.
############################################################################
{
    my $ref = $_[0];
    print STDOUT "\nYou responded that you want to skip \"$ref\", and\n";
    print STDOUT "all the rest of this line. Is this correct? (Y/N) ";
    my $inp = <STDIN>;
    return 0 unless defined $inp; # end of input counts as "no"
    chomp($inp);
    if ($inp =~ m/^Y$/i ) {
        return 1;
    } else {
        return 0;
    }
} ##confirm_skip2
############################################################################
sub reference_is_valid # FUNCTION written on 8/28/2002 2:25PM
# Takes a string that should look like a Bible reference, e.g.
# "Mark 8:34-36". The string is normalized with fixbookname and matched
# against the book/chapter/verse pattern below. Returns 1 only when the
# pattern matches AND the book part (minus a trailing period) is a key of
# %books; returns 0 otherwise. Prints diagnostics when -d is active.
############################################################################
{
    my $candidate = fixbookname( shift @_ );
    # Do not put a '/' in any comment, since it's the m/// delimiter!
    my $looks_plausible = $candidate =~ m/
        ( # Group 1 - the full book = $bk
        (?:[123]\s?)? # 1 Sam, 2 Tim, etc. noncap parens
        [A-Z][a-z]+\.? # Gen, Ex., Lev., Numb., etc.
        ) # End Group 1
        \s? # space - THE ONE DIFFERENCE BETWEEN THE
        # - PROCESS SUB AND THIS ONE: NO $M !
        ( # Group 2 - chapt and verses = $cv
        $n # Min requirement: chapter number
        (?:[-:]$n[abc]? # opt verse or "12b" noncap parens
        (?:-$n # opt new chapt noncap parens
        (:$n[abc]?)? # opt new verse Group 3
        )? # end opt chapt
        )? # end opt verse
        (?:[,;]\s$n # opt add-on chapter noncap parens
        (?! \s[A-Z][a-z]) # Do NOT match on "; 1 Pe" or the like
        (?:[-:]$n # opt 2d verse noncap parens
        (?:-$n)? # opt chapt span noncap parens
        )? # end opt 2d verse
        )* # end opt add-on chapters
        | # OR
        $n # Min requirement: chapter number
        (?:-$n)? # opt chap span noncap parens
        (?:[;,]\s$n # opt 2nd chap noncap parens
        (?:-$n)? # opt 2nd chap span noncap parens
        )? # end opt 2nd chap
        ) # End Group 2
        (\s # Group 4: translation = $xlat
        (?i) # Group 5, case insensitive match below:
        (AV|NASV|AMP\.?|ASV|ESV|CEV|KJV|MSG\.?|NASB|NIV|NKJV|NLT|YLT)
        # AV = KJV, NASV=NASB, MSG=Peterson, YLT=Young's Literal Translation
        # ESV = English Standard Version, CEV = Contemporary English Version
        # Translations we DO NOT have access to: RSV, NRSV, JB, NEB, LB
        )? # End Group 4
        /gx;
    unless ($looks_plausible) {
        # The reference pattern doesn't even look possible. In other words,
        # to get here, the pattern looks like "blurk" or "23Skidoo".
        print STDOUT "! DIAG 22: bookref /$candidate/ doesn't look valid\n" if $opt_d;
        return 0;
    }
    # We now have a POTENTIAL match. Check the book part against %books.
    my $bk = $1;
    $bk = deldot($bk); # delete any trailing period
    unless ( exists($books{$bk}) ) {
        # What looked like a Bible book was invalid, e.g., "Moroni 9:10" or "Chicago 7"
        print STDOUT "! DIAG 21: bookref /$candidate/, \$bk /$bk/ looks valid but isn't.\n" if $opt_d;
        return 0;
    }
    print STDOUT "! DIAG 20: book ref /$candidate/ is valid\n" if $opt_d;
    return 1;
} ## reference_is_valid
############################################################################
sub update_line_one # 08/28/02
# Stores a refreshed copy of the current line in the package variable
# $this: the part of $_ before its first colon, then ": ", then all of $_.
# Takes no arguments and works on $_.
# NOTE(review): process() declares its own lexical "my $this", so it is
# not clear from here that anything reads this package variable - confirm.
############################################################################
{
    my ($prefix) = split(/:/, $_, 2);
    our $this = "${prefix}: " . $_;
} ##update_line_one
############################################################################
sub format_xlation # Wed, 11 Feb 2004 16:50:11 -0600
# Turns a translation abbreviation into a URL fragment.
# Expected input: (AV|NASV|AMP\.?|ASV|ESV|CEV|KJV|MSG\.?|NASB|NIV|NKJV|NLT|YLT)
# The input is uppercased, one trailing period is dropped ("Msg." and
# "Amp."), and the aliases AV and NASV are replaced by KJV and NASB.
# Returns the abbreviation followed by '_version=yes&showxref=no',
# e.g. NASB_version=yes&showxref=no
# Copyright restrictions forbid RSV, NRSV in Bible Gateway.
############################################################################
{
    # Abbreviations that must be rewritten before they are used.
    my %canonical = ( "AV" => "KJV", "NASV" => "NASB" );
    my $abbrev = uc $_[0]; # capitalize input
    $abbrev =~ s/\.$//; # remove trailing period
    if ( exists $canonical{$abbrev} ) {
        $abbrev = $canonical{$abbrev};
    }
    return $abbrev . '_version=yes&showxref=no'; # Note the '&'
}
############################################################################
sub liststats #07/22/02 11:00:AM
# Prints the end-of-run summary on STDERR: the number of files processed
# ($count_1) and the number skipped ($count_2). Each line is printed only
# when its counter is above zero, with "file" or "files" as appropriate.
############################################################################
{
    if ( $count_1 > 0 ) {
        my $plural = ( $count_1 == 1 ) ? "" : "s";
        print STDERR " \nBiblelink:\n $count_1 file" . $plural . " successfully processed.\n";
    }
    if ( $count_2 > 0 ) {
        my $plural = ( $count_2 == 1 ) ? "" : "s";
        print STDERR " $count_2 file" . $plural . " skipped.\n";
    }
} ##liststats
############################################################################
sub option_conflict #07/22/02 11:16:AM
# Prints the "only one file with -s" error text on STDOUT, then ends the
# program. Takes no arguments and does not return.
############################################################################
{
    my $message = <<"CONFLICT";
\a\a
>>> ERROR! <<<
When using the -s switch to send output to stdout, only one
filename may be passed on the command line. The -s is mainly
intended for diagnostics before changing a file directly. To
send multiple files to the screen (not changing them on disk),
use a for..in..do command from the DOS/CMD prompt, e.g.,
for %%v in ( *.* ) do biblelink -s %%v
For more help, type "biblelink -h" .
CONFLICT
    print STDOUT $message;
    exit;
} ##option_conflict
############################################################################
sub bad_book #07/22/02 4:12:PM
# Reports that the string given with the -b switch is not a recognized
# Bible book name or abbreviation, then terminates the program. Never
# returns.
# The offending string is taken from the file-scope variable $opt_b and
# echoed inside << >> in the message, which goes to STDOUT after two bells.
# Exit status is 1 rather than the default 0, so that a calling batch file
# or shell can tell that the run failed.
############################################################################
{
print STDOUT <<"BADBOOK";
\a\a
>>> ERROR! <<<
The -b switch must be followed by a valid name or abbreviation
of one of the books of the Bible. For example, -b 2Jn is okay.
You followed it with the string <<$opt_b>>, which
is not a valid book name. Please try again.
BADBOOK
    exit 1;    # non-zero: this is an error exit
} ##bad_book
############################################################################
sub bad_chap #07/22/02 4:46:PM
# Reports that the value given with the -c (chapter) switch is not a valid
# chapter number, then terminates the program. Never returns.
# The offending value is taken from the file-scope variable $opt_c and
# quoted in the message, which goes to STDOUT after two bells.
# Exit status is 1 rather than the default 0, so that a calling batch file
# or shell can tell that the run failed.
############################################################################
{
print STDOUT <<"BADCHAP";
\a\a
>>> ERROR! <<<
The -c (chapter) switch must be followed by a valid chapter
number for a Bible book. No chapter can be higher than 150.
For example, "-c 12" sets the default chapter to 12. You
used "$opt_c", which is not a valid chapter number.
Please try again.
BADCHAP
    exit 1;    # non-zero: this is an error exit
} ##bad_chap
############################################################################
sub bad_names # Fri, 06 Sep 2002 18:21:28 -0500
# args given, but none were filenames
# Prints an error (three bells, then an explanation) to STDOUT and
# terminates the program. Never returns.
# The explanation echoes the file-scope array @tempcopy, which is
# presumably a saved copy of the command-line tokens -- confirm where it
# is populated.
# Exit status is 1 rather than the default 0, so that a calling batch file
# or shell can tell that the run failed.
############################################################################
{
print STDOUT "\a\a\a\nERROR! No valid filenames found on the command line!\n\n";
print STDOUT <<"BADNAMES";
The string issued on the command line was: @tempcopy
At least one of these tokens must resolve to a valid filename
for the program to execute. None of them match any files in the
current directory. (Type DIR /W at a command prompt to check.)
---------------------------------------------------------------
BADNAMES
    exit 1;    # non-zero: this is an error exit
} ##bad_names
############################################################################
sub no_args # 07/19/02 3:57:PM
# help message to display if invoked without any args
# Sounds two bells, prints the short usage screen to STDOUT and ends the
# program with the default (zero) exit status. Never returns.
# $version and $verdate are interpolated from file scope.
############################################################################
{
    # Assemble the whole screen first; it is emitted with a single print.
    my $usage_screen = <<"USAGE_SCREEN";
\a\aBIBLELINK (v$version) - Create XML hyperlinks to Bible references
USAGE:
biblelink [-options] file1*.* [file2*.php file3* ...]
Option switches begin with a hyphen. File specs may be ambiguous (using
'*' or "?" is okay), but there must be AT LEAST ONE FILENAME on the
command line. Files are changed in-place under the same name, and the
unchanged versions are saved as "filename.typ.BAK" . Items in square
brackets [...] are optional; don\'t include the brackets when using the
option switches.
OPTIONS:
-a # automatically create verse/chapter links (non-interactive)
-b Eph. # set default book to Ephesians (default is Genesis)
-c 5 # set default chapter to chapt. 5
-l # change lowercase L to 1, where it should be a numeral
-m # make links, even though file has links already
-s # send output to stdout. Used with only one file.
-p # same as s, but page output thru MORE, LESS, or %PAGER% env var.
-r # recurse down subdirectories for the pattern.
-h # display more complete help
-d # diagnostic helps to STDOUT (used for debugging)
Copyleft 2002-2005 by Eric Pement. Type "biblelink -h" for distribution terms.
Last revised: $verdate
USAGE_SCREEN

    print STDOUT $usage_screen;
    exit;
} ##no_args
# ------------------------------------------------------------------------
# End of subroutines. Perl POD (Plain Old Documentation) goes here.
=head1 NAME
biblelink_XML (v06) - Bible Link, create XML hyperlinks to Bible references
on input files
=head1 USAGE
GENERAL: biblelink [-options] file1*.* [file2*.htm* file3.as* ...]
EXAMPLE: biblelink -b Galatians u1*.php v2*.*
biblelink -b John -c 15 -p somefile.htm
=head1 OPTIONS
Options may be clustered (e.g., "-sb Deut.").
=over 4
=item B<-a> (automatic links)
Instead of prompting the user for "chapter" and "verse" links, create
them automatically without prompting. This may result in some false
matches, which is why it is off by default. Bare references like
"cf. 14:6" are always interactive.
=item B<-b> "1 Samuel"
Change default book to 1 Samuel (default is Genesis). Abbreviations
and lowercase are okay. E.g., "1sa" or "1 sam" are recognized.
=item B<-c> 12
Change default chapter to 12 (default is 1).
=item B<-d> (debug)
Print script debug info to stdout to track down script problems.
=item B<-h> (help)
Issue help message in perldoc, and exit. Cancels all other options.
=item B<-l> (change l to 1)
Silently changes occurrences of letter 'l' (lowercase L) where the
number 1 (one) was expected. Sometimes needed where input files
come from OCR scanning.
=item B<-m> (make links anyway)
The default behavior is to skip the file if BibleLinks are detected,
since this might re-link (or double-link) some Bible references. If
the -m switch is used, this overrides the default and processes the
file normally. Use this switch when a few links are already coded
in the input file. NB: Double-linking MAY occur with this switch!
=item B<-p> (pause)
Send output to the screen, pausing each screenful. Pipes the output
to the pager specified by the environment variable %PAGER%, or to LIST
(if 4DOS or 4NT are active), or to LESS or MORE (in that order). This
option does not alter the input file. When -p is used, only one
filename may be given on the command line.
=item B<-r> (recurse)
Process all the files specified on the command line, recursing down
the directory tree. Recursion starts from the current directory!
=item B<-s> (stdout)
Send output to standard output device (the screen), without paging.
This option does not alter the input file. When -s is used, only one
filename may be passed on the command line.
=back 4
=head1 DESCRIPTION
Given a list of input files on the command line (ambiguous filespecs
okay), BIBLELINK first checks to see that the list of files is valid.
Files named *.BAK are skipped. The file is changed in-place with the same
filename. The original file is saved as "sourcefile.type.BAK". If a file
does not exist, such as a command line like:
biblelink *.htm ooops!-a-typo.htm
the mistaken word or filename is echoed to STDERR (the screen) with a
beep, while the other files are modified. The -r switch sets the program to
recurse down all the specified files in the current directory tree.
Files with the following filetypes are always skipped (case-insensitive
match), even if they are explicitly listed on the command line:
BAK EXE DLL LNK COM SYS BAT PL JS DOC PDF RTF JPG PNG GIF SWF FLA ZIP TAR GZ
Wherever a valid Bible reference occurs (e.g., Rom 6:10, 23), the entire
reference is changed to an XML hyperlink of the following form:
Rom 6:10, 23
Ac. 1:8 ASV
All standard abbreviations of books of the Bible are recognized. A period
is ignored when making the link, but is retained in the text.
SPECIAL FILES: For filename "syllabus.xml", links are created only
between "" and "" tags. For filenames like
"lesson\d+.xml", links are never created between "" and ""
tags.
SILENT CORRECTIONS: Roman numerals (I Tim., II Cor.) before books of the
Bible are automatically changed to digits. Many of our input files are
generated by Microsoft Word ("Save As Web Page" or "Save As HTML"), which
does not convert the en dash to a hyphen, resulting in "Gen. 1:26?28"
instead of "Gen. 1:26-28" as expected. Any question mark between two
digits ("2?3" or "8?4") is silently changed to a hyphen. A UTF-8 en dash
between digits ":\d{1,3}" and "\d" is automatically changed to a hyphen;
however, a note to stderr tells the user of each such change.
BIBLELINK uses a default book (Genesis) and default chapter (1). These are
used because BIBLELINK will also generate links for all these:
verse 14 vv. 19-22 (cf. 22:8-10) (10:9-10) chapters 2-3
The default book or chapter may be changed from the command line via an
option switch (-b or -c, followed by a string), or in the middle of a match
at a user prompt. The default chapter is also automatically changed each
time it finds a particular subheading declaration, described next.
SUBHEADING DEFINITIONS: A large expository document will sometimes
need to change the default chapter several times as it works through
successive portions of a book. A single input file is usually broken
into sections, each section dealing with a certain chapter or set of
chapters in one of the books of the Bible. At Moody, we use stylesheet
"classes" to change the appearance of heading titles. The heading title
usually signals when a new chapter is required. For example:
The Word Became Flesh (John 1:1-18)
The Millennial Temple (40-48)
The string "subheading" followed by digits in parentheses forms a
pattern to indicate when the default chapter should be changed. The
first string of digits with parentheses resets the default chapter.
The "subheading" pattern MUST be followed by a closing tag such as
, , . If the closing tag cannot be found, the script
reads one more line to find it. Any new default chapter continues
until the next "class=.subheading" is encountered.
NORMAL MODE OF OPERATION: The script looks for digits (2), digit spans
(2-4), chapt:verse (5:6), with spans or additions (, 7:8-9:10), and
then looks backward for books of the Bible or words like "vv",
"verse", "chapter", etc. Clear matches ("Gen. 3:15") are automatically
linked without user input.
User intervention is requested when matching "vv.", "chapter", etc., since
these usually require reading the context to determine which book and
chapter is intended. However, if the -a switch is used, chapter-and-verse
linking is done automatically, without prompting. If you use the -a
switch, you should use the -b (book) and -c (chapter) switches as well.
Thus, to set the default to John, chapter 10, linking chapter and verse
references automatically, enter this:
perl biblelink -ab John -c 10 John*.htm
This script also links "bare references", which must be specified
item-by-item from the console. A "bare reference" to a passage occurs
without a book name at all. For example:
The construction of 2:13 is significant.
The user is prompted to link "2:13" to something. The last three lines of
context are displayed on screen, and then the user is prompted with a
suggestion, using the default book value:
Should I link "2:13" as "Genesis 2:13"?
Press Y to accept the link we suggest,
to skip it and go to the next item,
L to skip it and all other bare references
on this line,
B book to change the default bookname,
C num to change the default chapter,
Or enter a substitute reference (e.g., Acts 4:12):
Note in particular that 'b luke' will change the default book to
Luke for the rest of the session. Likewise, the user may choose to skip
this particular reference or all remaining references on the line.
If the user enters a substitute reference ("Mark 51:99"), the title is
checked to see if the name of the book is valid, but no further checking
is done to confirm the validity of the chapter or verse.
As currently configured, the script looks for references that roughly
match this regular expression (in the code, it's far more complex):
([1-3] )?[A-Z][a-z]+\.? [1-9]
but skips them if the "word pattern" isn't in its table of books of the
Bible. For example, the string "Romns. 3:10" is not a valid abbreviation
for the book of Romans. The script will not prompt you for a different
spelling, and ALSO will not try to match the bare reference "3:10". As
currently configured, the script does not recognize books of the
Apocrypha, the Qur'an, the Book of Mormon, or other nonbiblical works.
MASTER TRICKS: There is no easy way to save your work halfway through
a file, quit, and resume later where you left off. Pressing Control-C
will abort the script. Any lines processed up to that point will be
saved in a temporary file called "filename.NEW", which you may examine
if you wish. The original file will be unchanged. This could be
construed as a feature, since the script halts if it finds the
"