%%% ====================================================================
%%%  @@C-Web-file{
%%%     author          = "Robert M. Solovay",
%%%     version         = "1.06",
%%%     date            = "15 July 1994",
%%%     time            = "19:17:08 MDT",
%%%     filename        = "checksum.w",
%%%     address         = "Department of Mathematics
%%%                        University of California
%%%                        Berkeley, CA
%%%                        USA
%%%                        Tel: (415) 642-2252",
%%%     checksum        = "32612 1201 6756 43641",
%%%     email           = "solovay@@math.berkeley.edu (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "checksum",
%%%     supported       = "yes",
%%%     docstring       = "The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by checksum
%%%                        itself.",
%%%  }
%%% ====================================================================

%%% Revision history (reverse time order):
%%%
%%% 1.06 [15-Jul-1994]
%%%	Add change file for version 1.06; no changes in the .w files.
%%%	other than these comments.
%%%
%%% 1.05 [??-???-????]
%%%	Not recorded.
%%%
%%% 1.04 [15-Mar-1993]
%%%	Add support for Apple Macintosh.
%%%
%%% 1.03 [25-Jun-1992]
%%%	Add support for VAX VMS C run-time library deficiency.
%%%
%%% 1.02 [11-Sep-1991]
%%%	Add missing initialization of answer_from_critical; this bug
%%%	was discovered when garbage output was produced from an empty
%%%	input file.
%%%
%%%	Correct two ">From" strings to "from" (mangled by UNIX mailer).
%%%
%%%	Shorten two long lines to <= 80 characters.
%%%
%%% 1.01 [06-Sep-1991]
%%%	Minor wording changes.
%%%
%%%	Add EXIT_SUCCESS and EXIT_FAILURE, and use in all exit()
%%%	calls.  Make successful checksum installation or verification
%%%	return EXIT_SUCCESS, and all other conditions, EXIT_FAILURE,
%%%	so shell commands can test execution status.
%%%
%%%	Add missing initialization of verify_successful.
%%%
%%% 1.00 [04-Mar-1991]
%%%	Original version.
%%%
%%% ====================================================================

@* The checksum program.
\medskip
 Copyright 1991 Robert Solovay -- University of California, Berkeley.
Email: solovay@@math.berkeley.edu
\medskip
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting
documentation.  This software is provided ``as is'' without express or
implied warranty.



\medskip
	This program will compute a 16-bit cyclic redundancy checksum
(CRC)
for a file as well
as a count of the words, lines and characters. If called from the
command line with:

{\tt \%checksum  inputfile outputfile}

{\parindent=0pt the program will compute a 16-bit  checksum for the file
{\sl inputfile} and write a copy of this file with the checksum
installed to the file {\sl outputfile}.}

If the checksum has previously been installed in the input file, and
the input file has not been corrupted since then,
the output file will be identical to the input file.

	If called with the command line:

{\tt \%checksum -v file}

{\parindent=0pt where {\sl file} is a file with an ``installed''
checksum, the program will proceed to verify whether the checksum is
correct and report its conclusions to |stdout|.}
%Here is the checksum for this web file:
%checksum = "02100 1108 6383 40704"
@ More precisely, the program will search for the first line of
{\sl inputfile}
which contains the word {\sl checksum} in lowercase and no other
alphabetic characters. We refer to this line as the ``critical line''
of the file. If the critical line contains no quotation marks, then
the file {\sl outputfile} will be created
which is a copy of {\sl inputfile} except that the critical line
will be replaced by a line where the word {\tt checksum} is
replaced by {\tt checksum = "xxxxx lc wc cc"}. Here {\sl lc} is the
number of lines in the file {\sl outfile}, written in decimal.
Similarly, {\sl wc}
is the word count of {\sl outfile} and {\sl cc} is the character count
(when the end of line character is taken to be the single character
\.{\\n}.)

	It is difficult to arrange that a file contains its own
checksum. Instead,
the field {\sl xxxxx} contains the checksum,  written in
decimal in a five-digit field (with  possible leading 0's) of the file
obtained from {\sl outfile} by replacing the field containing the
checksum by the string |"ZZZZZ"|.

	If the critical line already contains after the word
``checksum'' precisely two quotation marks, and the first is the last
character of the four-character string |" = \""|, then the material
between the two quotes will be deleted and replaced by a checksum and
three counts as described above.

	The program is designed to be usable as a UNIX filter; that
is, it can take its input from |stdin| or send its output to |stdout|.
The user can indicate the choce of |stdin| or |stdout| either by
omitting the relevant file-name from the command line or by using the
single character |'-'|. (If the command given is:

{\tt \%checksum file}

{\parindent=0pt the program will interpret |file| as the name of the
input file, and will send the output to |stdout|.)}

@* The skeleton.
 Here is the main module of the program.

@c
@<Global type definitions@>@/
@<Global |#include|s@>@/
@<Global variables@>@/
@<All functions@>@/
@<Main@>


@
We have to include the standard I/O definitions, as well as some
other standard header files.  Until all C implementations reach
the level of the 1989 ANSI C standard, we have to be careful about
which files we try to include.  The GNU C compiler defines the
symbol |__STDC__|, but is commonly run with native pre-ANSI header
files.
@f CheckSum int
@<Global |#in...@>=
#include <stdio.h>
#include <ctype.h>
#if defined(__STDC__) && !defined(__GNUC__)
#include <stddef.h>
#include <stdlib.h>
#else
#ifndef SEEK_SET
#define   SEEK_SET        0
#endif
#endif
#include<string.h>

#ifdef THINK_C
#include <console.h>
#endif

#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif

#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif

#if defined(__STDC__)||defined(__GNUC__)||defined(__TURBOC__)||defined(MSDOS)
#define ARGS(plist) plist
#else
#define ARGS(plist) ()
#endif

CheckSum add_a_byte_to_checksum ARGS((unsigned int new_byte,CheckSum
oldCheckSum));@/
int critical ARGS((char *s));
int main ARGS((int argc,char * *argv));@/
CheckSum merge_three_checksums ARGS((CheckSum s,CheckSum t,CheckSum u));
CheckSum MultiplyTwoPolys ARGS((CheckSum u,CheckSum v));@/
unsigned long int word_count ARGS((char *s));

@ Now we come to the general layout of the |main| function.  The
|status| variable communicates to the operating system if the run was
successful or not, and |prog_name| is used in case there's an error message to
be printed.

@d OK 0
@d usage_error 1
@d cannot_open_read_file 2
@d cannot_open_write_file 4
@d error_reading_input_file 8
@d cannot_open_temporary_file 16
@<Global variables@>=
int status=OK; /* exit status of command */
char *prog_name;


@ @<Main@>=
int
main (argc,argv)
int argc;
char **argv;
{
	@<Variables local to |main|@>@/

#ifdef THINK_C
	argc = ccommand( & argv );
#endif

	prog_name=argv[0];
	@<Parse the command@>@/
	@<Open the input and output files@>@/
	@<Process the input file@>@/
	@<Do endgame procedures@>@/
	if (mode == COMPUTING){@<Do second pass on input file@>}@/
	@<Close all files@>@/
	if (status != 0)
	    exit(EXIT_FAILURE);
	else
	    exit(verify_successful ? EXIT_SUCCESS : EXIT_FAILURE);
	return(0);
}

@* Parsing the command line.

	We will have a variable |mode| which is set to indicate
whether we are ``computing'' or ``verifying'' the checksum.
@d VERIFY 0
@d COMPUTING 1
@<Variables local to |main|@>=
int mode = COMPUTING;
@ The mode will be |VERIFY| iff the variable |argc| is $\geq 2$ and
the second ``argument'' is |"-v"|. In any case we should have |argc|
$\leq 3$. We update |argc| and |argv| to reflect the arguments already
``consumed''.
@<Parse the command@>=
if(argc > 3){@<Complain about improper command format@>}
else if ( (argc >= 2) && !(strcmp(argv[1], "-v")))
{mode = VERIFY; argc -= 2; argv += 2;}
else{argc--;argv++;}



@* The main  loop.


Our main loop reads a line at a time from the input file.
It looks
for the ``critical line'' which first contains the word ``checksum''
(and no other alphabetic characters).

Suppose first that we are in |VERIFY| mode. As the program reads each
line, it updates its various counts (|lc|, |wc|, and |cc|) as well as
computing the checksum. When we encounter the critical line, we have
to record its predictions as to our counts and checksum, to compare
with those that are computed directly from the file. In addition, we
have to modify the line (as discussed above) replacing the checksum by
the string |"ZZZZZ"| before using the line in our checksum calculations.

Our processing of the input file will end when we reach an end of
file.
At that point, we compare the predicted counts and checksums with
those that are recorded on the critical line, and make a report to
|stdout| as to whether or not the verification was successful.

@

Next we describe the main loop when we are in |COMPUTING| mode. As we
read each line, we see if it is the critical line or not. The first
phase of our operations consists of the processing of those lines
before the critical line. We update our counts and checksum. In
addition, we write each line as we read it out to the output file.

When we reach
the critical line, we store it in a special string buffer
|critical_line|. We also record our place in the input file, so that
we will be able on our second pass to write the remainder of the input
file to the output file. (We must modify the critical line before
writing it to the output file so as to contain the counts and
checksum. We will only be able to compute these at the end of the
first pass through the input file.) The place in the input file that
we record will be directly after the critical line.

 After
processing the remainder of the file, we will be able to correctly
compute the version of the critical line to write to the output file,
and we will be able to write the remainder of the input file to the
output file.

@
We want to have a special type which is the kind of variable used for
accumulating checksums. The reason is that if we wish to change to a
32-bit checksum, it is useful to have just one definition to change.
@d SIZE 16
@<Global type definitions@>=
typedef unsigned int CheckSum;
@^Checksum size dependencies@>
@
Here are the basic variables needed to accumulate the various counts,
etc.

The reasons for the mysterious initializations of the checksum
variables will become apparent below when we discuss the ``theory'' of
our device for doing the checksum calculation in one pass through the
input file.
@<Variables local to |main|@>=
unsigned long int lc = 0; /*line count*/
unsigned long int wc = 0; /* word count*/
unsigned long int cc = 0; /*character count*/
int current_line_is_critical; /* This flag is set to 1 if the current
                               line is the critical line*/
int found_critical_line = 0; /*This flag is set to 1 after the
                              critical line is located.     */
CheckSum chksum = 0, auxchksum = 0, auxchksum2 = 1;
long int loc_critical_line; /*This stores the location needed to
			rewind to the start of the line after the critical
			line in the input file*/
int answer_from_critical = 0;
char current_line[257];
char critical_line[257];


@ Here we read in the next line. If the pointer returned by |fgets| is
null, it might signal an |EOF| or it might be an error condition. We
use the |feof| facility to determine which. If it is an error
condition, we signal the error and abort the program. (If not, we are
done reading the input file, and the module \.{<Do endgame procedures>}
will take over.)

If the critical line has not yet been encountered, we must first check
if the current line is the critical line. If it is, we set two flags.
The first tells us that the critical line has indeed been found. The
second tells us that the current line  is indeed the critical line.
(This second flag must be set to 0 before we test for the critical
line.)


@<Process the input file@>=

while(  fgets(current_line,257,input_file) != NULL)@/
{
current_line_is_critical = 0; /*Reset this flag.*/
if (found_critical_line == 0)@<Check if current line is critical line
and set flags appropriately@>@/
if(current_line_is_critical){@<Process critical line@>}@/
@<Write line to output file if appropriate@>@/
@<Update counts@>@/
@<Update checksums@>@/
}@/
if (!feof(input_file))@<Signal error while reading input file and abort@>@/

@ In order to check whether or not a line is critical, we use an
auxiliary function |critical|. This function returns a positive
integer  if the
line is critical and $0$ if it isn't.
The input to |critical| is a
pointer to the string that contains the current line.

If the line is critical, the function will set the value of the global
variable |loc_chksum| to indicate the position of the word
|"checksum"| in the critical line. Then it will
search the portion of the
string after the word |"checksum"| for an occurrence of the string
|" = \""|. If it doesn't find this string, it sets the return value of
|critical|
to 1 and exits. If it does, it records the position of the quote
character of the string in
the global variable |loc_first_quote_mark|. It then proceeds to look
in the critical line
for a quote character occurring  after this string. If it doesn't
find this, the function returns 2; if it does, it stores its location
in the global variable |loc_second_quote_mark| and returns the value
3.
@<Global variables@>=
int loc_chksum, loc_first_quote_mark, loc_second_quote_mark;
@
@<All functions@>=

int critical(s)
	char *s;
	{
	@<Variables for ``critical''@>@/
        @<Code for ``critical''@>@/
	}
@

The variable |i| is a state variable which takes a value in the range
$0 \leq i \leq 3$. The precise meaning of the different states will be
explained below.

The variable $j$ holds our current place in the ``input string'' |s|.
Thus we usually refer to |s[j]| in the code that follows.

The array of characters |chksm| holds the string |"checksum"|.

The
variable $k$ holds our place in this string.

The variable $jj$ holds the location of the first character of the
string |"checksum"|
in the ``input string''.
@<Variables for ``critical''@>=
	int i = 0;
	int j = 0;
	char *chksm = "checksum";
	int k = 0;
	int jj;



@ We first determine if the string we are inspecting is the critical
line. If it is not, we bail out and return 0. If it is we set the
global variable |loc_chksum| and continue.
 As we read through the string |s| during this first phase, we are in
one of four states: We are in state $i=0$ until we have spotted the
first alphabetic character.

We are in state $i=1$ if we know definitely that the string we are
looking at is not critical.

We are in state $i=2$ while we are checking that the part of the
string we are now looking at is an occurrence of the word
``checksum''.

After we have seen the occurrence of the word ``checksum'' we are in
state 3.

@<Code for ``critical''@>=
while ((i != 1) && (s[j] != 0))

	switch(i)

	{

	case 0:if(isalpha(s[j])){ i = 2; jj = j;} else j++; break;
	case 2:if(s[j] == chksm[k])
	{k++;j++; if (k == 8)i = 3;}
		else i = 1;
	break;
	case 3: if(isalpha(s[j])) i = 1; else j++; break;} /*end
switch*/

	if (i !=3) return(0);
	loc_chksum = jj;
@
We next proceed to look for the string |" = \""|. We search for the
final quote character of this string. If we find it, we check if the
preceding characters match up too. In this part of the code, we use
|i| as a flag which we set to one when we find the pattern we are
looking for.
@<Code for ``critical''@>=
	j = jj + 8; i = 0;@/
	while (s[j] != 0)
	{
	if (s[j] != '"') {j++; continue;}
	if( (s[j-1] == ' ') && (s[j-2] == '=') && (s[j-3] == ' '))
	{loc_first_quote_mark = j; i = 1; break;}
	else j++;}
	if(i==0) return(1);
@
In this part of the code, we are looking for the second ``quote
mark''.
@<Code for ``critical''@>=
	j++; i = 0;
	while(s[j] != 0)
	{

if(s[j] == '"'){i=1; loc_second_quote_mark = j; break;}
	j++;}
	if(i==0) return(2); else return(3);


@ We stash the answer from the function |critical| away for future
use. Note that once the answer is positive, we will never call this
function again (because the flag |found_critical_line| will be set.
@<Check if current line...@>=
if( answer_from_critical=critical(current_line))
	 current_line_is_critical =
	found_critical_line = 1;

@ The flag |input_from_pipe| is set to $1$ if we are getting our input
from |stdin|. In that case, it is not possible to save our location in
the input file. Instead, we will store the remainder of the input file
to a temporary file and read that during the second pass.
@<Process critical line@>=
{strcpy(critical_line,current_line);
if((mode == COMPUTING) && !input_from_pipe)
{@<Store current location in input file@>}@/
else if (mode == VERIFY)
{@<Adjust current line@>}@/
}

@
@<Variables local to ...@>=
int i;
@
@<Adjust current line@>=
for(i = 1; i<6;i++) current_line[loc_first_quote_mark  + i] = 'Z';


@
@<Store current location...@>=
loc_critical_line  = ftell(input_file);
@
We write the current line to the output file if it is before the
critical line. If our input is from |stdin|, then we are unable to
rewind the ``input file'' for the second pass. We therefore copy out
the remainder of the input file to a temporary file.
@<Write line to output file...@>=
if(mode == COMPUTING)
{if (!found_critical_line)
fputs(current_line,output_file);
else if( (input_from_pipe) && !(current_line_is_critical))
fputs(current_line,temp_file);}
@
This section updates the various counts. The word count needs a
``home-grown'' function |word_count| which we write next. If we are in
|COMPUTING| mode, we postpone the calculations for the ``critical
line''; they will be handled in the ``end-game'' procedures below.
@<Update counts@>=
if(!(current_line_is_critical) || (mode == VERIFY))
{
lc++;
cc += strlen(current_line);
wc += word_count(current_line);
}
@

@<All functions@>=

unsigned long int word_count(s)
	char *s;@/
	{int i ;
	int in_word  = 0;
	int word_cnt = 0;
	char c;
	for(i = 0; (s[i] != 0) && (s[i] != '\n'); i++)
	{ c = s[i];
	if (c  > ' ' &&  c < 0177) {/*visible ASCII codes */
	if(!in_word) {word_cnt++; in_word++;}
	continue;
	}
	if (c != ' ' && c != '\t') continue;
	in_word = 0;
	}
	return( (unsigned long int)word_cnt);
	}

@

Our checksum can be computed ``a byte at a time''. Thus if the
sequence of bytes in the file is $s_0, s_1, \ldots, s_r$ (where the
$s_i$'s are all of type |unsigned char|, then the checksum will be the
value $t_{r+1}$ where the $t_i$'s are
computed inductively via the equations: $t_0 = 0$; $t_{j+1} =
|add_a_byte_to_checksum(@t$s_j,t_j$@>)|$.

However, we are faced with the following problem. We don't know the
version of the ``critical line'' to be written to the output file
until we have read the whole file. It turns out that we can compute
two auxiliary checksums for the portion of the file that comes after
the critical line; when we reach the end of the file, we can compute
the output file version of the critical line, and then the checksum
for the portion of the file up to and including the critical line.
Then there is a simple procedure (to be explained below) which will
compute the checksum for the whole output file from these three
partial checksums.
@<Update checksums@>=
 if ( mode == VERIFY ||!found_critical_line) {@<Update one
checksum@>}@/
 else
if(!current_line_is_critical) {@<Update two checksums@>}

@
@<Update one checksum@>=
 for(i=0;current_line[i] != 0;i++)@/
 chksum = add_a_byte_to_checksum( (unsigned
int)current_line[i],chksum);
 @
@<Update two...@>=
 for(i=0;current_line[i] != 0;i++)@/
{ auxchksum = add_a_byte_to_checksum( (unsigned int)current_line[i],auxchksum);
auxchksum2 = add_a_byte_to_checksum( (unsigned
int)current_line[i],auxchksum2);
 }

@ We here simply rewind the input
file to the place directly after we discovered the critical line and
write all the following lines to the output file. (If we are receiving
our input from |stdin|, then we ``rewind'' the temporary file and make
that the input file.) The revised version of the critical line has
already been written to the output file by the ``end-game'' portion of
the program.
@<Do second pass on...@>=
if (!input_from_pipe)
fseek(input_file,loc_critical_line,SEEK_SET);
else
{
rewind(temp_file);
fclose(input_file);
input_file = temp_file;
}
while(fgets(current_line,257,input_file) != NULL)
fputs(current_line,output_file);
if (!feof(input_file))
@<Signal error while reading input file and abort@>@/

@* Endgame procedures.

@<Do endgame procedures@>=
if(mode == COMPUTING){@<Do |COMPUTING| endgame procedures@>}@/
else {@<Do |VERIFY| endgame procedures@>}
@
@<Do |COM...@>=
@<Split |critical_line| into two strings@>@/
@<Install the dummy checksum into the critical line.@>@/
@<Compute the final line count and install it.@>@/
@<Compute the final word count and install it@>@/
@<Compute the final character count and install it@>@/
@<Compute the correct checksum and install it@>@/
@<Write the critical line to the output file@>@/
@

A major part of the work of the |COMPUTING| endgame procedures is in
assembling the string which will be the ``critical line'' in the
output file. For this we need a bunch of auxiliary string buffers.

We can use the space that was previously used to hold the ``current
line'' and just use a pointer into this space for one of our temporary
buffers. We will also have a smaller temporary buffer |temp_buf2|
which is used to assemble various ``count fields'' for inclusion in the
new version of the critical line.


@<Variables local to |main|@>=
char *temp_buf;
char temp_buf2[17];

@
We will now split the critical line into two parts. Precisely how we
do this will depend on the value of |answer_from_critical|. But after
this is done, the beginning part of the line will be the buffer
|critical_line|, ending in the four character string |" = \""|. The
portion of the line that will follow the ``second quote'' will be in
the buffer |temp_buf|.

We don't actually delete the right portion of the ``critical line''
from the buffer |critical_line|. Instead, we simply store a null
character at the appropriate place.
@<Split...@>=
temp_buf = current_line;
switch (answer_from_critical) {
@ The first case to consider is when |answer_from_critical| $=0$. This
case ``can't happen'' so we print an appropriate error message and
exit the program.
@<Split...@>=
	case 0:{@<Issue complaint about the bad format of the input file@>}@/
break;
@ For the second case, the line does not contain the pattern |" = \""|
after the word |"checksum"|, We will put the portion of the critical
line after the word |"checksum"| in the buffer |temp_buf|, and place
the string |" = \""| after the end of the word |"checksum"| followed
by a null character.
@<Variables local to |main|@>=
char *quote_pattern = " = \"";
@
@<Split...@>=
	case 1:strcpy(temp_buf,critical_line + (loc_chksum + 8));
	critical_line[loc_chksum + 8] = 0;
	strcat(critical_line,quote_pattern);
	loc_first_quote_mark = loc_chksum + 11;
	break;
@ The next case is when we can find the pattern |" = \""| in the
critical line but not a following ``quote''. In that case, we split at
the quote mark of the pattern |" = \""|.
@<Split...@>=
	case 2:strcpy(temp_buf,critical_line + (loc_first_quote_mark
+1));
	critical_line[loc_first_quote_mark + 1] = 0;
	break;
@ In the final case, we can find both the pattern |" = \""| and a
following ``quote-mark''. We store the portion of the critical line
after the second ``quote-mark'' in |temp_buf| and put a null character
in the buffer |critical_line| after the first ``quote-mark''. This has
the effect of erasing the text that is between the two ``quote-marks''
in the input file. It will be replaced by the new checksum and counts
in the later portions of the program.
@<Split...@>=
	case 3:strcpy(temp_buf,critical_line + (loc_second_quote_mark
+1));
	critical_line[loc_first_quote_mark +1] = 0;
	break;} /* end of switch*/
@
We will next insert the string |"ZZZZZ "| as part of the new
``critical line'' we are building. Later we will have to replace the
string |"ZZZZZ"| by the correct checksum.
@<Variables local to...@>=
char *Zstring = "ZZZZZ ";
char *one_space = " ";
char *quote = "\"";
@
@<Install the dummy...@>=
strcat(critical_line, Zstring);
@
The installation of the final line count is easy. We have to increment
the line count by 1 for the ``critical line''.
@<Compute the final line...@>=
lc++;
sprintf(temp_buf2,"%lu",lc);
strcat(critical_line,temp_buf2);
strcat(critical_line,one_space);
@ Computing the final word count is slightly more involved. We will
add two dummy fields to the critical line and then tack on the text in
|temp_buf|. We will take the ``word count'' of this line and add it to
|wc|.  Then we
erase the material we have added from the line, but add in the new
word count much as we added in the line count in the preceding module.
We need to keep track of our position in the line, wo we define a
variable in which to stash it.
@<Variables local to |main|@>=
int loc_in_line;
char *dummy_field_pattern = "B C\"";
@

@<Compute the final word count...@>=
loc_in_line = strlen(critical_line);
strcat(critical_line,dummy_field_pattern);
strcat(critical_line,temp_buf);
wc += word_count(critical_line);
critical_line[loc_in_line] = 0;
sprintf(temp_buf2,"%lu",wc);
strcat(critical_line,temp_buf2);
strcat(critical_line,one_space);

@
The computation of the final character count involves an iterative
procedure. The problem is that the total character count includes the
number of characters in the character-count itself. A simple analysis
shows the process converges in at most three iterations. (The 1 in the
computation of |cc_guess| comes from the length of the string |"\""|.)

At the end of this module, we splice back the second half
of the critical line which we stashed in |temp_buf|.
@<Variables local to |main|@>=
int char_field_length = 0;
int old_char_field_length;
unsigned long int cc_guess;
@
@<Compute the final character count...@>=
cc += strlen(temp_buf) + strlen(critical_line);
do
{cc_guess = cc + char_field_length + 1;
sprintf(temp_buf2,"%lu",cc_guess);
old_char_field_length = char_field_length;
char_field_length = strlen(temp_buf2);}
while(old_char_field_length != char_field_length);
strcat(critical_line,temp_buf2);
strcat(critical_line,quote);
strcat(critical_line,temp_buf);
@
The function |merge_three_checksums| does the computation of the final
checksum from the three partial checksums alluded to above.
@<Compute the correct checksum...@>=
for(i=0;critical_line[i] != 0; i++)@/
chksum = add_a_byte_to_checksum((unsigned
int)critical_line[i],chksum);@/
chksum = merge_three_checksums(chksum,auxchksum,auxchksum2);
sprintf(temp_buf2,"%05u",chksum);
strncpy(critical_line + (loc_first_quote_mark + 1),temp_buf2,5);
@
@<Write the critical line...@>=
fputs(critical_line,output_file);

@

The verify ``endgame procedures'' consist in comparing the various
computed counts and the checksum with those stored in the critical
lines. If everything checks out, we write a ``success'' message to
|stdout|; if not we write a ``failure'' message.

We need some variables to store the fields extracted from the critical
line. We also need a pointer to use in extracting these fields.


@<Variables local to |main|@>=
unsigned long int stored_wc = 0; /* stored word count*/
unsigned long int stored_cc = 0; /*stored character count*/
unsigned long int stored_lc = 0; /*stored line count*/
CheckSum stored_chksum;
int verify_successful = 1;
#if vms
char *format = "%d %ld %ld %ld"; /* VMS C sscanf() doesn't support u item. */
				/* none of these numbers overflow into the */
				/* sign bit on the VAX, so use %d instead */
#else
char *format = "%u %lu %lu %lu";
#endif
char *position_in_line;
@ The first command positions the pointer |position_in_line| at the
start of the |chksum| field.
@<Do |VERIFY| endgame procedures@>=
if(!found_critical_line)
{printf("The input file did not have the correct format.\n");
exit(EXIT_FAILURE);}
position_in_line = critical_line + loc_first_quote_mark + 1;
if ( 4 != sscanf(position_in_line,format, &stored_chksum, &stored_lc,
	&stored_wc, &stored_cc))
{printf("The input file did not have the correct format.\n");
exit(EXIT_FAILURE);}
verify_successful = (chksum == stored_chksum && wc == stored_wc &&
	lc == stored_lc && cc == stored_cc);
if(verify_successful)
printf("The checksum verification of the input file was successful.\n");
else
printf("The checksum verification of the input file did not succeed.\n");
@* CRC checksums.

	In computing the checksum of the file {\sl outputfile} as
discussed above, we run into the following problem. We will only know
the various counts (such as the word count) after reading the whole
file. A naive implementation of the program would require three passes
through the input file: (1) A pass through the file to compute the
word, character, and line counts. (2) A pass through the file to
compute the CRC-16 checksum. (3) A final pass to copy the input file
to the output file.

However, we will be able to get by with only two  passes through the input
file because of the following feature of the CRC-checksum. Let $s_1$
be a string of bytes with the checksum $c_1$. Let $s_2$ be a string of
bytes, and let $s$ be the concatenation of $s_1$ and $s_2$ in that
order, and let $c$ be the checksum of $s$. Then we will be able to
compute from $s_2$ two summary numbers $a_2$ and $c_2$ (where $c_2$ is
the usual checksum for $s_2$, and $a_2$ is also an integer such that
$0 \leq a_2 < 2^{16}$) such that $c$ can be quickly computed from
$c_1$, $c_2$, and $a_2$.

In this section of the program, we develop the ideas needed to verify
the property of CRC checksums just stated, and write the code for the
various functions needed to compute CRC checksums.

@

For an excellent discussion of the ideas involved in the CRC checksum,
we refer the reader to the paper {\sl A Tutorial in CRC Computations}
by
T.~V.~Ramabadran and S.~S.~Gaitonde which appeared in IEEE Micro vol.
8, no. 4, (August 1988), pp. 62--75.

\def\Zt{{\bf Z}_2}

 $\Zt$ is the two element field. $\Zt[X]$ is the ring of polynomials
in the variable $X$
with coefficients in the field $\Zt$. A crucial role in the CRC-16
algorithm is played by the polynomial $p(X) = X^{16} + X^{15} + X^2 +
1$.

We let $R$ be the quotient ring obtained by dividing $\Zt[X]$ by the
ideal generated by the polynomial $p$. Then $R$ has cardinality
$2^{16}$. Clearly each element of $R$ lies in the same equivalence
class as a unique polynomial of $\Zt[X]$ of degree less than $16$, and
in this way, we identify $R$ with the set of polynomials of degree
less than $16$.

We associate to each integer $x$ such that $0 \leq x < 256$ a
polynomial in $\Zt[X]$ of degree at most $7$ as follows. Let
$a_7\ldots a_0$ be the binary expansion of $x$ (so $0 \leq a_i < 2$
for $0 \leq i < 7$ and $x = \sum_{i=0}^7 a_i 2^i$. Then the polynomial
associated to $x$ is $\sum_{i=0}^7 a_i X^i$.

@ Now consider a string of bytes $s_0\ldots s_r$. (Here the $s_i$'s
may be viewed as integers in the range $0 \leq s_i < 256$. We wish to
associate to this sequence an element of $R$ which will be its
checksum.

We do this as follows. Let $p_i$ be the polynomial associated to $s_i$
as discussed in the preceding paragraph. Let $p$ be the polynomial
$\sum_{i=0}^r p_i X^{8(r-i)}$. There is a canonical homomorphism of
$\Zt[X]$ onto $R$. The checksum of the sequence $s_0\ldots s_r$ is the
image of $p$ in $R$.

The following way of looking at $p$ may make its definition seem more
natural. View the string of $r+1$ bytes $s_0\ldots s_r$ as a string of
$8(r+1)$ bits $b_0 \ldots b_{8r+7}$ by replacing each byte $s_i$ by
its binary expansion $s_{i,7} \ldots s_{i,0}$. To this string of bits
we associate the polynomial $\sum_{i=0}^{8r+7} b_i X^{(8r + 7) - i}$.

This allows the following inductive definition of the checksum. Let
$t_0 = 0$. Let $t_{i+1}$ be the canonical image in $R$ of $X^8 \cdot
t_i + p_{i+1}$. Then $t_{i+1}$ is easily checked to be the checksum of
the sequence $s_0 \ldots s_i$.

@ We can identify $R$ with the set of integers $x$ such that $0 \leq x
< 2^{16}$ as follows. Let $r \in R$. Then $r$ is the image of a unique
polynomial of degree at most 15 in $\Zt[X]$, say $\sum_{i=0}^{15} a_i
X^i$. We identify $r$ with the integer $\sum_{i=0}^{15} a_i 2^i$.

Suppose then that $r \in R$ and that $s$ is an unsigned byte. We wish
to indicate how we will implement a function that will quickly compute
the element of $R$ that corresponds to $X^8 \cdot r + s$.

The first point to make is that via our identification of $R$ with
unsigned integers in the range $0 \leq x < 2^{16}$, the operation of
addition in $R$ corresponds to the bit-wise exclusive {\sl or\/} (rendered in
C as |^|).

Second, if we write a typical element $r$ in $R$ as a polynomial of
degree less than $16$ in $\Zt[X]$, and then write this polynomial as
$r_1 X^8 + r_2$ where $r_1$ and $r_2$ are polynomials of degree at
most $7$ in $\Zt[X]$, then $r_1$ and $r_2$ correspond respectively to
the high-order byte and low-order byte of the unsigned integer
corresponding to $r$.

We will precompute a table |crc_table[i]| (where $0 \leq i < 256$ such
that if $p$ is the polynomial of degree at most $7$ corresponding to
$i$, then $|crc_table|[i]$ gives the unsigned integer corresponding to
the element of $R$ which is the homomorphic image of $X^{16}\cdot p$.

In these terms, if we add the byte |new_byte| to the |old_checksum| wth
high-order byte $r_1$ and low order byte $r_2$, the resulting
element of $R$ will be $|crc_table|(r_1) + X^8\cdot r_2 + |new_byte|$.
To complete the explanation of the code that follows we need merely
note that the operation of multiplying by $X^8$ corresponds to a left
shift by 8 bits.
@
We now begin writing the code for the |add_a_byte_to_checksum|
function. A key role is played by the following table. This table is
computed in the program described in the CWeb file {\tt polynom.w}
which is enclosed with this distribution.
@<Global variables@>=
static CheckSum crc_table[] =
{
0x0000, 0x8005, 0x800f, 0x000a, 0x801b, 0x001e, 0x0014, 0x8011,
0x8033, 0x0036, 0x003c, 0x8039, 0x0028, 0x802d, 0x8027, 0x0022,
0x8063, 0x0066, 0x006c, 0x8069, 0x0078, 0x807d, 0x8077, 0x0072,
0x0050, 0x8055, 0x805f, 0x005a, 0x804b, 0x004e, 0x0044, 0x8041,
0x80c3, 0x00c6, 0x00cc, 0x80c9, 0x00d8, 0x80dd, 0x80d7, 0x00d2,
0x00f0, 0x80f5, 0x80ff, 0x00fa, 0x80eb, 0x00ee, 0x00e4, 0x80e1,
0x00a0, 0x80a5, 0x80af, 0x00aa, 0x80bb, 0x00be, 0x00b4, 0x80b1,
0x8093, 0x0096, 0x009c, 0x8099, 0x0088, 0x808d, 0x8087, 0x0082,
0x8183, 0x0186, 0x018c, 0x8189, 0x0198, 0x819d, 0x8197, 0x0192,
0x01b0, 0x81b5, 0x81bf, 0x01ba, 0x81ab, 0x01ae, 0x01a4, 0x81a1,
0x01e0, 0x81e5, 0x81ef, 0x01ea, 0x81fb, 0x01fe, 0x01f4, 0x81f1,
0x81d3, 0x01d6, 0x01dc, 0x81d9, 0x01c8, 0x81cd, 0x81c7, 0x01c2,
0x0140, 0x8145, 0x814f, 0x014a, 0x815b, 0x015e, 0x0154, 0x8151,
0x8173, 0x0176, 0x017c, 0x8179, 0x0168, 0x816d, 0x8167, 0x0162,
0x8123, 0x0126, 0x012c, 0x8129, 0x0138, 0x813d, 0x8137, 0x0132,
0x0110, 0x8115, 0x811f, 0x011a, 0x810b, 0x010e, 0x0104, 0x8101,
0x8303, 0x0306, 0x030c, 0x8309, 0x0318, 0x831d, 0x8317, 0x0312,
0x0330, 0x8335, 0x833f, 0x033a, 0x832b, 0x032e, 0x0324, 0x8321,
0x0360, 0x8365, 0x836f, 0x036a, 0x837b, 0x037e, 0x0374, 0x8371,
0x8353, 0x0356, 0x035c, 0x8359, 0x0348, 0x834d, 0x8347, 0x0342,
0x03c0, 0x83c5, 0x83cf, 0x03ca, 0x83db, 0x03de, 0x03d4, 0x83d1,
0x83f3, 0x03f6, 0x03fc, 0x83f9, 0x03e8, 0x83ed, 0x83e7, 0x03e2,
0x83a3, 0x03a6, 0x03ac, 0x83a9, 0x03b8, 0x83bd, 0x83b7, 0x03b2,
0x0390, 0x8395, 0x839f, 0x039a, 0x838b, 0x038e, 0x0384, 0x8381,
0x0280, 0x8285, 0x828f, 0x028a, 0x829b, 0x029e, 0x0294, 0x8291,
0x82b3, 0x02b6, 0x02bc, 0x82b9, 0x02a8, 0x82ad, 0x82a7, 0x02a2,
0x82e3, 0x02e6, 0x02ec, 0x82e9, 0x02f8, 0x82fd, 0x82f7, 0x02f2,
0x02d0, 0x82d5, 0x82df, 0x02da, 0x82cb, 0x02ce, 0x02c4, 0x82c1,
0x8243, 0x0246, 0x024c, 0x8249, 0x0258, 0x825d, 0x8257, 0x0252,
0x0270, 0x8275, 0x827f, 0x027a, 0x826b, 0x026e, 0x0264, 0x8261,
0x0220, 0x8225, 0x822f, 0x022a, 0x823b, 0x023e, 0x0234, 0x8231,
0x8213, 0x0216, 0x021c, 0x8219, 0x0208, 0x820d, 0x8207, 0x0202,
};



@

The following is the function definition for |add_a_byte_to_checksum|.


@<All functions@>=

CheckSum add_a_byte_to_checksum(new_byte,oldCheckSum)
	unsigned int new_byte;
	CheckSum oldCheckSum;
	{
	CheckSum u;
	u = (CheckSum)new_byte;
	oldCheckSum = oldCheckSum & 0xffff;
	u = ((oldCheckSum <<8) & 0xffff) ^ u;
	u = u ^ crc_table[(oldCheckSum >> 8) & 0xff];
	return(u);
}
@ We next wish to indicate the trick that allows us to compute the
checksum in one pass through the input file rather than two.

First let $b$ be an unsigned char. Then $b$ determines a function
$f_b: R \to R$ as follows: $f_b(r) = X^8\cdot r + b$.

More generally let $a$ and $b$ be elements of $R$. Then these
determine a map $g_{a,b}:R \to R$ via the stipulation $$g_{a,b}(x) =
a\cdot x + b.$$

Let $G$ be the set of all maps from $R$ to $R$ obtained in this way
(for various choices of $a$ and $b$.) Then clearly $f_b$ is of this
form, and the collection of maps of this form is readily seen to be
closed under composition.

Let $g \in G$. Then $g$ is completely determined by the values of
$g(0)$ and $g(1)$. Indeed, $g(0)=b$ and $g(1)= a + b$. Remembering
that in $R$, $-1=1$, we see that $a = g(0) + g(1)$.

@ Our strategy for determining the checksum in one pass should now be
clear. Let $s_0,\ldots s_r$ be the sequence of bytes that correspond
to the lines of the output file up to and including the critical line.
Let $s_{r+1}\ldots s_t$ be the remaining bytes of the input file.
These will determine a function $g \in G$ such that if $c$ is the
checksum determined by $s_0\ldots s_r$, then $g(c)$ will be the
desired final checksum. To compute $g$ it suffices to compute $g(0)$
and $g(1)$. These are the numbers we are computing in the variables
|auxchksum| and |auxchksum2|.

	It remains to derive the formula for $g(c)$ in terms of $c$,
$g(0)$, and $g(1)$. But this is easy. We know $g(c) = a \cdot c + b$
and $a= g(0) + g(1)$, $b = g(0)$. So $g(c) = c\cdot(g(0) + g(1)) + g(0)$.

@
We need some code to multiply two polynomials in the ring $R$. Since
there will be precisely one such multiplication in the course of a run
of the program, we opt for straightforward coding rather than blinding
speed.

The first part of the code puts the $\Zt$ polynomial of degree at
most $31$ which is the product of the two $\Zt$ polynomials
corresponding to the inputs $u$ and $v$ in the array |w_array| .
@<All functions@>=
CheckSum MultiplyTwoPolys(u,v)
	CheckSum u,v;@/
	{
	CheckSum r = 0;
	int u_array[SIZE];
	int v_array[SIZE];
	int  w_array[SIZE+SIZE];
	int i,j;
	CheckSum s[4];
	for(i=0;i<SIZE;i++)
	{u_array[i] = u%2; u = u/2;
	v_array[i] = v%2; v = v/2;}
	for (i=0;i < (SIZE+SIZE);i++)
	w_array[i]=0;
	for (i = 0; i < SIZE;i++)
	for(j=0;j< SIZE; j++)
	w_array[i+j] +=  (u_array[i]*v_array[j]);
	for(i = 0; i < SIZE + SIZE; i++)
	w_array[i] = w_array[i]%2;
@
Our next task is to store the array |w_array| as an array of four
``bytes'', |s[4]|.
@<All functions@>=
for (i = 0; i < 4; i++)
{s[i] = 0;
for(j=0;j<8;j++)
s[i] = (s[i]*2) + w_array[(8*i)+(7-j)];}

@
Finally we have to reduce this polynomial to one of degree at most 15
that is its canonical image in $R$. The table |crc_table| has all the
needed information to do this expeditiously.
@<All functions@>=
for(i=3;i>1;i--)
{s[i-1] = s[i-1] ^ ((crc_table[s[i]] >> 8) & 0xff);
s[i-2] = s[i-2] ^ ((crc_table[s[i]]) & 0xff);}
	r = (s[1]<<8)^s[0];
	return(r);
}


@

The motivation for the following function is contained in the
preceding discussion.

@<All functions@>=

CheckSum merge_three_checksums(s,t,u)
	CheckSum s,t,u;
	{
	CheckSum w;
	w = t ^ u;
	w = MultiplyTwoPolys(w,s);
	return(w^t);
	}

@* Input and output.

This section contains code concerned with opening and closing files.

@ We next have to open the input file (and if |mode == COMPUTING| the
output file). We will also need a pointer to a  temporary file.

We also will need a flag |input_from_pipe| which we will set to $1$ if
the input is coming from stdin.
@<Variables local to |main|@>=
FILE *input_file, *output_file, *temp_file;

int input_from_pipe = 0;
@ If the argument for the input file is missing or the string |"-"| is
used, then the input is taken from |stdin|. Simarly, if the argument
for the output file is missing or the string |"-"| is used, then the
output is sent to |stdout|.
@<Open the input and output files@>=
if  (argc == 0) {@<Input file is stdin@>}@/
else@#
@t\quad@>@+ if (!(strcmp(argv[0],"-"))){argc--;argv++;{@<Input file is
stdin@>}}@/
@t\quad@>@+else
{
if((input_file = fopen(argv[0],"r"))== NULL){@<Handle error while
opening input file@>}@/
@t\quad\quad\quad\quad@>@+else{@+argc--;argv++;@+}@+}
@
Now we open the output file.
@<Open the input and...@>=
if ((argc == 0) || !(strcmp(argv[0],"-"))) output_file = stdout;@/
else {if((output_file = fopen(argv[0],"w"))== NULL)@<Handle error while
		      opening output file@>}@/
@ We have to set the flag that tells us that the input is coming from
|stdin|. In addition, if the mode is |COMPUTING|, we must open a
temporary file. Finally, we have to make the |input_file| pointer
equal to |stdin|.
@<Input file is stdin@>=

input_from_pipe = 1;@/
if(mode == COMPUTING)
{if ((temp_file = tmpfile()) == NULL) {@<Error message about opening
temporary file@>}}@;@/
input_file = stdin;
@
@<Close all files@>=
fclose(input_file); if (mode == COMPUTING) fclose(output_file);

@* Error Messages.

This section contains the various error messages we send when things
go wrong.


@
@<Complain about...@>=
{status += usage_error;
fprintf(stderr,"%s: Command line has incorrect format.\n", prog_name);
fprintf(stderr,"%%  checksum  inputfile outputfile\n");
fprintf(stderr, "     is the format for installing a checksum in a \
file.\n");
fprintf(stderr, "%% checksum -v inputfile\n");
fprintf(stderr," is the format for verifying an installed \
checksum.\n");
exit(EXIT_FAILURE);}
@
@<Handle error while opening input...@>=
{status += cannot_open_read_file;
fprintf(stderr,"%s: cannot open input file %s\n", prog_name, *argv);
exit(EXIT_FAILURE);
}
@
@<Error message...@>=
{status += cannot_open_temporary_file;
fprintf(stderr,"%s: cannot open temporary file.\n",prog_name);
exit(EXIT_FAILURE);}
@
@<Handle error while opening output...@>=
{status += cannot_open_write_file;
fprintf(stderr,"%s: cannot open output file %s\n", prog_name, *argv);
exit(EXIT_FAILURE);
}
@
@<Signal error while reading input file and abort@>=
{status += error_reading_input_file;
fprintf(stderr, "%s: Error encountered while reading input file.\n",
prog_name); exit(EXIT_FAILURE);}
@
@<Issue ...@>=
{status += usage_error;
fprintf(stderr, "The input file did not have the correct format.\n");
fprintf(stderr, "It should contain a line which has the word \
\"checksum\"\n");
fprintf(stderr,"and no other alphabetic characters.\n");
exit(EXIT_FAILURE);}
