# To unbundle, sh this file echo README 1>&2 sed 's/.//' >README <<'//GO.SYSIN DD README' - -The indexing programs here are modified from the versions printed in -Bentley & Kernighan, EP-ODD V1 #1. The programs are also described in -AT&T Bell Laboratories Computing Science Technical Report No. 128, -``Tools for Printing Indexes''. - -Changes from the published version derive from further experience -after the paper was frozen, plus some cleanup and corrections by Joe -Kruskal (to whom many thanks), plus some very local features for -printing the AMPL book. - - -USING THE PROGRAMS - -install - makes the appropriate files executable. since this file - is not executable, use by typing "sh foo.ix - troff -ms foo.ix >foo.out - and then examine the troff output foo.out -cleanup - removes the garbage files left around for debugging - - -CHANGES FROM THE PAPER - -make.index - handles "see" file see.terms. A line like - algorithmssearching, sorting - generates in the final index - algorithms see searching, sorting - a 3rd field of %also makes it - algorithms see also searching, sorting -doclean -deroman -range.prep - minor change to defend against bug in some versions of "sort" -rotate - moved here (and changed as necessary) to remove subtle bug. - see check.data below -range.sort - -u option on sort removes duplicate entries on same page -range.collapse -reroman - page number concatenation removed from here ... -num.collapse - and moved to here. also commas between numbers now - inserted here (to make see terms easier) -gen.key - literals protected differently in gsub commands. - rules for non-alpha index terms slightly richer: - purely nonalphabetic lines first - lines with leading digits next - ordinary lines last -see.prep - changed to match changes above, and to rely on font-changing {} -final.sort - uses -d option for "telephone directory" order. -hierarchy - a rather special purpose version to replace runs of items - with a common one or two word prefix and replace them by - a head word and indented lines. - this also does some rearrangement to bring see terms to the top, - and terms with formatting info to the bottom; this is not - always the right thing to do. -format - letter changes (.YY) determined by first letter. - minor rearrangement of how output line is created. - commas no longer added here. - [Some systems have a disk-formatting program called format.] -check.data - new program that catches subtle errors in the data //GO.SYSIN DD README echo ix.macro 1>&2 sed 's/.//' >ix.macro <<'//GO.SYSIN DD ix.macro' -.de ix -.ie '\\n(.z'' .tm ix: \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 \\n% -.el \\!.ix \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 -.. //GO.SYSIN DD ix.macro echo cleanup 1>&2 sed 's/.//' >cleanup <<'//GO.SYSIN DD cleanup' -#!/bin/sh - -rm foo[1-9] foo.* //GO.SYSIN DD cleanup echo deroman 1>&2 sed 's/.//' >deroman <<'//GO.SYSIN DD deroman' -awk ' # deroman -# Input: string (tab) [arab or roman] -# Output: string (tab) [arab] - -# Roman numeral n is replaced by arab n-1000 (e.g., iii -> -997) -BEGIN { FS = OFS = "\t" - # set a["i"] = 1, a["ii"] = 2, ... - s = "i ii iii iv v vi vii viii ix x" - s = s " xi xii xiii xiv xv xvi xvii xviii xix xx" - s = s " xxi xxii xxiii xxiv xxv xxvi xxvii xxviii xxix xxx" - n = split(s, b, " ") - for (i = 1; i <= n; i++) a[b[i]] = i - } -$2~/^[ivxlc]+$/ { if ($2 in a) $2 = -1000 + a[$2] - else print "deroman: bad number: " $0 | "cat 1>&2" - } - { print } -' $* //GO.SYSIN DD deroman echo doclean 1>&2 sed 's/.//' >doclean <<'//GO.SYSIN DD doclean' -#!/bin/sh - -awk ' # doclean - -# Input: "number tab IX string -# 107 IX self-reference #1186 - -# 281 TL APPENDIX A AMPL Reference Manual #26 - -# Output: string (tab) number -# excess spaces are removed output string -# note reversal of order; rest of programs expect it - -# This contains some special pleading for the AMPL book - -BEGIN { FS = OFS = "\t" } - -/\t(TL|H1|H2|H3|LASTPAGE)/ { next } # zap expected noise - -$0 !~ /[0-9ixv]+\tIX / { - print "doclean: non index line: " $0 | "cat 1>&2"; next -} - -{ sub(/IX +/, "", $2) # zap "IX " - sub(/ +#[0-9]+ .*$/, "", $2) # zap trailing blanks, slug, file - gsub(/ +/, " ", $2) # compress internal blanks - print $2, $1 # item (tab) page number -} -' $* //GO.SYSIN DD doclean echo final.sort 1>&2 sed 's/.//' >final.sort <<'//GO.SYSIN DD final.sort' -# final.sort -# Input/Output: sort key (tab) string (tab) numlist -# Sort by $1 (string) - -sort -t' ' +0fd -1 +0f -1 $* //GO.SYSIN DD final.sort echo format 1>&2 sed 's/.//' >format <<'//GO.SYSIN DD format' -awk ' # format -# Input: sort key (tab) string (tab) numlist -# Output: troff format, commands interpreted - -BEGIN { FS = "\t" - s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz " - # set upper["a"] = "A" - for (i = 1; i <= 27; i++) upper[substr(s,i+27,1)] = substr(s,i,1) - # set lower["a"] = lower["A"] ="a" - for (i = 1; i <= 27; i++) { - lower[substr(s,i,1)] = substr(s,i+27,1) - lower[substr(s,i+27,1)] = substr(s,i+27,1) - } -} -{ # mark change between letters with .YY - # find first non-punctuation char - for (i = 1; (c = substr($1,i,1)) != ""; i++) - if (c ~ /[a-zA-Z0-9 ]/) - break - this = c - if (!(this in lower)) lower[this] = " " - this = lower[this] - if (this != last) # && this != " ") - print ".YY", this, upper[last=this] - quoted = 0 - - # interpret font change language - - if ($2 ~ /^ /) # different macro, to avoid bad breaks in hier - print ".ZZ" - else - print ".XX" - if (NF == 3) - $0 = $2 "," " " $3 # discard sort key, leave term .. numlist - else - $0 = $2 - - if ($0 ~ /%/) { - quoted = 1 - gsub(/%%/, "QQ0QQ", $0) - gsub(/%\[/, "QQ1QQ", $0) - gsub(/%\]/, "QQ2QQ", $0) - gsub(/%\{/, "QQ3QQ", $0) - gsub(/%\}/, "QQ4QQ", $0) - gsub(/%~/, "QQ5QQ", $0) - } - gsub(/%e/, "\\e", $0) # %e -> \e - gsub(/~/, " ", $0) # unpaddable spaces go away at last - if (gsub(/\[/, "\\\\&\\f(CW", $0)) - gsub(/\]/, "\\fP", $0) - if (gsub(/\{/, "\\f2", $0)) - gsub(/\}/, "\\fP", $0) - if (quoted) { - gsub(/%/, "", $0) - gsub(/QQ0QQ/, "%", $0) - gsub(/QQ1QQ/, "[", $0) - gsub(/QQ2QQ/, "]", $0) - gsub(/QQ3QQ/, "{", $0) - gsub(/QQ4QQ/, "}", $0) - gsub(/QQ5QQ/, "~", $0) - } - printf "\\&%s\n", $0 -} -' $* //GO.SYSIN DD format echo gen.key 1>&2 sed 's/.//' >gen.key <<'//GO.SYSIN DD gen.key' -awk ' # gen.key -# Input: Each input line has one of the following two forms: -# string (tab) numlist -# string " %key " sort.key (tab) numlist -# Output: Each output line has the form: -# sort.key (tab) string (tab) numlist - -BEGIN { FS = OFS = "\t" } - -/ %key / { # use sort.key if it is provided - i = index($1, " %key ") - print substr($1, i+6), substr($1, 1, i-1), $2 - next - } - - { # generate sort.key (in $2, by modifying string) if it is not provided - $3 = $2 - $2 = $1 - - #Modify sort.key - # Remove some troff commands - gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", $2) - - # underscore -> 0, so "foo_gorp" sorts before "food" - gsub(/_/, "0", $2) - - # quote character is %, space character is ~ - quoted = 0 - if ($2 ~ /%/) { # hide quoted literals in Q - quoted = 1 - gsub(/%%/, "QQ0QQ", $2) - gsub(/%\[/, "QQ1QQ", $2) - gsub(/%\]/, "QQ2QQ", $2) - gsub(/%\{/, "QQ3QQ", $2) - gsub(/%\}/, "QQ4QQ", $2) - gsub(/%~/, "QQ5QQ", $2) - } - gsub(/%e/, "\\", $2) # implement troff escape - gsub(/~/, " ", $2) # remove tildes - gsub(/[%\[\]\{\}]/, "", $2) # remove % and font-changing []{} - if (quoted) { # restore literals but without escape charcter - gsub(/QQ0QQ/, "%", $2) - gsub(/QQ1QQ/, "[", $2) - gsub(/QQ2QQ/, "]", $2) - gsub(/QQ3QQ/, "{", $2) - gsub(/QQ4QQ/, "}", $2) - gsub(/QQ5QQ/, "~", $2) - } - if ($2 ~ /^[^a-zA-Z]+$/) # purely nonalphabetic lines go first - $2 = " " $2 - else if ($2 ~ /^[0-9]/) # lines with eading digits come next - $2 = " " $2 - # otherwise whatever final.sort does -} - - { print $2, $1, $3 } -' $* //GO.SYSIN DD gen.key echo ix.test 1>&2 sed 's/.//' >ix.test <<'//GO.SYSIN DD ix.test' -ix: line 1 -ix: line 2 -ix: line 2 -ix: line 3 -ix: %begin line 11 -ix: line 15 -ix: %end line 19 -ix: \f(CWps\fP \f(CW-a\fP 34 -ix: \f(CWps\fP command 34 -ix: \f(CWPS1\fP shell variable 36 -ix: sorting~a book~index 25 -ix: [pr]~[-]{n} command 31 -ix: [%[^]...[%]] regular~expression 32 -ix: [printf] [%%d] specification 35 -ix: T\v'.17m'\h'-.12m'E\h'-.12m'\v'-.17m'X %key TEX 40 -ix: CONTENTS 7 Input and Output -ix: [%~] tilde 41 //GO.SYSIN DD ix.test echo make.index 1>&2 sed 's/.//' >make.index <<'//GO.SYSIN DD make.index' -#!/bin/sh - -#check.data $* - -doclean $* >foo1 -deroman foo1 >foo2 -range.prep foo2 >foo3 -rotate foo3 >foo4 -range.sort foo4 >foo5 -range.collapse foo5 >foo6 -reroman foo6 >foo7 -num.collapse foo7 >foo8 -gen.key foo8 >foo9 -see.prep see.terms | gen.key >foo.see -final.sort foo9 foo.see >foo.regular - -hierarchy foo.regular >foo.hier - -format foo.hier >foo.all - -cat foo.all - -#cleanup //GO.SYSIN DD make.index echo num.collapse 1>&2 sed 's/.//' >num.collapse <<'//GO.SYSIN DD num.collapse' -awk ' # num.collapse -# Input: lines of form: string (tab) num1 [(space) num2] -# Output: lines of form: string (tab) fancy.num.list -# -# fancy.num.list contains items, separated by ", ", of form: num or num-num -# Sequence of input lines with same value of string is combined -# into a single output line. Each input line contributes either -# num or num-num to output line. - -BEGIN { FS = OFS = "\t" } - - { sub(/ /, "\\(en", $2) } # use - if there is no en dash - -$1 != p { p = $1 - if (NR > 1) printf "\n" - printf "%s\t%s", $1, $2 - next - } - { printf ", %s", $2 } -END { if (NR > 0) printf "\n" } -' $* //GO.SYSIN DD num.collapse echo range.collapse 1>&2 sed 's/.//' >range.collapse <<'//GO.SYSIN DD range.collapse' -awk ' # range.collapse -# Input: lines of form: string (tab) ["b"|"e"|"a"] (tab) number -# Output: lines of form: string (tab) num [(space) num] -# In sequence of lines with same value of string: -# b line and following e line are combined into single line: -# string (tab) num num -# a line disappears if between paired b and e -# a line otherwise becomes single line: -# string (tab) num - -function error(s) { - print "range.collapse: " s " near pp " rlo "-" rhi | "cat 1>&2" -} -function printoldrange() { - if (range == 1) { error("no %end for " term); rhi = "XXX" } - if (NR > 1) { - if (rlo == rhi) - print term, rlo - else - print term, (rlo " " rhi) - } - rlo = rhi = $3 # bounds of current range -} - -BEGIN { FS = OFS = "\t" } -$1 != term { printoldrange(); term = $1; range = 0 } -$2 == "e" { if (range == 1) { range = 0; rhi = $3 } - else { printoldrange(); error("no %begin for " term); rlo = "XXX" } - next - } -$3 <= rhi + 1 { rhi = $3} -$3 > rhi + 1 { if (range == 0) printoldrange() } -$2 == "b" { if (range == 1) error("multiple %begin for " term); range = 1 } -END { if (NR == 1) NR = 2; printoldrange() } -' $* //GO.SYSIN DD range.collapse echo range.prep 1>&2 sed 's/.//' >range.prep <<'//GO.SYSIN DD range.prep' -awk ' # range.prep -# Input: ["%begin"|"%end"|""] string (tab) number -# Output: string (tab) ["b"|"e"|"a"] (tab) number - -BEGIN { FS = OFS = "\t" } - { f2 = "a" } -$1 ~ /^%begin/ { f2 = "b"; sub(/^%begin */, "", $1) } -$1 ~ /^%end/ { f2 = "e"; sub(/^%end */, "", $1) } - { print $1, f2, $2 } -' $* //GO.SYSIN DD range.prep echo range.sort 1>&2 sed 's/.//' >range.sort <<'//GO.SYSIN DD range.sort' -# range.sort -# Input/Output: string (tab) ["b"|"e"|"a"] (tab) number -# Sort by $1 (string), $3 (number), then $2 ("b"|"e"|"a") - -sort -u '-t ' +0 -1 +2n +1 -2 $* //GO.SYSIN DD range.sort echo reroman 1>&2 sed 's/.//' >reroman <<'//GO.SYSIN DD reroman' -awk ' # reroman -# Output: string (tab) arab1 [(space) arab2] -# Input: string (tab) arab1 or roman1 [(space) arab2 or roman2] - -BEGIN { FS = OFS = "\t" - # set a[1] = "i", a[2] = "ii", ... - s = "i ii iii iv v vi vii viii ix x" - s = s " xi xii xiii xiv xv xvi xvii xviii xix xx" - s = s " xxi xxii xxiii xxiv xxv xxvi xxvii xxviii xxix xxx" - split(s, a, " ") - } -$2 < 0 { n = split($2, b, " ") - for (i = 1; i <= n; i++) { - if (b[i] >= 0) continue - j = 1000 + b[i] - if (j in a) b[i] = a[j] - else print "reroman: bad number: " $0 | "cat 1>&2" - } - $2 = b[1] - if (n > 1) $2 = b[1] " " b[2] - } - { print } -' $* //GO.SYSIN DD reroman echo rotate 1>&2 sed 's/.//' >rotate <<'//GO.SYSIN DD rotate' -awk ' # rotate -# Input line: string (tab) ["b"|"e"|"a"] (tab) number -# Output several lines: -# string (tab) ["b"|"e"|"a"] (tab) number -# rotated string (tab) ["b"|"e"|"a"] (tab) number -# rotated string (tab) ["b"|"e"|"a"] (tab) number -# ... -# -# In the output strings, tildes are replaced by spaces - -BEGIN { FS = OFS = "\t" } - -/ %key / { # if explicit sort.key is provided, do not rotate - print $0 - next - } - - { - t1 = $1 #t1 will be $1 with tildes changed to spaces - gsub(/%~/, "QQ5QQ", t1) #hide real tildes - gsub(/~/, " ", t1) #change tildes to spaces - gsub(/QQ5QQ/, "%~", t1) #restore real tildes - print t1, $2, $3 - i = 1 - while ((j = index(substr($1, i+1), " ")) > 0) { - i += j - printf("%s, %s\t%s\t%s\n", \ - substr(t1, i+1), substr(t1, 1, i-1), $2, $3) - } - } -' $* //GO.SYSIN DD rotate echo see.prep 1>&2 sed 's/.//' >see.prep <<'//GO.SYSIN DD see.prep' -#!/bin/sh - -awk ' # see.prep -# Input: string "\t" string -# Output: string "\t{see [also]} " string - -BEGIN { FS = "\t" } -$3 ~ /%also/ { print $1 "\t{see also} " $2; next } - { print $1 "\t{see} " $2 } -' $* //GO.SYSIN DD see.prep echo hierarchy 1>&2 sed 's/.//' >hierarchy <<'//GO.SYSIN DD hierarchy' -#!/bin/sh - -# input: -# key (tab) string (tab) page numbers -# command command 123 -# command, data command, [data] 11 -# command, display command, [display] 11, 54, 63, 75 -# command, model command, [model] 11 -# command, quit command, [quit] 5, 16 -# output: -# key (tab) string (tab) page numbers -# key command 123 -# key [data] 11 -# key [display] ... -# key [model] ... -# key [quit] ... - -awk ' -BEGIN { FS = OFS = "\t" } - -{ line[NR] = $0; x[NR] = $2 "\t" $3; y[NR] = $1 } - -# find a sequence that have the same prefix -# dump prefix, then each instance with spaces instead of prefix -END { - for (i = 1; i <= NR; i = j+1) { - j = findrun(i) # returns last elem of run - if (j > i) - printrun(i, j) - else - print y[i], x[i] - } -} - -function findrun(s, j, p, np) { # find y[s],y[s+1]... with same prefix - p = prefix(y[s]) - np = length(p) - for (j = s+1; j <= NR; j++) { - if (y[j] == p) # same, so include - continue - if (index(y[j], p) != 1) # no match - break - c = substr(y[j], np+1, 1) - if (c != " " && c != ",") # has to be whole word prefix - break - } - return j-1 -} - -function prefix(s, n) { # find 1st word of s: same sort key, minus , - gsub(/,/, "", s) - n = index(s, " ") - if (n > 0) - return substr(s, 1, n-1) - else - return s -} - -function printrun(s, e, i) { # move [...] to end, "see" to front - s1 = 0; e1 = 0; p1 = 0; i1 = 0 - for (i = s; i <= e; i++) { - if (x[i] ~ /{see/) { # see, see also - sx[s1] = x[i] - sy[s1] = y[i] - s1++ - } else if (x[i] ~ /^\[/) { # prefix word is [...] - px[p1] = x[i] - py[p1] = y[i] - p1++ - } else if (x[i] ~ /\[.*\]/) { # [...] somewhere else - ex[e1] = x[i] - ey[e1] = y[i] - e1++ - } else { # none of the above - ix[i1] = x[i] - iy[i1] = y[i] - i1++ - } - } - if (e-s+1 != s1 + p1 + i1 + e1) print "oh shit" >"/dev/stderr" - - for (i = 0; i < s1; i++) # "see", one/line - print sy[i], sx[i] - if (i1 > 1) - printgroup(ix,iy,0,i1) # non [...] items - else if (i1 == 1) - print iy[0], ix[0] - if (e1 > 1) - printgroup(ex,ey,0,e1) # prefix [...] items - else if (e1 == 1) - print ey[0], ex[0] - # for (i = 0; i < p1; i++) # [prefix] ... items - # print py[i], px[i] - if (p1 > 1) - printgroup(px,py,0,p1) # [prefix] ... items - else if (p1 == 1) - print py[0], px[0] -} - -function printgroup(x, y, s, e, i, j) { - split(x[s], f23) - if (split(f23[1], temp, " ") > 1) { - pfx = temp[1] " " temp[2] # 2-word prefix - for (i = s+1; i < e; i++) { - if (index(x[i], pfx) != 1) - break - c = substr(x[i], length(pfx)+1, 1) - if (c != " " && c != ",") # has to be whole word prefix - break - } - if (i == e) { - # print "got a run with", pfx - sub(/ /, "@", f23[1]) - for (i = s; i < e; i++) - sub(/ /, "@", x[i]) # take @ out later - } - } - n = sub(/,?[ ~]+.*/, "", f23[1]) # zap rest of line - - sub(/,$/, "", f23[1]) - if (n > 0) { # some change, so not a single word - sub(/@/, " ", f23[1]) - print y[s], f23[1] # print main entry - } - for (j = s; j < e; j++) { - split(x[j], f23) - sub(/^[^, ]+[, ]+/, " ", f23[1]) - sub(/@/, " ", f23[1]) - print y[s], f23[1], f23[2] - } -} - -' $* //GO.SYSIN DD hierarchy echo see.terms 1>&2 sed 's/.//' >see.terms <<'//GO.SYSIN DD see.terms' -soft constraints penalty function -[>>> <<<] error messages -omitted data value default symbol -data value, omitted default symbol -["].\|.\|.["], ['].\|.\|.['] quotes -[..] arithmetic progression -[=] attribute defined variables -[.body] constraint body -built-in function function -Cartesian product cross product -conditional expression logical expression, [if]-[then]-[else] -conditional declaration [if] indexing expression -[.dat] file file -[.dual] dual value -Lagrange multiplier dual variables -index dummy index -iterated operator operator -comparison operators relational operators -[maximize] objective declaration -[minimize] objective declaration -model file file -[.mod] file file -price, shadow dual value -problem model %also -[.rc] reduced cost -data reset [reset] [data] command -scaling along arc loss in network flow -scope dummy index %also -set of sets indexed collection -shadow price dual value -marginal~value dual value -simplification of expression presolve %also -starting guess initial values of variables -[subject] [to] constraint declaration -[s.t.] constraint declaration -symmetric difference [symdiff] -set difference [diff], [symdiff] -syntax error error messages -messages error messages -space requirement memory use -viewing data [display] command -[;] semicolon statement terminator -array one-dimensional set, parameter -vector one-dimensional set, parameter -matrix two-dimensional set, parameter -command mode model mode -[>], [>>] output to file -redirection output to file -qualified name [.] suffix -member, dummy dummy index -membership test [in], [within] -subset test [in], [within] -such-that condition logical condition -set intersection [inter] -intersection, set [inter] -slope piecewise-linear expression -breakpoint piecewise-linear expression -command statement %also -statement command, declaration %also -declaration statement %also -traffic flow maximum flow model -flow network flow -roll trim problem cutting-stock problem -argument command-line argument -zero-one programming integer programming -reading files [model], [data] -file inclusion [model], [data] -[include] [model], [data] %also -transposition table transposition //GO.SYSIN DD see.terms echo install 1>&2 sed 's/.//' >install <<'//GO.SYSIN DD install' -chmod +x check.data cleanup deroman doclean final.sort format \ - gen.key hierarchy make.index num.collapse range.collapse \ - range.prep range.sort reroman rotate see.prep //GO.SYSIN DD install