# /u/sy/beebe/tex/bib/year-coverage.awk, Wed Dec 27 08:16:40 1995
# Edit by Nelson H. F. Beebe
# ========================================================================
# Print a summary of the yearly entry counts in a BibTeX file, assuming
# BibNet-style citation labels.
#
# Usage:
#       nawk -f year-coverage.awk bibfile
#
# [24-Dec-1997] -- Increase publication count field width from 3 to 4.
# [02-Jan-1997] -- Add check for unknown entry type.
# [30-Oct-1996] -- Change year patterns in second /^@/ action and year_min
#                  computation to require a match to a 4-digit value
#                  to avoid false matches that could generate very
#                  long year ranges
# [05-Oct-1996] -- Extend to handle bibliographies that do not use BibNet-style
#                  citation labels
# [03-Sep-1996] -- Improve pattern matching that controls computation of
#                  the year extrema.
# [15-Jun-1996] -- Use format for Total entries output.
# [11-May-1996] -- Extend pattern in guard for year_min, year_max
#                  computation
# [05-Apr-1996] -- Modify output to avoid printing years beyond year_max,
#                  and generalize printing of unknown years.
# [08-Feb-1996] -- Add code to output entry counts by entry type.
# [13-Jan-1996] -- First RCS checkin.
# [27-Dec-1995] -- Original version.
# ========================================================================

BEGIN                           { initialize() }

# Every line that starts an entry resets the per-entry flag that records
# whether a year has already been counted for that entry.
/^@/                            { saw_year = 0 }

# BibNet-style label, e.g. "@Article{Knuth:1984:TB,".  With FS = ":" such
# a line has exactly three fields and the year is the second one.
/^@/ && (NF == 3) && ($2 ~ /^[12][0-9][0-9x][0-9x]$/) {
    ## print "DEBUG 1: NR = " NR " year = " $2
    count_entry($0)
    record_year($2)
    saw_year = 1        # the label supplied the year: ignore year = "..." lines
    next
}

# See Nelson H. F. Beebe, ``Bibliography prettyprinting and syntax
# checking'', TUGboat 14(3), 222-222, October (1993), and 14(4),
# 395--419, December (1993) for the syntax of BibTeX names used here
#
# Entry whose label is not BibNet-style: count the entry here and let the
# year = "..." rules below supply the year.
/^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+\/'\-]*[ \t]*{[A-Za-z][A-Za-z0-9:.+\/'\-]*,[ \t]*$/ {
    ## print "DEBUG 2: NR = " NR
    count_entry($0)
    next
}

# year = "1990", year = "1990-1992", year = "1990/1991", ... with nothing
# but an optional comma and blanks after the closing quote.  Only the
# first year of a range is counted.
/^ *year *= *"[12][0-9][0-9x][0-9x]([-/]+[12][0-9][0-9x][0-9x])*",? *$/ && (saw_year == 0) {
    year = first_year($0)
    ## print "DEBUG 3: NR = " NR " year = " year
    if (record_year(year))
        saw_year = 1
    next
}

# year = "1990/1991" followed by arbitrary text.  Lines that end right
# after the value were already consumed by the previous rule, so this one
# is reached only when something trails the closing quote.
/^ *year *= *"[12][0-9][0-9x][0-9x]\/[12][0-9][0-9x][0-9x]"/ && (saw_year == 0) {
    year = first_year($0)
    ## print "DEBUG 4: NR = " NR " year = " year
    if (record_year(year))
        saw_year = 1
    next
}

END {
    PREFIX = "%%% "

    print PREFIX

    # The numeric years are laid out in three columns of delta rows each.
    # When no numeric year was seen, year_min (99999) exceeds year_max (0),
    # delta is negative, and the loop body is never entered.
    delta = int((year_max - year_min + 1 + 2)/3)
    for (year = year_min; year < (year_min + delta); year++)
    {
        # Skip lines that have all counts zero: in a few bibliographies,
        # there are many such cases.
        if ((count_year[year + 0*delta] > 0) ||
            (count_year[year + 1*delta] > 0) ||
            (count_year[year + 2*delta] > 0))
        {
            # Shorten line to prevent exceeding year_max
            if ((year + 1*delta) > year_max)
                printf("%s%4d (%4d)\n", \
                    PREFIX, \
                    year + 0*delta, count_year[year + 0*delta])
            else if ((year + 2*delta) > year_max)
                printf("%s%4d (%4d) %4d (%4d)\n", \
                    PREFIX, \
                    year + 0*delta, count_year[year + 0*delta], \
                    year + 1*delta, count_year[year + 1*delta])
            else
                printf("%s%4d (%4d) %4d (%4d) %4d (%4d)\n", \
                    PREFIX, \
                    year + 0*delta, count_year[year + 0*delta], \
                    year + 1*delta, count_year[year + 1*delta], \
                    year + 2*delta, count_year[year + 2*delta])
        }
    }

    # Print any remaining unknown year counts.  The year patterns accept
    # an "x" in either of the last two positions of any year from 1000 to
    # 2999 (199x, 19x5, 19xx, ...), so enumerate every such key in a fixed
    # order instead of relying on the unspecified order of "for (k in a)".
    for (century = 10; century < 30; ++century)
    {
        for (tens = 0; tens <= 10; ++tens)
        {
            for (ones = 0; ones <= 10; ++ones)
            {
                if ((tens < 10) && (ones < 10))
                    continue    # fully numeric: handled by the table above
                t = (tens < 10) ? tens : "x"
                u = (ones < 10) ? ones : "x"
                year = century t u
                if (year in count_year)
                    printf("%s%4s (%4d)\n", PREFIX, year, count_year[year])
            }
        }
    }

    print PREFIX

    # Entry counts by type, ordered by piping them through sort(1).
    for (type in count_type)
        printf("%s%-15s%4d\n", PREFIX, type ":", count_type[type]) | "sort"
    close("sort")

    print PREFIX

    printf("%s%-15s%4d\n", PREFIX, "Total entries:", total)

    print PREFIX
}

# Return the canonical name (e.g. "Article") of the entry type that
# precedes the first "{" in line, or an empty string when the type is not
# listed in Typeof[].
function entry_type(line,    name)
{
    name = substr(line, 1, index(line, "{") - 1)
    sub(/[ \t]*$/, "", name)
    sub(/^[@ \t]*/, "", name)
    return Typeof[tolower(name)]
}

# Count one entry: bump the per-type counter and the grand total, and
# report an unrecognized entry type on stderr.  An unrecognized type is
# still counted, under the empty type name.
function count_entry(line,    type)
{
    type = entry_type(line)
    if (length(type) == 0)
        print "ERROR: Unknown entry type at line",FNR >"/dev/stderr"
    count_type[type]++
    total++
}

# Return the first four characters of the first double-quoted value in
# line, reduced to digits and "x".  The calling patterns guarantee that
# the value starts with a year of the form [12][0-9][0-9x][0-9x].
function first_year(line,    parts, y)
{
    split(line, parts, "\"")
    y = substr(parts[2], 1, 4)
    gsub(/[^0-9x]/, "", y)
    return y
}

# Count one occurrence of year.  A fully numeric 4-digit year also updates
# year_min and year_max; 1 is returned in that case, and 0 for a partly
# unknown year such as 19xx.
function record_year(year)
{
    count_year[year]++
    if (year ~ /^[12][0-9][0-9][0-9]$/)     # then have a numeric year
    {
        year += 0       # compare as a number, never as a string
        if (year <= year_min)
            year_min = year
        if (year >= year_max)
            year_max = year
        return 1
    }
    return 0
}

# Set the field separator, the year extrema sentinels, and the table that
# maps lowercased entry type names to their canonical spelling.
function initialize()
{
    FS = ":"            # splits BibNet labels of the form name:year:tag

    year_min = 99999
    year_max = 0

    Typeof["article"]           = "Article"
    Typeof["book"]              = "Book"
    Typeof["booklet"]           = "Booklet"
    Typeof["inbook"]            = "InBook"
    Typeof["incollection"]      = "InCollection"
    Typeof["inproceedings"]     = "InProceedings"
    Typeof["manual"]            = "Manual"
    Typeof["mastersthesis"]     = "MastersThesis"
    Typeof["misc"]              = "Misc"
    Typeof["periodical"]        = "Periodical"
    Typeof["phdthesis"]         = "PhdThesis"
    Typeof["proceedings"]       = "Proceedings"
    Typeof["techreport"]        = "TechReport"
    Typeof["unpublished"]       = "Unpublished"
}