%!PS-Adobe-1.0
%%DocumentFonts:
%%Creator: eroff/eps
%%CreationDate: Thu Nov 24 09:18:58 1988
%%Pages: (atend)
%%EndComments

% ---------------------------------------------------------------------------
% Prolog: page-layout helpers used by the page bodies below.
%
% NOTE: a PostScript comment runs from `%` to the end of the physical line, so
% every `%` / `%%` comment in this file MUST be followed by a newline.  Joining
% these lines turns the code after the comment into dead text.
% ---------------------------------------------------------------------------

% Leaves a save object on the operand stack.
save

% Work in tenths of the default user unit (so 720 units per inch, see /inch).
0.1000 dup scale
/inch { 720.0000 mul } bind def

% Job statistics captured at start-up (presumably consumed by a trailer that
% is outside this part of the file -- TODO confirm).
/stm usertime def
/pgc statusdict begin pagecount end def

% Shift the origin vertically to the lower edge of the clip path, then record
% the upper edge of the clip path as pgtop; Y coordinates are measured down
% from pgtop.
clippath pathbbox pop pop exch pop 0 exch translate
clippath pathbbox /pgtop exch def pop pop pop

% string ps -        print a string on standard output and flush it
/ps { print flush } bind def

/lineWidthScale 1.0 def

% - page -            emit the page, restore to the save object on top of the
%                     operand stack, take a fresh save, and go to the top-left
/page { showpage restore save home } bind def

% - home -            start a new path at x=0, y=pgtop
/home { newpath 0 pgtop moveto } bind def

% - mf - / - af -     set statusdict /manualfeed to true / false
/mf { statusdict /manualfeed true put } bind def
/af { statusdict /manualfeed false put } bind def
af

% X and Y below need a current point, so establish one.
0 0 moveto
2 setlinecap

% dy y -              move down by dy (relative)
/y { neg 0 exch rmoveto } bind def

% x X -               set the absolute x position, keeping the current y
/X { currentpoint exch pop moveto } bind def

% y Y -               set the absolute y position (measured down from pgtop),
%                     keeping the current x
/Y { pgtop exch sub currentpoint pop exch moveto } bind def

% string s -          show a string at the current point
/s { show } bind def

% dx dy l -           stroke a line to (dx, -dy) relative; the current point
%                     ends up at the end of the line
/l { neg rlineto currentpoint stroke moveto } bind def

% diam c -            stroke a circle whose left edge is the current point;
%                     the current point ends up at its right edge
/c {
  2 div /rad exch def
  currentpoint /y0 exch def /x0 exch def
  newpath x0 rad add y0 rad 0 360 arc stroke
  x0 rad add rad add y0 moveto
} bind def

% x1 y1 x2 y2 a -     stroke an arc: (x1, -y1) is the offset from the current
%                     point to the centre, (x2, -y2) the offset from the centre
%                     to the end point; the radius is the length of (x2, y2)
/a {
  /y2 exch neg def /x2 exch def
  /y1 exch neg def /x1 exch def
  x1 y1 rmoveto
  currentpoint currentpoint
  x2 x2 mul y2 y2 mul add sqrt
  y1 neg x1 neg atan
  y2 x2 atan
  newpath arc stroke
  moveto x2 y2 rmoveto
} bind def

% xdiam ydiam e -     stroke an ellipse whose left edge is the current point.
%                     The unit circle is scaled, so the line width is divided
%                     by xrad before stroking and restored afterwards.
/e {
  currentlinewidth /elw exch def
  gsave
  2 div /yrad exch def
  2 div /xrad exch def
  currentpoint /y0 exch def /x0 exch def
  x0 xrad add y0 translate
  xrad yrad scale
  newpath 0 0 1 0 360 arc
  elw xrad div setlinewidth stroke
  grestore
  elw setlinewidth
  x0 xrad add xrad add y0 moveto
} bind def

% dx1 dy1 dx2 dy2 dx3 dy3 spln -   stroke a relative Bezier segment; the
%                     current point ends up at the end of the curve
/spln { rcurveto currentpoint stroke moveto } bind def

% slant ysize xsize font ft -   select `font` scaled to xsize/ysize points
%                     (see /pt) and sheared by `slant` degrees, then set the
%                     line width to lineWidthScale * xsize / 5.
%                     `pt` is defined further down; it is looked up at run time.
/ft {
  /fonttype exch def
  /xsiz exch def
  /ysiz exch def
  /sl exch def
  fonttype [ xsiz pt 0 sl sin sl cos div ysiz pt mul ysiz pt 0 0 ] makefont setfont
  lineWidthScale xsiz mul 5.0 div setlinewidth
} bind def

% width height bits matrix doImage -   image whose hex data follows in the
%                     program text.  rasterString is not defined in the visible
%                     part of this file; the caller must define it first.
/doImage {{currentfile rasterString readhexstring pop} image} bind def

% points pt units     convert points to the scaled user units set up above
/pt { 10 mul } bind def

% Page offset: 0.375 inch horizontally, 0.25 inch vertically.
0.375 0.000000 add inch 0.25 inch translate
/savematrix matrix currentmatrix def

% ---------------------------------------------------------------------------
% Font re-encoding ("roman-8").
% ---------------------------------------------------------------------------

/roman-8-dict 20 dict def % Local storage

% Flat list of (character code, glyph name) pairs that AddRoman-8 patches into
% a copy of a font's Encoding array.
/roman-8-mappings [
  8#260 /Adieresis    8#265 /Aring        8#276 /Aacute       8#300 /Agrave
  8#311 /Acircumflex  8#314 /Atilde       8#321 /Ccedilla     8#322 /Edieresis
  8#323 /Eacute       8#324 /Egrave       8#325 /Ecircumflex  8#326 /Idieresis
  8#327 /Iacute       8#330 /Igrave       8#331 /Icircumflex  8#332 /Ntilde
  8#333 /Odieresis    8#334 /Oacute       8#335 /Ograve       8#336 /Ocircumflex
  8#337 /Otilde       8#340 /Scaron       8#342 /Udieresis    8#344 /Uacute
  8#345 /Ugrave       8#346 /Ydieresis    8#347 /adieresis    8#354 /aring
  8#355 /aacute       8#356 /agrave       8#357 /acircumflex  8#360 /atilde
  8#362 /ccedilla     8#363 /edieresis    8#364 /eacute       8#366 /egrave
  8#367 /ecircumflex  8#374 /idieresis    8#375 /iacute       8#376 /igrave
  8#220 /icircumflex  8#221 /ntilde       8#222 /odieresis    8#223 /oacute
  8#224 /ograve       8#225 /ocircumflex  8#226 /otilde       8#227 /scaron
  8#230 /udieresis    8#231 /uacute       8#232 /ugrave       8#233 /ucircumflex
  8#234 /ydieresis    8#235 /Ucircumflex
] def

% oldName newName mappings AddRoman-8 -
%   Define font `newName` as a copy of `oldName` whose Encoding has the
%   (code, name) pairs of `mappings` applied.  All working names live in
%   roman-8-dict so userdict is not polluted.
/AddRoman-8 {
  roman-8-dict begin
  /roman-8-mappings exch def
  /newName exch def
  /oldName exch def
  /oldDict oldName findfont def
  /newDict oldDict maxlength dict def
  % Copy every entry except FID (definefont supplies a new one).  The Encoding
  % array is duplicated so the original font's array is not modified.
  oldDict {
    exch dup /FID ne {
      dup /Encoding eq {
        exch dup length array copy newDict 3 1 roll put
      } {
        exch newDict 3 1 roll put
      } ifelse
    } {
      pop pop
    } ifelse
  } forall
  newDict /FontName newName put
  % Walk the mapping list two entries at a time: [code name code name ...].
  0 2 roman-8-mappings length 1 sub {
    dup roman-8-mappings exch get
    exch 1 add roman-8-mappings exch get
    newDict /Encoding get 3 1 roll put
  } for
  newName newDict definefont pop
  end
} bind def

% ---------------------------------------------------------------------------
% BracketFont: a Type 3 font with bracket-building pieces, rules and arrows.
% Glyph names are "C" + a two-character code (presumably the troff special
% character names bv, lt, lk, lb, rt, rk, rb, lc, lf, rc, rf, br, rn, ci --
% TODO confirm against the eroff driver).
% ---------------------------------------------------------------------------

/BracketFontDict 9 dict def
/$workingdict 10 dict def

BracketFontDict begin
/FontType 3 def
/FontName (Bracket) cvn def
/FontMatrix [ 0.001 0 0 0.001 0 0] def
/FontBBox [ -50 -250 1000 1000 ] def
/Encoding 256 array def
0 1 255 { Encoding exch /.notdef put } for
Encoding
  dup 65 /Cbv put  dup 66 /Clt put  dup 67 /Clk put  dup 68 /Clb put
  dup 69 /Crt put  dup 70 /Crk put  dup 71 /Crb put  dup 72 /Clc put
  dup 73 /Clf put  dup 74 /Crc put  dup 75 /Crf put  dup 76 /Cbr put
  dup 77 /Crn put  dup 78 /Cci put  dup 79 /C|| put  dup 80 /C^^ put
  dup 81 /Cr1 put  dup 82 /Cr2 put
pop

% 25 entries: 6 helpers + 18 glyphs + .notdef.
/CharProcs 25 dict dup begin

% Every code that is not explicitly encoded maps to /.notdef, and BuildChar
% resolves glyph names with `load`; without this entry, showing such a code
% raised `undefined`.  It is a zero-width glyph that paints nothing.
/.notdef { 0 0 setcharwidth } bind def

% wx setC -           setcachedevice with advance (wx, 0) and the standard
%                     glyph box -50 -250 500 1000
/setC { 0 -50 -250 500 1000 setcachedevice} bind def

% - C.bv -            filled vertical bar, 60 wide, from y=-250 to y=750
/C.bv {220 -250 moveto 0 1000 rlineto 60 0 rlineto 0 -1000 rlineto fill } bind def

% x C.cbar -          filled horizontal bar hanging down from y=750 ("ceiling")
/C.cbar { 750 moveto 180 0 rlineto 0 -60 rlineto -180 0 rlineto fill } bind def

% x C.fbar -          filled horizontal bar rising from y=-250 ("floor")
/C.fbar { -250 moveto 180 0 rlineto 0 60 rlineto -180 0 rlineto fill } bind def

% <18 numbers> C.brk.end -   filled curved bracket end piece.  The operands
%                     are consumed from the top of the stack in this order:
%                     moveto(2) rlineto(2) rcurveto(6), then after reversepath
%                     and a 60-unit step: rlineto(2) rcurveto(6).
/C.brk.end { 1 setlinewidth moveto rlineto rcurveto reversepath 60 0 rlineto rlineto rcurveto fill } bind def

% w C.setl -          set the line width to w, rounded in device space
/C.setl {dup dtransform exch round exch idtransform pop setlinewidth } bind def

/Cbv { 500 setC C.bv } bind def
/Clt { 500 setC 0 150 50 210 140 250 0 730 0 150 50 250 200 250 0 750 220 -250 C.brk.end } bind def
/Clk {
  500 setC 1 setlinewidth
  220 -250 moveto 0 400 rlineto
  0 50 -50 100 -100 100 rcurveto
  50 0 100 50 100 100 rcurveto
  0 400 rlineto 60 0 rlineto 0 -400 rlineto
  0 -50 -50 -100 -100 -100 rcurveto
  50 0 100 -50 100 -100 rcurveto
  0 -400 rlineto closepath fill
} bind def
/Clb { 500 setC 0 -150 50 -210 140 -250 0 -730 0 -150 50 -250 200 -250 0 -750 220 750 C.brk.end } bind def
/Crt { 500 setC 0 150 -50 250 -200 250 0 750 0 150 -50 210 -140 250 0 730 220 -250 C.brk.end } bind def
/Crk {
  500 setC 1 setlinewidth
  220 -250 moveto 0 400 rlineto
  0 50 50 100 100 100 rcurveto
  -50 0 -100 50 -100 100 rcurveto
  0 400 rlineto 60 0 rlineto 0 -400 rlineto
  0 -50 50 -100 100 -100 rcurveto
  -50 0 -100 -50 -100 -100 rcurveto
  0 -400 rlineto fill
} bind def
/Crb { 500 setC 0 -150 -50 -250 -200 -250 0 -750 0 -150 -50 -210 -140 -250 0 -730 220 750 C.brk.end } bind def
/Clc { 500 setC C.bv 280 C.cbar } bind def
/Clf { 500 setC C.bv 280 C.fbar } bind def
/Crc { 500 setC C.bv 40 C.cbar } bind def
/Crf { 500 setC C.bv 40 C.fbar } bind def
/Cbr { 0 0 -50 -250 0 1000 setcachedevice 40 C.setl 0 -250 moveto 0 1000 rlineto stroke } bind def
/Crn { 500 setC 40 C.setl 0 770 moveto 500 0 rlineto stroke } bind def
/Cci { 1000 0 -50 -250 1000 1000 setcachedevice 40 C.setl 500 250 400 0 360 arc stroke } bind def
% C|| and C^^ only advance (170 and 80 units); they paint nothing.
/C|| { 170 0 -50 -250 170 1000 setcachedevice } bind def
/C^^ { 80 0 -50 -250 80 1000 setcachedevice } bind def
/Cr1 {
  800 0 -50 -250 800 1000 setcachedevice 40 C.setl 0 setlinejoin
  700 180 moveto -650 currentlinewidth add 0 rlineto 200 -200 rlineto
  50 360 moveto 650 currentlinewidth sub 0 rlineto -200 200 rlineto
  stroke
} bind def
/Cr2 {
  800 0 -50 -250 800 1000 setcachedevice 40 C.setl 2 setlinejoin
  217 18 moveto -150 150 rlineto 150 150 rlineto -150 -150 rlineto 633 0 rlineto
  50 360 moveto 633 0 rlineto -150 150 rlineto 150 -150 rlineto -150 -150 rlineto
  stroke
} bind def
end def

% font charcode BuildChar -   look up the glyph name for charcode in Encoding,
%                     load its procedure from CharProcs and run it with butt
%                     caps, black ink and an empty path.
/BuildChar {
  $workingdict begin
  /charcode exch def
  /fontdict exch def
  fontdict /CharProcs get begin
  fontdict /Encoding get charcode get load
  gsave 0 setlinecap 0 setgray newpath exec grestore
  end
  end
} bind def
end

/BracketFont BracketFontDict definefont pop

% Font handles used by the page bodies.
/f.ZD /ZapfDingbats findfont def
/f.S /Symbol findfont def
/f.S2 /BracketFont findfont def
/Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8
/f.R /Times-Roman-8 findfont def
0.0 10 10 f.R ft

% This save object is the one the first `page` call restores to.
save home
%%EndProlog

% ---- Page 1: title page ----
0 Y 720 X 840 Y 3060 X(.)s
720 X 2160 Y 2192 X(ARGONNE)s 2705 X(NATIONAL)s 3245 X(LABORATORY)s
720 X 2280 Y 2556 X(9700)s 2781 X(South)s 3040 X(Cass)s 3254 X(Avenue)s
720 X 2400 Y 2568 X(Argonne,)s 2967 X(Illinois)s 3301 X(60439)s
720 X 2952 Y 2091 X
/Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8
/f.B /Times-Bold-8 findfont def
0.0 11 11 f.B ft(O.)s 2233 X(Brewer,)s 2634 X(J.)s 2745 X(Dongarra,)s 3258 X(and)s 3465 X(D.)s 3600 X(Sorensen)s
720 X 3432 Y 1743 X(Tools)s 2029 X(to)s 2148 X(Aid)s 2348 X(in)s 2469 X(the)s 2643 X(Analysis)s 3070 X(of)s 3189 X(Memory)s 3617 X(Access)s 3954 X(Patterns)s
720 X 3672 Y 2546 X(for)s 2713 X(Fortran)s 3112 X(Programs)s
720 X 4632 Y 2152 X 0.0 10 10 f.R ft(Mathematics)s 2693 X(and)s 2862 X(Computer)s 3287 X(Science)s 3622 X(Division)s
720 X 4992 Y 2425 X(Technical)s 2843 X(Memorandum)s 3434 X(No.)s 3606 X(??)s
720 X 5856 Y 2856 X(June)s 3064 X(1988)s
7920 Y page

% ---- Page 2 ----
% `page` restored the VM, so fonts defined after the previous save must be
% set up again before use.
/Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8
/f.R /Times-Roman-8 findfont def
0.0 10 10 f.R ft
/lineWidthScale 1.0000 def
lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 840 Y 1622 X /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 12 12 f.B ft(Tools)s 1933 X(to)s 2063 X(Aid)s 2280 X(in)s 2411 X(the)s 2601 X(Analysis)s 3066 X(of)s 3196 X(Memory)s 3665 X(Access)s 4034 X(Patterns)s 720 X 1080 Y 2497 X(for)s 2680 X(Fortran)s 3116 X(Programs)s 720 X 1440 Y 2138 X /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft(O.)s 2273 X(Brewer,)s 2652 X(J.)s 2756 X(Dongarra,)s 3252 X(and)s 3445 X(D.)s 3580 X(Sorensen)s 720 X 1620 Y 2080 X 0.0 10 10 f.R ft(Mathematics)s 2621 X(and)s 2790 X(Computer)s 3215 X(Science)s 3550 X(Division)s 720 X 1740 Y 2395 X(Argonne)s 2769 X(National)s 3138 X(Laboratory)s 720 X 1860 Y 2392 X(Argonne,)s 2791 X(Illinois)s 3100 X(60439-4844)s 720 X 2420 Y 0.0 11 11 f.B ft(1.)s 859 X(Introduction)s 720 X 2640 Y 970 X 0.0 11 11 f.R ft(The)s 1176 X(development)s 1779 X(of)s 1906 X(e)s /CFi { (f) show xsiz pt 20 div neg 0 rmoveto (\256) s } bind def CFi(cient)s 2295 X(algorithms)s 2802 X(on)s 2948 X(todays)s 3271 X(high-performance)s 4090 X(computers)s 4583 X(can)s 4770 X(be)s 4909 X(a)s 4993 X(challeng-)s 720 X 2800 Y(ing)s 890 X(undertaking,)s 1471 X(with)s 1696 X(the)s 1859 X(e)s CFi(cient)s 2241 X(use)s 2416 X(of)s 2536 X(memory)s 2931 X(being)s 3204 X(a)s 3281 X(critical)s 3614 X(factor.)s 3953 X(Memory)s 4359 X(is)s 4461 X(organized)s 4920 X(in)s 5034 X(a)s 5110 X(hierar-)s 720 X 2960 Y(chy)s 925 X(according)s 1403 X(to)s 1536 X(access)s 1861 X(time.)s 2132 X(High-performance)s 2986 X(computers)s 3490 X(rely)s 3707 X(on)s 3864 X(e)s /Cff { (f) show xsiz pt 20 div neg 0 rmoveto (f) s } bind def Cff(ective)s 4287 X(management)s 4895 X(of)s 5034 X(memory)s 720 X 3120 Y(hierarchy)s 1163 X(when)s 1431 X(carrying)s 1825 X(out)s 1996 X(\257oating-point)s 2626 X(computations.)s 3305 X(This)s 3531 X(hierarchy)s 3973 X(takes)s 4228 X(the)s 4392 X(form)s 4635 X(of)s 4756 X(main)s 
5006 X(memory,)s 720 X 3280 Y(cache,)s 1026 X(local)s 1270 X(memory,)s 1695 X(and)s 1884 X(vector)s 2188 X(registers.)s 2646 X(The)s 2847 X(basic)s 3103 X(objective)s 3536 X(of)s 3658 X(this)s 3849 X(organization)s 4428 X(is)s 4534 X(to)s 4652 X(attempt)s 5014 X(to)s 5132 X(match)s 720 X 3440 Y(the)s 891 X(imbalance)s 1378 X(between)s 1779 X(the)s 1949 X(fast)s 2143 X(processing)s 2648 X(speed)s 2933 X(of)s 3060 X(the)s 3230 X(\257oating-point)s 3866 X(units)s 4117 X(and)s 4311 X(the)s 4481 X(slow)s 4725 X(latency)s 5077 X(time)s 5309 X(of)s 720 X 3600 Y(main)s 984 X(memory.)s 1450 X(Successful)s 1963 X(algorithms)s 2478 X(must)s 2737 X(e)s Cff(ectively)s 3243 X(utilize)s 3562 X(the)s 3741 X(memory)s 4152 X(hierarchy)s 4609 X(of)s 4745 X(the)s 4924 X(underlying)s 720 X 3760 Y(computer)s 1162 X(architecture)s 1705 X(on)s 1843 X(which)s 2139 X(they)s 2356 X(are)s 2516 X(implemented.)s 720 X 3980 Y 970 X(Cache)s 1287 X(memory,)s 1725 X(local)s 1982 X(memory,)s 2421 X(and)s 2624 X(vector)s 2942 X(registers)s 3358 X(are)s 3535 X(really)s 3829 X(high-speed)s 4355 X(bu)s Cff(ers)s 4704 X(Cache)s 5022 X(Memory)s 720 X 4140 Y(Design:)s 1094 X(An)s 1260 X(Evolving)s 1695 X(Art)s 1872 X(Cache)s 2176 X(memory)s 2573 X(is)s 2678 X(usually)s 3027 X(controlled)s 3503 X(by)s 3644 X(hardware,)s 4108 X(while)s 4383 X(local)s 4627 X(memory)s 5024 X(and)s 5213 X(vec-)s 720 X 4300 Y(tor)s 876 X(registers)s 1281 X(are)s 1447 X(controlled)s 1926 X(by)s 2070 X(software.)s 2536 X(The)s 2741 X(purpose)s 3123 X(of)s 3249 X(this)s 3444 X(hierarchy)s 3891 X(is)s 4000 X(to)s 4121 X(capture)s 4477 X(those)s 4744 X(portions)s 5140 X(of)s 5266 X(the)s 720 X 4460 Y(main)s 972 X(memory)s 1370 X(that)s 1567 X(are)s 1731 X(currently)s 2158 X(in)s 2276 X(use,)s 2482 X(and)s 2672 X(to)s 2790 X(reduce)s 3112 X(the)s 3277 X(time)s 3504 X(for)s 3662 X(subsequent)s 4181 X(accesses.)s 4637 X(Since)s 4912 X(these)s 5168 X(high-)s 720 X 4620 Y(speed)s 1000 X(bu)s Cff(ers)s 1335 X(are)s 1498 X(often)s 1754 
X(5)s 1840 X(to)s 1957 X(10)s 2099 X(times)s 2370 X(faster)s 2644 X(than)s 2865 X(main)s 3117 X(memory,)s 3543 X(they)s 3764 X(can)s 3947 X(substantially)s 4536 X(reduce)s 4858 X(the)s 5024 X(e)s Cff(ective)s 720 X 4780 Y(memory)s 1115 X(access)s 1422 X(time)s 1646 X(if)s 1741 X(they)s 1958 X(can)s 2137 X(be)s 2268 X(used.)s 2525 X(The)s 2723 X(success)s 3079 X(of)s 3198 X(hierarchy)s 3638 X(is)s 3740 X(then)s 3957 X(attributed)s 4406 X(to)s 4520 X(locality)s 4878 X(of)s 4997 X(reference)s 720 X 4940 Y(and)s 906 X(reuse)s 1164 X(of)s 1283 X(data)s 1493 X(in)s 1607 X(a)s 1683 X(users)s 1936 X(program.)s 720 X 5160 Y 970 X(Thus,)s 1257 X(in)s 1382 X(order)s 1651 X(to)s 1776 X(improve)s 2181 X(the)s 2355 X(performance)s 2946 X(of)s 3077 X(algorithms)s 3588 X(implemented)s 4202 X(on)s 4352 X(high-performance)s 5175 X(com-)s 720 X 5320 Y(puters,)s 1047 X(we)s 1205 X(must)s 1451 X(consider)s 1853 X(not)s 2025 X(only)s 2252 X(the)s 2417 X(total)s 2644 X(number)s 3010 X(of)s 3132 X(memory)s 3529 X(references,)s 4034 X(but)s 4206 X(also)s 4414 X(the)s 4579 X(pattern)s 4913 X(of)s 5034 X(memory)s 720 X 5480 Y(references)s 1219 X(Linear)s 1557 X(Algebra)s 1962 X(on)s 2125 X(High-Performance)s 2992 X(Computers)s 3528 X(dongarra)s 3969 X(Sorensen)s 4424 X(germany)s 4861 X(The)s 5085 X(Use)s 5309 X(of)s 720 X 5640 Y(BLAS3)s 1094 X(in)s 1217 X(Linear)s 1538 X(Algebra)s 1926 X(on)s 2072 X(a)s 2156 X(Parallel)s 2527 X(Processor)s 2989 X(with)s 3221 X(a)s 3305 X(Heirarchical)s 3880 X(Memory)s 4294 X(We)s 4481 X(would)s 4792 X(like)s 4993 X(our)s 5175 X(algo-)s 720 X 5800 Y(rithms)s 1040 X(to)s 1164 X(observe)s 1542 X(the)s 1714 X(principle)s 2142 X(of)s 2271 X(locality)s 2639 X(of)s 2768 X(reference,)s 3237 X(so)s 3374 X(that)s 3578 X(the)s 3751 X(data)s 3972 X(can)s 4162 X(be)s 4304 X(e)s Cff(ectively)s 4805 X(utilized.)s 5230 X(Our)s 720 X 5960 Y(new)s 930 X(tool)s 1130 X(provides)s 1536 X(an)s 1667 X(aid)s 1829 X(in)s 1943 X(understanding)s 2593 X(a)s 2669 X(program's)s 3147 
X(locality)s 3505 X(of)s 3624 X(reference.)s 720 X 6180 Y 970 X(We)s 1160 X(have)s 1405 X(designed)s 1834 X(and)s 2031 X(built)s 2273 X(two)s 2477 X(tools)s 2731 X(that)s 2935 X(will)s 3146 X(help)s 3375 X(in)s 3501 X(understanding)s 4163 X(how)s 4392 X(a)s 4480 X(speci\256c)s 4855 X(Fortran)s 5218 X(pro-)s 720 X 6340 Y(gram)s 994 X(references)s 1489 X(memory.)s 1960 X(The)s 2179 X(\256rst)s 2400 X(tool,)s 2649 X(called)s 2959 X(the)s 3142 X(Memory)s 3569 X(Access)s 3926 X(Pattern)s 4285 X(Instrumentation)s 5029 X(program)s 720 X 6500 Y(\(MAPI\),)s 1130 X(instruments)s 1679 X(a)s 1763 X(user's)s 2060 X(program)s 2467 X(and,)s 2689 X(when)s 2962 X(the)s 3132 X(instrumented)s 3742 X(program)s 4149 X(is)s 4259 X(run,)s 4469 X(produces)s 4900 X(a)s 4984 X(trace)s 5231 X(\256le.)s 720 X 6660 Y(The)s 922 X(trace)s 1164 X(\256le)s 1336 X(is)s 1441 X(a)s 1520 X(detailed)s 1898 X(ASCII)s 2216 X(\256le)s 2388 X(giving)s 2701 X(the)s 2866 X(individual)s 3344 X(memory)s 3741 X(references)s 4218 X(that)s 4414 X(were)s 4656 X(made)s 4924 X(to)s 5041 X(the)s 5206 X(one-)s 720 X 6760 Y 0.0 8 8 f.R ft /Cru { 0 ysiz pt 5 div 2 copy rmoveto (_) show neg rmoveto } bind def Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru Cru 720 X 6846 Y 0.0 7 7 f.I ft(\262)s 6870 Y 770 X 0.0 9 9 f.R ft(Work)s 1022 X(supported)s 1424 X(in)s 1541 X(part)s 1729 X(by)s 1867 X(the)s 2025 X(Applied)s 2363 X(Mathematical)s 2906 X(Sciences)s 3269 X(subprogram)s 3747 X(of)s 3870 X(the)s 4028 X(O)s CFi(ce)s 4297 X(of)s 4420 X(Energy)s 720 X 6980 Y(Research,)s 1096 X(U.)s 1207 X(S.)s 1303 X(Department)s 1751 X(of)s 1849 X(Energy,)s 2155 X(under)s 2383 X(Contract)s 2716 X(W-31-109-Eng-38.)s 720 X 7200 Y(Typeset)s 1028 X(on)s 1141 X(November)s 1544 X(24,)s 1680 X(1988.)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 9 9 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 880 Y 0.0 11 11 f.R ft(and)s 913 
X(two-dimensional)s 1687 X(arrays)s 1988 X(in)s 2109 X(the)s 2278 X(program.)s 2712 X(The)s 2917 X(second)s 3256 X(tool,)s 3492 X(called)s 3789 X(the)s 3959 X(Memory)s 4373 X(Access)s 4718 X(Pattern)s 5065 X(Anima-)s 720 X 1040 Y(tion)s 927 X(\(MAPA\))s 1352 X(program,)s 1786 X(allows)s 2108 X(the)s 2277 X(trace)s 2523 X(\256le)s 2699 X(to)s 2820 X(be)s 2958 X(viewed.)s 3337 X(This)s 3568 X(program)s 3974 X(runs)s 4197 X(on)s 4341 X(a)s 4423 X(Sun)s 4629 X(workstation)s 5182 X(\(run-)s 720 X 1200 Y(ning)s 944 X(UNIX)s 1245 X(and)s 1431 X(SunView\))s 1904 X(Sun)s 2104 X(Microsystems)s 720 X 1420 Y 970 X(In)s 1102 X(this)s 1303 X(paper)s 1586 X(Section)s 1957 X(2)s 2053 X(examines)s 2508 X(the)s 2683 X(motivation)s 3202 X(for)s 3370 X(e)s /CFi { (f) show xsiz pt 20 div neg 0 rmoveto (\256) s } bind def CFi(cient)s 3764 X(use)s 3951 X(of)s 4083 X(memory)s 4491 X(hierarchy,)s 4973 X(Section)s 5345 X(3)s 720 X 1580 Y(discusses)s 1174 X(the)s 1353 X(goals)s 1630 X(for)s 1802 X(the)s 1981 X(tools,)s 2269 X(Section)s 2644 X(4)s 2744 X(presents)s 3148 X(a)s 3241 X(detailed)s 3633 X(description)s 4166 X(of)s 4302 X(the)s 4481 X(tools,)s 4769 X(and)s 4971 X(Section)s 5345 X(5)s 720 X 1740 Y(shows)s 1030 X(how)s 1254 X(to)s 1375 X(instrument)s 1881 X(a)s 1964 X(program)s 2370 X(using)s 2644 X(MAPI)s 2954 X(and)s 3147 X(how)s 3371 X(it)s 3469 X(is)s 3579 X(tied)s 3780 X(into)s 3988 X(the)s 4158 X(BLAS.)s 4504 X(Sections)s 4913 X(6,)s 5032 X(7,)s 5151 X(and)s 5345 X(8)s 720 X 1900 Y(discuss)s 1080 X(the)s 1256 X(user)s 1480 X(interface)s 1903 X(to)s 2031 X(the)s 2207 X(animation)s 2689 X(part)s 2901 X(of)s 3034 X(the)s 3210 X(tools)s 3467 X(and)s 3667 X(give)s 3897 X(an)s 4041 X(example)s 4453 X(of)s 4585 X(its)s 4731 X(use.)s 4974 X(Section)s 5345 X(9)s 720 X 2060 Y(states)s 992 X(the)s 1154 X(availability)s 1677 X(of)s 1796 X(the)s 1958 X(tools)s 2201 X(over)s 2423 X /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I 
ft(netlib)s 2702 X 0.0 11 11 f.R ft(and)s 2888 X(Section)s 3246 X(10)s 3384 X(summarizes)s 3936 X(our)s 4110 X(e)s /Cff { (f) show xsiz pt 20 div neg 0 rmoveto (f) s } bind def Cff(orts.)s 720 X 2380 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(2.)s 859 X(Motivation)s 720 X 2600 Y 970 X 0.0 11 11 f.R ft(The)s 1172 X(goal)s 1393 X(of)s 1516 X(this)s 1708 X(work)s 1965 X(is)s 2071 X(to)s 2189 X(assist)s 2460 X(in)s 2578 X(formulating)s 3129 X(correct)s 3463 X(algorithms)s 3967 X(for)s 4127 X(high-performance)s 4943 X(computers)s 720 X 2760 Y(and)s 910 X(to)s 1028 X(aid)s 1194 X(as)s 1317 X(much)s 1593 X(as)s 1716 X(possible)s 2109 X(the)s 2275 X(process)s 2635 X(of)s 2758 X(translating)s 3254 X(an)s 3388 X(algorithm)s 3847 X(into)s 4050 X(an)s 4184 X(e)s CFi(cient)s 4568 X(implementation)s 5290 X(on)s 720 X 2920 Y(a)s 816 X(speci\256c)s 1199 X(machine.)s 1674 X(Over)s 1940 X(the)s 2122 X(past)s 2347 X(\256ve)s 2560 X(years)s 2839 X(we)s 3015 X(have)s 3270 X(developed)s 3769 X(approaches)s 4309 X(in)s 4444 X(the)s 4627 X(design)s 4963 X(of)s 5103 X(certain)s 720 X 3080 Y(numerical)s 1190 X(algorithms)s 1693 X(that)s 1890 X(allow)s 2166 X(both)s 2394 X(e)s CFi(cient)s 2779 X(and)s 2969 X(portability)s 3460 X(Linear)s 3777 X(Algebra)s 4161 X(on)s 4303 X(High-Performance)s 5149 X(Com-)s 720 X 3240 Y(puters)s 1018 X(dongarra)s 1436 X(Sorensen)s 1868 X(germany)s 2281 X(Our)s 2481 X(current)s 2820 X(e)s Cff(orts)s 3130 X(emphasize)s 3623 X(three)s 3872 X(areas:)s 4185 X(environments)s 4814 X(for)s 4972 X(algorithm)s 720 X 3400 Y(development,)s 1343 X(parallel)s 1699 X(programming)s 2325 X(methodologies,)s 3029 X(and)s 3215 X(advanced)s 3655 X(algorithm)s 4111 X(development.)s 720 X 3620 Y 970 X(For)s 1164 X(most)s 1420 X(computational)s 2090 X(problems,)s 2568 X(the)s 2743 X(design)s 3071 X(and)s 3270 X(implementation)s 4002 X(of)s 4134 X(an)s 4279 X(e)s CFi(cient)s 4674 X(parallel)s 5044 X(solution)s 720 X 3780 
Y(are)s 889 X(formidable)s 1407 X(challenges.)s 1962 X(Since)s 2243 X(parallel)s 2608 X(computation)s 3195 X(is)s 3306 X(still)s 3510 X(in)s 3633 X(its)s 3775 X(infancy,)s 4168 X(we)s 4332 X(often)s 4594 X(do)s 4741 X(not)s 4919 X(understand)s 720 X 3940 Y(what)s 978 X(algorithms)s 1495 X(to)s 1627 X(use,)s 1847 X(much)s 2137 X(less)s 2348 X(how)s 2583 X(to)s 2715 X(implement)s 3232 X(them)s 3498 X(e)s CFi(ciently)s 3983 X(on)s 4139 X(speci\256c)s 4520 X(architectures.)s 5180 X(With)s 720 X 4100 Y(existing)s 1106 X(technology,)s 1659 X(the)s 1830 X(construction)s 2410 X(of)s 2538 X(a)s 2623 X(parallel)s 2988 X(program)s 3396 X(is)s 3507 X(a)s 3592 X(laborious,)s 4066 X(largely)s 4407 X(manual)s 4767 X(enterprise)s 5235 X(that)s 720 X 4260 Y(forces)s 1028 X(the)s 1204 X(programmer)s 1787 X(to)s 1915 X(assume)s 2280 X(responsibility)s 2922 X(for)s 3091 X(determining)s 3664 X(a)s 3754 X(suitable)s 4138 X(mathematical)s 4771 X(algorithm)s 5242 X(and)s 720 X 4420 Y(translating)s 1212 X(it)s 1302 X(into)s 1502 X(an)s 1633 X(intricately)s 2106 X(coordinated)s 2651 X(set)s 2801 X(of)s 2920 X(instructions)s 3462 X(tuned)s 3734 X(to)s 3848 X(a)s 3924 X(particular)s 4371 X(parallel)s 4727 X(machine.)s 720 X 4640 Y 970 X(E)s CFi(cient)s 1394 X(parallel)s 1774 X(programs)s 2240 X(are)s 2424 X(much)s 2721 X(more)s 2999 X(di)s CFi(cult)s 3395 X(to)s 3534 X(write)s 3812 X(than)s 4054 X(e)s CFi(cient)s 4460 X(sequential)s 4958 X(programs,)s 720 X 4800 Y(because)s 1097 X(the)s 1263 X(behavior)s 1678 X(of)s 1801 X(parallel)s 2161 X(programs)s 2607 X(is)s 2713 X(nondeterministic.)s 3539 X(They)s 3795 X(are)s 3958 X(also)s 4166 X(much)s 4441 X(less)s 4637 X(portable,)s 5055 X(because)s 720 X 4960 Y(the)s 890 X(structure)s 1309 X(critically)s 1735 X(depends)s 2130 X(on)s 2276 X(speci\256c)s 2647 X(architectural)s 3229 X(features)s 3610 X(of)s 3738 X(the)s 3909 X(underlying)s 4422 X(hardware)s 4864 X(\(such)s 5138 X(as)s 5266 X(the)s 720 X 5120 Y(structure)s 1142 X(of)s 1272 X(the)s 1445 
X(memory)s 1850 X(hierarchy\).)s 2393 X(To)s 2554 X(use)s 2738 X(parallel)s 3104 X(machines)s 3556 X(e)s CFi(ciently)s 4033 X(in)s 4157 X(scienti\256c)s 4592 X(research,)s 5020 X(we)s 5185 X(must)s 720 X 5280 Y(develop)s 1117 X(high-level)s 1612 X(languages)s 2101 X(and)s 2310 X(environments)s 2959 X(for)s 3137 X(producing)s 3633 X(e)s CFi(cient)s 4037 X(parallel)s 4416 X(solutions)s 4866 X(to)s 5003 X(scienti\256c)s 720 X 5440 Y(problems.)s 720 X 5660 Y 970 X(The)s 1180 X(key)s 1378 X(to)s 1504 X(using)s 1783 X(a)s 1871 X(high-performance)s 2694 X(computer)s 3148 X(e)s Cff(ectively)s 3651 X(is)s 3766 X(to)s 3893 X(avoid)s 4178 X(unnecessary)s 4753 X(memory)s 5160 X(refer-)s 720 X 5820 Y(ences.)s 1053 X(In)s 1179 X(most)s 1429 X(computers,)s 1949 X(data)s 2166 X(\257ows)s 2440 X(from)s 2688 X(memory)s 3089 X(into)s 3295 X(and)s 3487 X(out)s 3662 X(of)s 3787 X(registers)s 4192 X(and)s 4384 X(from)s 4631 X(registers)s 5036 X(into)s 5242 X(and)s 720 X 5980 Y(out)s 894 X(of)s 1018 X(functional)s 1496 X(units,)s 1772 X(which)s 2073 X(perform)s 2458 X(the)s 2625 X(given)s 2902 X(instructions)s 3450 X(on)s 3594 X(the)s 3762 X(data.)s 4034 X(Algorithm)s 4527 X(performance)s 5112 X(can)s 5297 X(be)s 720 X 6140 Y(dominated)s 1239 X(by)s 1404 X(the)s 1593 X(amount)s 1978 X(of)s 2123 X(memory)s 2543 X(tra)s CFi(c)s 2852 X(rather)s 3160 X(than)s 3403 X(by)s 3567 X(the)s 3755 X(number)s 4144 X(of)s 4289 X(\257oating-point)s 4943 X(operations)s 720 X 6300 Y(involved.)s 1192 X(The)s 1393 X(movement)s 1888 X(of)s 2010 X(data)s 2223 X(between)s 2618 X(memory)s 3015 X(and)s 3204 X(registers)s 3607 X(can)s 3790 X(be)s 3925 X(as)s 4048 X(costly)s 4343 X(as)s 4466 X(arithmetic)s 4943 X(operations)s 720 X 6460 Y(on)s 858 X(the)s 1020 X(data.)s 720 X 6680 Y 970 X(This)s 1196 X(situation)s 1606 X(provides)s 2014 X(considerable)s 2597 X(motivation)s 3105 X(to)s 3221 X(restructure)s 3719 X(existing)s 4099 X(algorithms)s 4601 X(and)s 4790 X(to)s 4907 X(devise)s 5218 X(new)s 720 X 6840 
Y(algorithms)s 1228 X(that)s 1430 X(minimize)s 1883 X(data)s 2102 X(movement.)s 2658 X(A)s 2773 X(number)s 3144 X(of)s 3271 X(researchers)s 3796 X(have)s 4038 X(demonstrated)s 4665 X(the)s 4835 X(e)s Cff(ectiveness)s 720 X 7000 Y(of)s 856 X(block)s 1145 X(algorithms)s 1661 X(on)s 1816 X(a)s 1909 X(variety)s 2258 X(of)s 2394 X(modern)s 2774 X(computer)s 3233 X(architectures)s 3836 X(with)s 4077 X(vector-processing)s 4900 X(or)s 5036 X(parallel-)s 720 X 7160 Y(processing)s 1219 X(capabilities)s 1749 X(Demmel)s 2157 X(Prospectus)s 2662 X(on)s 2801 X(which)s 3098 X(potentially)s 3598 X(high)s 3823 X(performance)s 4403 X(can)s 4583 X(easily)s 4868 X(be)s 5000 X(degraded)s 720 X 7320 Y(by)s 864 X(excessive)s 1317 X(transfer)s 1684 X(of)s 1809 X(data)s 2053 X(between)s 2451 X(di)s Cff(erent)s 2856 X(levels)s 3146 X(of)s 3271 X(memory)s 3671 X(\(vector)s 4014 X(registers,)s 4448 X(cache,)s 4758 X(local)s 5006 X(memory,)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 2 -)s 720 X 880 Y(main)s 968 X(memory,)s 1390 X(or)s 1509 X(solid-state)s 1989 X(disks\).)s 720 X 1100 Y 970 X(In)s 1098 X(particular,)s 1582 X(for)s 1746 X(computers)s 2240 X(with)s 2474 X(memory)s 2878 X(hierarchy)s 3328 X(or)s 3457 X(for)s 3622 X(true)s 3830 X(parallel)s 4196 X(processing)s 4703 X(computers,)s 5226 X(it)s 5326 X(is)s 720 X 1260 Y(often)s 977 X(preferable)s 1450 X(to)s 1568 X(partition)s 1973 X(the)s 2138 X(matrix)s 2456 X(or)s 2578 X(matrices)s 2980 X(into)s 3183 X(blocks)s 3501 X(and)s 3690 X(to)s 3807 X(perform)s 4190 X(the)s 4355 X(computation)s 4936 X(by)s 5077 X(matrix-)s 720 X 1420 Y(matrix)s 1043 X(operations)s 1536 X(on)s 1682 X(the)s 1852 X(blocks.)s 2203 X(This)s 2435 X(approach)s 2871 X(provides)s 3285 X(for)s 3448 X(full)s 3637 X(reuse)s 3903 X(of)s 4030 X(data)s 4248 X(while)s 4528 X(the)s 4698 X(block)s 4978 X(is)s 
5088 X(held)s 5314 X(in)s 720 X 1580 Y(cache)s 1004 X(or)s 1132 X(local)s 1381 X(memory.)s 1811 X(It)s 1914 X(avoids)s 2237 X(excessive)s 2692 X(movement)s 3192 X(of)s 3319 X(data)s 3537 X(to)s 3659 X(and)s 3853 X(from)s 4102 X(memory)s 4504 X(and)s 4698 X(gives)s 4966 X(a)s 5050 X(surface-)s 720 X 1740 Y(to-volume)s 1202 X(e)s /Cff { (f) show xsiz pt 20 div neg 0 rmoveto (f) s } bind def Cff(ect)s 1474 X(for)s 1631 X(the)s 1795 X(ratio)s 2026 X(of)s 2148 X(arithmetic)s 2624 X(operations)s 3112 X(to)s 3229 X(data)s 3442 X(movement,)s 3965 X(i.e.,)s 4159 X /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft(O)s 4256 X 0.0 11 11 f.R ft(\()s 0.0 11 11 f.I ft(n)s 1713 Y 4360 X 0.0 8 8 f.R ft(3)s 1740 Y 0.0 11 11 f.R ft(\))s 4467 X(arithmetic)s 4943 X(operations)s 720 X 1900 Y(to)s 847 X 0.0 11 11 f.I ft(O)s 944 X 0.0 11 11 f.R ft(\()s 0.0 11 11 f.I ft(n)s 1873 Y 1048 X 0.0 8 8 f.R ft(2)s 1900 Y 0.0 11 11 f.R ft(\))s 1165 X(data)s 1388 X(movement.)s 1949 X(In)s 2081 X(addition,)s 2511 X(on)s 2662 X(architectures)s 3260 X(that)s 3465 X(provide)s 3840 X(for)s 4007 X(parallel)s 4375 X(processing,)s 4912 X(parallelism)s 720 X 2060 Y(can)s 912 X(be)s 1056 X(exploited)s 1506 X(in)s 1633 X(two)s 1839 X(ways:)s 2164 X(\(1\))s 2332 X(operations)s 2830 X(on)s 2981 X(distinct)s 3347 X(blocks)s 3675 X(may)s 3905 X(be)s 4049 X(performed)s 4545 X(in)s 4672 X(parallel;)s 5073 X(and)s 5273 X(\(2\))s 720 X 2220 Y(within)s 1030 X(the)s 1192 X(operations)s 1677 X(on)s 1815 X(each)s 2042 X(block,)s 2342 X(scalar)s 2624 X(or)s 2743 X(vector)s 3044 X(operations)s 3529 X(may)s 3746 X(be)s 3877 X(performed)s 4360 X(in)s 4474 X(parallel.)s 720 X 2440 Y 970 X(The)s 1174 X(performance)s 1760 X(of)s 1886 X(these)s 2146 X(block)s 2425 X(algorithms)s 2931 X(depends)s 3325 X(on)s 3470 X(the)s 3639 X(dimensions)s 4176 X(chosen)s 4515 X(for)s 4677 X(the)s 4846 X(blocks.)s 5224 X(It)s 5326 X(is)s 720 X 2600 Y(important)s 1183 X(to)s 1304 
X(select)s 1588 X(the)s 1757 X(blocking)s 2177 X(strategy)s 2559 X(for)s 2721 X(each)s 2955 X(of)s 3081 X(our)s 3262 X(target)s 3546 X(machines,)s 4022 X(and)s 4214 X(then)s 4437 X(develop)s 4818 X(a)s 4900 X(mechanism)s 720 X 2760 Y(whereby)s 1124 X(the)s 1286 X(routines)s 1668 X(can)s 1847 X(determine)s 2313 X(good)s 2561 X(block)s 2833 X(dimensions)s 3363 X(automatically.)s 720 X 2980 Y 970 X(Since)s 1245 X(most)s 1491 X(memory)s 1888 X(accesses)s 2289 X(for)s 2448 X(data)s 2662 X(in)s 2780 X(scienti\256c)s 3209 X(programs)s 3655 X(are)s 3819 X(for)s 3978 X(matrix)s 4297 X(elements,)s 4747 X(which)s 5047 X(are)s 5211 X(usu-)s 720 X 3140 Y(ally)s 926 X(stored)s 1235 X(in)s 1362 X(two-dimensional)s 2142 X(arrays)s 2449 X(\(column-major)s 3148 X(in)s 3275 X(FORTRAN\),)s 3894 X(knowing)s 4320 X(the)s 4495 X(order)s 4766 X(of)s 4897 X(array)s 5160 X(refer-)s 720 X 3300 Y(ences)s 1013 X(is)s 1138 X(important)s 1617 X(in)s 1754 X(determining)s 2336 X(the)s 2521 X(amount)s 2902 X(of)s 3044 X(memory)s 3461 X(tra)s /CFi { (f) show xsiz pt 20 div neg 0 rmoveto (\256) s } bind def CFi(c.)s 3823 X(To)s 3996 X(get)s 4181 X(an)s 4335 X(idea)s 4568 X(of)s 4710 X(how)s 4950 X(arrays)s 5268 X(are)s 720 X 3460 Y(accessed)s 1150 X(for)s 1326 X(a)s 1423 X(particular)s 1891 X(implementation)s 2631 X(of)s 2771 X(an)s 2923 X(algorithm)s 3400 X(and)s 3607 X(for)s 3783 X(a)s 3880 X(particular)s 4347 X(data)s 4577 X(set,)s 4775 X(we)s 4950 X(could)s 5242 X(add)s 720 X 3620 Y(instructions)s 1275 X(to)s 1402 X(our)s 1589 X(code)s 1836 X(to)s 1963 X(output)s 2286 X(the)s 2461 X(name)s 2739 X(of)s 2871 X(the)s 3046 X(array)s 3310 X(and)s 3509 X(the)s 3684 X(indices,)s 4064 X(whenever)s 4529 X(an)s 4673 X(array)s 4937 X(element)s 5326 X(is)s 720 X 3780 Y(accessed.)s 1205 X(However,)s 1681 X(the)s 1863 X(coding)s 2210 X(would)s 2533 X(be)s 2684 X(tedious)s 3050 X(and)s 3256 X(error)s 3515 X(prone,)s 3840 X(and)s 4046 X(looking)s 4431 X(at)s 4557 X(page)s 4810 X(after)s 5056 X(page)s 5309 X(of)s 
720 X 3940 Y(indices)s 1061 X(is)s 1165 X(a)s 1243 X(di)s CFi(cult)s 1616 X(way)s 1828 X(of)s 1950 X(visualizing)s 2464 X(the)s 2629 X(memory)s 3026 X(access)s 3335 X(patterns.)s 3769 X(Notice)s 4092 X(the)s 4257 X(use)s 4434 X(of)s 4556 X(the)s 4721 X(word)s 4977 X("visualiz-)s 720 X 4100 Y(ing.")s 998 X(We)s 1185 X(would)s 1495 X(like)s 1695 X(to)s 1816 X(take)s 2033 X(an)s 2171 X(arbitrary)s 2582 X(linear)s 2866 X(algebra)s 3222 X(program,)s 3656 X(have)s 3897 X(its)s 4037 X(matrices)s 4443 X(mapped)s 4825 X(to)s 4946 X(a)s 5029 X(graphics)s 720 X 4260 Y(screen,)s 1072 X(and)s 1276 X(have)s 1528 X(a)s 1623 X(matrix)s 1957 X(element)s 2351 X(\257ash)s 2606 X(on)s 2763 X(the)s 2944 X(screen)s 3269 X(whenever)s 3740 X(its)s 3892 X(corresponding)s 4566 X(array)s 4836 X(element)s 5230 X(was)s 720 X 4420 Y(accessed)s 1129 X(in)s 1243 X(memory.)s 1693 X(This)s 1917 X(type)s 2134 X(would)s 2437 X(of)s 2556 X(tool)s 2756 X(would)s 3059 X(be)s 3190 X(bene\256cial)s 3644 X(in)s 3758 X(many)s 4030 X(ways:)s 720 X 4640 Y(1.)s 970 X(It)s 1075 X(would)s 1388 X(help)s 1615 X(show)s 1885 X(that)s 2088 X(the)s 2260 X(implementation)s 2989 X(of)s 3118 X(the)s 3290 X(algorithm)s 3756 X(is)s 3868 X(correct,)s 4236 X(or)s 4366 X(at)s 4484 X(least)s 4724 X(doing)s 5014 X(what)s 5266 X(the)s 720 X 4800 Y 970 X(developer)s 1429 X(thinks)s 1727 X(the)s 1889 X(algorithm)s 2345 X(should)s 2667 X(be)s 2798 X(doing.)s 720 X 5020 Y(2.)s 970 X(It)s 1065 X(would)s 1368 X(provide)s 1731 X(insight)s 2060 X(into)s 2260 X(the)s 2422 X(algorithm's)s 2957 X(behavior.)s 720 X 5240 Y(3.)s 970 X(It)s 1065 X(would)s 1368 X(enable)s 1681 X(the)s 1843 X(programmer)s 2412 X(to)s 2526 X(compare)s 2930 X(the)s 3092 X(memory)s 3486 X(access)s 3792 X(patterns)s 4167 X(of)s 4286 X(di)s Cff(erent)s 4685 X(algorithms.)s 720 X 5460 Y(4.)s 970 X(Being)s 1268 X(easy)s 1497 X(to)s 1618 X(use,)s 1827 X(it)s 1924 X(would)s 2234 X(be)s 2372 X(used)s 2609 X(more)s 2870 X(often)s 3131 X(than)s 3356 X(a)s 3440 X(tedious)s 
3794 X(method)s 4160 X(such)s 4397 X(as)s 4524 X(examining)s 5024 X(pages)s 5309 X(of)s 720 X 5620 Y 970 X(indices.)s 720 X 5940 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(3.)s 859 X(Goals)s 720 X 6160 Y 970 X 0.0 11 11 f.R ft(The)s 1174 X(MAP)s 1447 X(tools)s 1696 X(are)s 1862 X(intended)s 2274 X(to)s 2394 X(provide)s 2763 X(an)s 2900 X(``animated'')s 3480 X(view)s 3727 X(of)s 3853 X(the)s 4022 X(memory)s 4423 X(activity)s 4788 X(during)s 5110 X(execu-)s 720 X 6320 Y(tion.)s 954 X(Our)s 1158 X(objective)s 1594 X(in)s 1714 X(providing)s 2176 X(these)s 2435 X(tools)s 2684 X(was)s 2888 X(threefold:)s 3376 X(\(1\))s 3537 X(We)s 3722 X(wished)s 4067 X(to)s 4187 X(easily)s 4477 X(play)s 4700 X(back)s 4940 X(a)s 5022 X(previous)s 720 X 6480 Y(execution)s 1187 X(trace)s 1439 X(over)s 1674 X(and)s 1873 X(over)s 2108 X(again)s 2386 X(to)s 2513 X(study)s 2793 X(how)s 3024 X(an)s 3169 X(algorithm)s 3639 X(uses)s 3870 X(memory,)s 4306 X(\(2\))s 4475 X(we)s 4644 X(would)s 4961 X(wished)s 5314 X(to)s 720 X 6640 Y(experiment)s 1250 X(with)s 1483 X(di)s Cff(erent)s 1890 X(memory)s 2292 X(hierarchy)s 2740 X(schemes)s 3147 X(and)s 3341 X(observe)s 3717 X(their)s 3954 X(e)s Cff(ects)s 4275 X(on)s 4421 X(the)s 4591 X(program's)s 5077 X(\257ow)s 5309 X(of)s 720 X 6800 Y(information;)s 1302 X(and)s 1492 X(\(3\))s 1651 X(we)s 1810 X(wished)s 2153 X(to)s 2271 X(use)s 2449 X(what)s 2694 X(was)s 2896 X(available)s 3323 X(from)s 3568 X(Sun)s 3773 X(Microsystems)s 4423 X(in)s 4542 X(the)s 4709 X(way)s 4924 X(of)s 5048 X(creating)s 720 X 6960 Y(a)s 796 X(SunView)s 1233 X(application.)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 3 -)s 720 X 880 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(4.)s 859 
X(Description)s 1430 X(of)s 1549 X(Tools)s 720 X 1100 Y 970 X 0.0 11 11 f.R ft(There)s 1257 X(are)s 1422 X(two)s 1620 X(basic)s 1878 X(aspects)s 2227 X(to)s 2346 X(accomplishing)s 3020 X(our)s 3200 X(goals:)s 3525 X(preprocessor)s 4117 X(instrumentation)s 4842 X(and)s 5034 X(postpro-)s 720 X 1260 Y(cessing)s 1086 X(display)s 1447 X(graphics.)s 1917 X(Our)s 2130 X(\256rst)s 2345 X(tool,)s 2588 X(MAPI,)s 2934 X(is)s 3051 X(applied)s 3417 X(to)s 3546 X(the)s 3723 X(user's)s 4027 X(program)s 4441 X(before)s 4762 X(it)s 4867 X(is)s 4984 X(executed.)s 720 X 1420 Y(This)s 948 X(tool)s 1152 X(instruments)s 1698 X(the)s 1864 X(program)s 2267 X(so)s 2398 X(that)s 2596 X(trace)s 2840 X(information)s 3392 X(can)s 3576 X(be)s 3712 X(produced.)s 4208 X(MAPA)s 4559 X(is)s 4666 X(a)s 4747 X(postprocessing)s 720 X 1580 Y(tool)s 930 X(which)s 1235 X(displays)s 1633 X(the)s 1804 X(output)s 2123 X(of)s 2251 X(the)s 2422 X(instrumented)s 3033 X(program,)s 3469 X(permitting)s 3965 X(a)s 4050 X(user)s 4269 X(to)s 4392 X(visualize)s 4819 X(the)s 4990 X(output)s 5309 X(of)s 720 X 1740 Y(the)s 882 X(instrumented)s 1484 X(program)s 1883 X(and)s 2069 X(study)s 2336 X(how)s 2553 X(the)s 2715 X(program)s 3114 X(is)s 3216 X(referencing)s 3740 X(memory.)s 720 X 1960 Y 970 X(We)s 1149 X(have)s 1384 X(developed)s 1863 X(a)s 1940 X(simple)s 2263 X(preprocessor)s 2850 X(\(written)s 3226 X(in)s 3341 X(C\))s 3480 X(that)s 3674 X(analyzes)s 4079 X(a)s 4156 X(FORTRAN)s 4699 X(module)s 5058 X(and,)s 5273 X(for)s 720 X 2120 Y(each)s 953 X(reference)s 1390 X(to)s 1510 X(a)s 1592 X(matrix)s 1913 X(element,)s 2321 X(generates)s 2766 X(a)s 2847 X(FORTRAN)s 3394 X(statement)s 3848 X(that)s 4046 X(calls)s 4280 X(a)s 4361 X(MAPI)s 4669 X(routine)s 5013 X(which)s 5314 X(in)s 720 X 2280 Y(turn)s 928 X(records)s 1280 X(the)s 1445 X(reference)s 1879 X(to)s 1996 X(matrix)s 2314 X(element.)s 2748 X(In)s 2870 X(addition,)s 3290 X(if)s 3388 X(calls)s 3620 X(are)s 3784 X(made)s 4053 X(to)s 4171 X(Level)s 4452 X(1,)s 4567 
X(2,)s 4682 X(or)s 4805 X(3)s 4892 X(BLAS)s 5206 X(law-)s 720 X 2440 Y(son)s 902 X(hanson)s 1242 X(paper)s 1513 X(An)s 1676 X(Extended)s 2119 X(Set)s 2289 X(of)s 2409 X(Fortran)s 2761 X(Basic)s 3034 X(Linear)s 3348 X(Algebra)s 3728 X(Subprograms)s 4342 X(paper)s 4612 X(de\256ne)s 4908 X(A)s 5015 X(Proposal)s 720 X 2600 Y(for)s 885 X(a)s 971 X(Set)s 1150 X(of)s 1279 X(Level)s 1566 X(3)s 1659 X(Basic)s 1941 X(Linear)s 2264 X(Algebra)s 2654 X(Subprograms)s 3278 X(MAPI)s 3591 X(translates)s 4043 X(those)s 4313 X(calls)s 4552 X(into)s 4762 X(calls)s 5001 X(to)s 5125 X(MAPI)s 720 X 2760 Y(routines)s 1110 X(which)s 1414 X(understand)s 1931 X(the)s 2101 X(BLAS)s 2418 X(operations)s 2910 X(and)s 3103 X(record)s 3416 X(the)s 3585 X(appropriate)s 4118 X(array)s 4376 X(references.)s 4913 X(The)s 5118 X(output)s 720 X 2920 Y(of)s 839 X(this)s 1027 X(tool)s 1227 X(is)s 1329 X(a)s 1405 X(FORTRAN)s 1947 X(module)s 2305 X(that,)s 2526 X(when)s 2791 X(compiled)s 3228 X(and)s 3415 X(linked)s 3719 X(with)s 3944 X(a)s 4021 X(MAPI)s 4325 X(library,)s 4674 X(executes)s 5079 X(the)s 5242 X(ori-)s 720 X 3080 Y(ginal)s 984 X(code)s 1234 X(and)s 1436 X(produces)s 1875 X(a)s 1967 X(trace)s 2222 X(\256le.)s 2435 X(This)s 2675 X(trace)s 2930 X(\256le)s 3115 X(is)s 3233 X(used)s 3478 X(as)s 3613 X(input)s 3884 X(to)s 4013 X(MAPA)s 4374 X(in)s 4503 X(order)s 4776 X(to)s 4905 X(display)s 5266 X(the)s 720 X 3240 Y(memory)s 1114 X(accesses)s 1511 X(on)s 1649 X(the)s 1811 X(arrays)s 2105 X(in)s 2219 X(the)s 2381 X(FORTRAN)s 2923 X(code.)s 720 X 3460 Y 970 X(By)s 1143 X(default,)s 1519 X(the)s 1697 X(preprocessor)s 2299 X(looks)s 2582 X(for)s 2754 X(references)s 3245 X(to)s 3376 X(array)s 3644 X(A)s 3768 X(and)s 3971 X(assumes)s 4382 X(that)s 4592 X(all)s 4747 X(arrays)s 5058 X(that)s 5268 X(are)s 720 X 3620 Y(parameters)s 1230 X(in)s 1347 X(calls)s 1579 X(to)s 1696 X(the)s 1861 X(BLAS)s 2174 X(subroutines)s 2712 X(are)s 2875 X(array)s 3129 X(A.)s 3267 X(However,)s 3726 X(it)s 3819 X(also)s 4027 
X(has)s 4204 X(a)s 4283 X(run-time)s 4692 X(option)s 5005 X(to)s 5122 X(search)s 720 X 3780 Y(for)s 875 X(up)s 1013 X(to)s 1127 X(three)s 1373 X(di)s /Cff { (f) show xsiz pt 20 div neg 0 rmoveto (f) s } bind def Cff(erent)s 1772 X(arrays.)s 2122 X(Thus,)s 2398 X(it)s 2488 X(can)s 2667 X(be)s 2798 X(directed)s 3178 X(to)s 3292 X(look)s 3516 X(for)s 3671 X(references)s 4145 X(to)s 4259 X(arrays)s 4553 X(A,)s 4688 X(B,)s 4818 X(and)s 5004 X(C.)s 720 X 4000 Y 970 X(An)s 1132 X(example)s 1531 X(of)s 1650 X(how)s 1867 X(the)s 2029 X(program)s 2428 X(is)s 2530 X(instrumented)s 3132 X(is)s 3234 X(as)s 3353 X(follows.)s 3767 X(The)s 3965 X(original)s 4335 X(code)s 4569 X(is)s 720 X 4320 Y 1374 X(D)s 1447 X(O)s 1605 X(3)s 1678 X(0)s 1812 X(K)s 1966 X(=)s 2116 X(1)s 2202 X(,)s 2268 X(J)s 2344 X(-)s 2408 X(1)s 720 X 4480 Y 1593 X(D)s 1666 X(O)s 1824 X(2)s 1897 X(0)s 2052 X(I)s 2185 X(=)s 2323 X(K)s 2404 X(+)s 2481 X(1)s 2567 X(,)s 2615 X(N)s 720 X 4640 Y 1812 X(A)s 1906 X(\()s 1979 X(I)s 2056 X(,)s 2122 X(J)s 2198 X(\))s 2331 X(=)s 2469 X(A)s 2563 X(\()s 2636 X(I)s 2713 X(,)s 2779 X(J)s 2855 X(\))s 2988 X(+)s 3126 X(A)s 3220 X(\()s 3293 X(I)s 3370 X(,)s 3418 X(K)s 3512 X(\))s 3576 X(*)s 3637 X(A)s 3731 X(\()s 3783 X(K)s 3881 X(,)s 3947 X(J)s 4023 X(\))s 720 X 4800 Y 948 X(2)s 1021 X(0)s 1596 X(C)s 1666 X(O)s 1739 X(NT)s 1906 X(I)s 1958 X(N)s 2031 X(UE)s 720 X 4960 Y 948 X(3)s 1021 X(0)s 1377 X(C)s 1447 X(O)s 1520 X(NT)s 1687 X(I)s 1739 X(N)s 1812 X(UE)s 720 X 5280 Y(which)s 1016 X(is)s 1118 X(transformed)s 1675 X(into)s 720 X 5600 Y 1374 X(D)s 1447 X(O)s 1605 X(3)s 1678 X(0)s 1812 X(K)s 1966 X(=)s 2116 X(1)s 2202 X(,)s 2268 X(J)s 2344 X(-)s 2408 X(1)s 720 X 5760 Y 1593 X(D)s 1666 X(O)s 1824 X(2)s 1897 X(0)s 2052 X(I)s 2185 X(=)s 2323 X(K)s 2404 X(+)s 2481 X(1)s 2567 X(,)s 2615 X(N)s 720 X 5920 Y 1815 X(C)s 1885 X(AL)s 2037 X(L)s 2180 X(R)s 2271 X(\()s 2408 X(1)s 2494 X(,)s 2636 X(I)s 2713 X(,)s 2855 X(I)s 2932 X(,)s 3071 X(J)s 3151 X(,)s 3290 X(J)s 3439 X(\))s 720 X 6080 Y 
1815 X(C)s 1885 X(AL)s 2037 X(L)s 2180 X(R)s 2271 X(\()s 2408 X(1)s 2494 X(,)s 2636 X(I)s 2713 X(,)s 2855 X(I)s 2932 X(,)s 3053 X(K)s 3151 X(,)s 3272 X(K)s 3439 X(\))s 720 X 6240 Y 1815 X(C)s 1885 X(AL)s 2037 X(L)s 2180 X(R)s 2271 X(\()s 2408 X(1)s 2494 X(,)s 2615 X(K)s 2713 X(,)s 2834 X(K)s 2932 X(,)s 3071 X(J)s 3151 X(,)s 3290 X(J)s 3439 X(\))s 720 X 6400 Y 1815 X(C)s 1885 X(AL)s 2037 X(L)s 2165 X(W)s 2271 X(\()s 2408 X(1)s 2494 X(,)s 2636 X(I)s 2713 X(,)s 2855 X(I)s 2932 X(,)s 3071 X(J)s 3151 X(,)s 3290 X(J)s 3439 X(\))s 720 X 6560 Y 1812 X(A)s 1906 X(\()s 1979 X(I)s 2056 X(,)s 2122 X(J)s 2198 X(\))s 2331 X(=)s 2469 X(A)s 2563 X(\()s 2636 X(I)s 2713 X(,)s 2779 X(J)s 2855 X(\))s 2988 X(+)s 3126 X(A)s 3220 X(\()s 3293 X(I)s 3370 X(,)s 3418 X(K)s 3512 X(\))s 3576 X(*)s 3637 X(A)s 3731 X(\()s 3783 X(K)s 3881 X(,)s 3947 X(J)s 4023 X(\))s 720 X 6720 Y 948 X(2)s 1021 X(0)s 1596 X(C)s 1666 X(O)s 1739 X(NT)s 1906 X(I)s 1958 X(N)s 2031 X(UE)s 720 X 6880 Y 948 X(3)s 1021 X(0)s 1377 X(C)s 1447 X(O)s 1520 X(NT)s 1687 X(I)s 1739 X(N)s 1812 X(UE)s 720 X 7100 Y 970 X(Subroutines)s 1524 X(R)s 1626 X(and)s 1812 X(W)s 1943 X(record)s 2249 X(access)s 2555 X(to)s 2669 X(storage.)s 3069 X(The)s 3267 X(calling)s 3594 X(sequence)s 4022 X(has)s 4196 X(the)s 4358 X(following)s 4814 X(meaning:)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 4 -)s 720 X 880 Y 956 X(R\()s 1094 X(\074array id\076,)s 1611 X(\074start of row\076,)s 2297 X(\074end of row\076,)s 2952 X(\074start of column\076,)s 3798 X(\074end of column\076)s 4585 X(\))s 720 X 1200 Y 956 X(W\()s 1123 X(\074array id\076,)s 1640 X(\074start of row\076,)s 2326 X(\074end of row\076,)s 2981 X(\074start of column\076,)s 3827 X(\074end of column\076)s 4614 X(\))s 720 X 1520 Y(where)s 1027 X(\074array id\076)s 1542 X(is)s 1657 X(the)s 1832 X(number)s 2208 X(given)s 2493 X(to)s 2620 X(reference)s 3064 X(the)s 3239 X(array,)s 3532 X(\(\074start of row\076,\074start of column\076\))s 5150 X(is)s 5266 X(the)s 720 X 1680 Y(starting)s 1087 X(point)s 1351 X(in)s 1474 X(the)s 1645 X(array)s 1905 X(for)s 2068 X(the)s 2238 
X(operation,)s 2716 X(and)s 2910 X(\(\074end of row\076,\074end of column\076\))s 4436 X(is)s 4546 X(the)s 4716 X(ending)s 5051 X(point)s 5314 X(in)s 720 X 1840 Y(the)s 882 X(array)s 1133 X(for)s 1288 X(the)s 1450 X(operation.)s 720 X 2060 Y 970 X(Each)s 1222 X(call)s 1414 X(to)s 1534 X(subroutine)s 2032 X(R)s 2140 X(records)s 2495 X(the)s 2663 X(element)s 3044 X(of)s 3169 X(the)s 3337 X(array.)s 3622 X(In)s 3748 X(this)s 3943 X(case,)s 4193 X(array)s 4451 X(A)s 4565 X(has)s 4746 X(been)s 4987 X(given)s 5266 X(the)s 720 X 2220 Y(identi\256er)s 1146 X(1,)s 1258 X(the)s 1421 X(\256rst)s 1622 X(argument)s 2065 X(to)s 2180 X(subroutines)s 2716 X(R)s 2819 X(and)s 3006 X(W.)s 3166 X(Arguments)s 3683 X(2)s 3767 X(and)s 3954 X(3)s 4037 X(give)s 4254 X(the)s 4416 X(range)s 4686 X(of)s 4805 X(row)s 5003 X(accesses,)s 720 X 2380 Y(and)s 910 X(arguments)s 1399 X(4)s 1486 X(and)s 1676 X(5)s 1763 X(give)s 1984 X(the)s 2150 X(range)s 2424 X(of)s 2547 X(column)s 2909 X(accesses.)s 3367 X(Thus)s 3620 X(``CALL)s 4012 X(R\()s 4155 X(1,)s 4271 X(I,)s 4368 X(I,)s 4465 X(J,)s 4569 X(J)s 4645 X(\)'')s 4786 X(translates)s 5233 X(to)s 5352 X(a)s 720 X 2540 Y(read)s 938 X(of)s 1060 X(array)s 1314 X(A)s 1424 X(for)s 1582 X(element)s 1960 X(I,)s 2054 X(J.)s 2183 X(In)s 2304 X(addition)s 2695 X(to)s 2811 X(this)s 3001 X(information,)s 3578 X(subroutines)s 4115 X(R)s 4219 X(and)s 4407 X(W)s 4540 X(also)s 4747 X(time)s 4973 X(stamp)s 5266 X(the)s 720 X 2700 Y(event.)s 720 X 2920 Y 970 X(The)s 1175 X(subroutines)s 1718 X(R)s 1828 X(and)s 2022 X(W)s 2161 X(record)s 2475 X(the)s 2645 X(information)s 3200 X(in)s 3322 X(a)s 3406 X(trace)s 3653 X(\256le.)s 3858 X(MAPA)s 4212 X(can)s 4399 X(then)s 4624 X(read)s 4847 X(the)s 5017 X(informa-)s 720 X 3080 Y(tion)s 927 X(in)s 1048 X(the)s 1217 X(trace)s 1463 X(\256le)s 1639 X(and)s 1832 X(produce)s 2218 X(a)s 2300 X(simple)s 2628 X(animation)s 3102 X(simulating)s 3602 X(the)s 3770 X(memory)s 4170 X(accesses.)s 4629 X(Figure)s 4950 X(1)s 5039 X(displays)s 720 X 3240 Y(the)s 882 X(output)s 1192 
X(of)s 1311 X(MAPA)s 1657 X(for)s 1812 X(a)s 1888 X(view)s 2129 X(of)s 2248 X(LU)s 2422 X(decomposition.)s 720 X 5800 Y 2046 X(Figure)s 2361 X(1.)s 2472 X(MAPA)s 2818 X(output)s 3128 X(for)s 3283 X(LU)s 3457 X(decomposition)s 720 X 6280 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(5.)s 859 X(MAPI:)s 1215 X(The)s 1427 X(Preprocessor)s 720 X 6660 Y 970 X 0.0 11 11 f.R ft(The)s 1170 X(preprocessor)s 1758 X(is)s 1862 X(very)s 2086 X(simple)s 2410 X(and)s 2598 X(makes)s 2909 X(many)s 3184 X(assumptions)s 3760 X(about)s 4035 X(the)s 4200 X(FORTRAN)s 4745 X(code.)s 5010 X(Most)s 5268 X(are)s 720 X 6820 Y(assumptions)s 1299 X(about)s 1577 X(styles)s 1862 X(that,)s 2088 X(although)s 2506 X(syntacticly)s 3015 X(correct,)s 3378 X(are)s 3543 X(not)s 3717 X(in)s 3836 X(common)s 4254 X(usage.)s 4592 X(The)s 4795 X(idea)s 5010 X(here)s 5230 X(was)s 720 X 6980 Y(not)s 893 X(to)s 1011 X(spend)s 1300 X(a)s 1381 X(great)s 1632 X(deal)s 1847 X(of)s 1971 X(time)s 2200 X(producing)s 2678 X(a)s 2759 X(complete)s 3194 X(FORTRAN)s 3741 X(lexical)s 4066 X(analyzer)s 4468 X(to)s 4587 X(cover)s 4862 X(every)s 5137 X(possi-)s 720 X 7140 Y(ble)s 888 X(statement,)s 1370 X(but)s 1544 X(to)s 1663 X(produce)s 2048 X(something)s 2540 X(quick)s 2817 X(and)s 3008 X(easy)s 3235 X(that)s 3433 X(would)s 3741 X(recognize)s 4198 X(common)s 4616 X(FORTRAN)s 5163 X(state-)s 720 X 7300 Y(ments)s 1019 X(and)s 1213 X(pick)s 1439 X(out)s 1617 X(the)s 1788 X(array)s 2048 X(references.)s 2587 X(Other)s 2916 X(assumptions)s 3498 X(are)s 3667 X(so)s 3802 X(the)s 3973 X(preprocessor)s 4568 X(is)s 4679 X(aware)s 4975 X(of)s 5103 X(certain)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 5 -)s 720 X 880 Y(information)s 1275 X(such)s 1512 X(as)s 1639 X(array)s 1898 X(size)s 2104 X(and)s 2298 
X(array)s 2557 X(names.)s 2929 X(In)s 3056 X(addition,)s 3481 X(there)s 3734 X(two)s 3934 X(types)s 4201 X(of)s 4327 X(array)s 4585 X(references)s 5066 X(that)s 5266 X(the)s 720 X 1040 Y(preprocessor)s 1319 X(does)s 1561 X(not)s 1744 X(yet)s 1920 X(catch.)s 2220 X(These)s 2523 X(are)s 2697 X(valid)s 2959 X(array)s 3224 X(references)s 3712 X(but)s 3895 X(not)s 4078 X(as)s 4211 X(common.)s 4694 X(They)s 4961 X(should)s 5297 X(be)s 720 X 1200 Y(added)s 1009 X(as)s 1128 X(needed)s 1465 X(in)s 1579 X(the)s 1741 X(future.)s 2058 X(All)s 2227 X(of)s 2346 X(the)s 2508 X(assumptions)s 3081 X(and)s 3267 X(exceptions)s 3764 X(are)s 3924 X(listed)s 4191 X(below.)s 720 X 1420 Y 970 X(At)s 1108 X(the)s 1270 X(moment,)s 1687 X(the)s 1849 X(preprocessor)s 2435 X(makes)s 2743 X(the)s 2905 X(following)s 3361 X(assumptions)s 3934 X(about)s 4206 X(the)s 4368 X(input)s 4623 X(FORTRAN)s 5165 X(\256le:)s 720 X 1800 Y(1.)s 970 X(It)s 1065 X(is)s 1167 X(syntactically)s 1750 X(correct)s 2080 X(and)s 2266 X(compiles.)s 720 X 2180 Y(2.)s 970 X(Statements)s 1485 X(do)s 1627 X(not)s 1800 X(extend)s 2124 X(beyond)s 2479 X(column)s 2841 X(72;)s 3014 X(in)s 3132 X(fact,)s 3355 X(nothing)s 3724 X(should)s 4050 X(be)s 4185 X(beyond)s 4541 X(column)s 4904 X(72,)s 5075 X(such)s 5309 X(as)s 720 X 2340 Y 970 X(old)s 1139 X(dusty)s 1406 X(decks)s 1683 X(that)s 1876 X(used)s 2105 X(columns)s 2506 X(73-80)s 2790 X(as)s 2909 X(ordering)s 3308 X(information)s 3855 X(for)s 4010 X(the)s 4172 X(cards.)s 720 X 2720 Y(3.)s 970 X(A)s 1077 X(single)s 1368 X(parameter)s 1832 X(is)s 1934 X(not)s 2103 X(split)s 2322 X(across)s 2623 X(lines)s 2859 X(in)s 2973 X(a)s 3049 X(call)s 3235 X(to)s 3349 X(a)s 3425 X(BLAS)s 3735 X(routine:)s 720 X 3040 Y 1138 X(C)s 1208 X(AL)s 1360 X(L)s 1508 X(S)s 1573 X(U)s 1649 X(B)s 1813 X(\()s 1877 X(n)s 1950 X(1)s 2036 X(,)s 2169 X(n)s 2242 X(2)s 2328 X(,)s 2464 X(a)s 2543 X(\()s 2619 X(i)s 2693 X(,)s 720 X 3200 Y 1119 X($)s 1934 X(j)s 2004 X(\))s 2081 X(,)s 2227 X(.)s 2300 X(.)s 2373 
X(.)s 2446 X(,)s 2579 X(b)s 720 X 3360 Y 1110 X($)s 1417 X(\(j,i\),)s 1635 X(...,)s 1775 X(nlast\).\))s 720 X 3740 Y(4.)s 970 X(A)s 1077 X(single)s 1368 X(term)s 1597 X(is)s 1699 X(not)s 1868 X(split)s 2087 X(across)s 2388 X(lines)s 2624 X(in)s 2738 X(an)s 2869 X(assignment)s 3392 X(statement:)s 720 X 4060 Y 1135 X(A)s 1229 X(\()s 1302 X(I)s 1379 X(,)s 1445 X(J)s 1521 X(\))s 1654 X(=)s 1798 X(T)s 1877 X(*)s 1938 X(A)s 2032 X(\()s 2102 X(J)s 2182 X(,)s 720 X 4220 Y 1119 X($)s 1858 X(I)s 1931 X(\))s 2007 X(/)s 2077 X(\()s 2129 X(A)s 2223 X(\()s 2293 X(J)s 2373 X(,)s 720 X 4380 Y 1110 X($)s 1417 X(I\)+B\(J,I\)\)\).)s 720 X 4760 Y(5.)s 970 X(Comments)s 1476 X(are)s 1636 X(not)s 1805 X(interwoven)s 2326 X(with)s 2550 X(continuation)s 3128 X(lines:)s 720 X 5080 Y 1138 X(C)s 1208 X(AL)s 1360 X(L)s 1508 X(S)s 1573 X(U)s 1649 X(B)s 1813 X(\()s 1877 X(n)s 1950 X(1)s 2036 X(,)s 2169 X(n)s 2242 X(2)s 2328 X(,)s 2474 X(.)s 2547 X(.)s 2620 X(.)s 2693 X(,)s 720 X 5240 Y 998 X(C)s 1156 X(c)s 1226 X(o)s 1284 X(m)s 1357 X(m)s 1448 X(e)s 1518 X(n)s 1603 X(t)s 720 X 5400 Y 1110 X($)s 1445 X(ni,)s 1587 X(nj,)s 1729 X(...,)s 1869 X(nlast\))s 720 X 5780 Y(6.)s 970 X(There)s 1259 X(are)s 1426 X(no)s 1572 X(variable)s 1960 X(names)s 2276 X(like)s 2477 X(DO30I)s 2817 X(\(i.e)s 2996 X(DO30I)s 3336 X(=)s 3434 X(1.3)s 3608 X(vs)s 3742 X(DO30I)s 4082 X(=)s 4180 X(1,3\))s 4390 X(or)s 4517 X(function)s 4919 X(names)s 5235 X(like)s 720 X 5940 Y 970 X(IF.)s 720 X 6320 Y(7.)s 970 X(There)s 1252 X(is)s 1354 X(a)s 1430 X(variable)s 1810 X(named)s 2130 X(N)s 2237 X(in)s 2351 X(the)s 2513 X(\256le)s 2682 X(being)s 2954 X(preprocessed)s 3552 X(which)s 3848 X(is)s 3950 X(set)s 4100 X(to)s 4214 X(the)s 4376 X(array)s 4627 X(size.)s 720 X 6700 Y(8.)s 970 X(The)s 1169 X(designated)s 1667 X(arrays)s 1962 X(are)s 2123 X(global)s 2427 X(throughout)s 2939 X(the)s 3102 X(\256le.)s 3300 X(In)s 3420 X(other)s 3674 X(words,)s 3999 X(there)s 4246 X(should)s 4569 X(not)s 4739 X(be)s 4871 X(two)s 5065 X(subrou-)s 720 X 
6860 Y 970 X(tines)s 1221 X(each)s 1463 X(with)s 1702 X(local)s 1958 X(arrays)s 2267 X(using)s 2549 X(the)s 2726 X(same)s 2994 X(variable)s 3389 X(name;)s 3700 X(the)s 3877 X(preprocessor)s 4478 X(would)s 4796 X(not)s 4980 X(know)s 5266 X(the)s 720 X 7020 Y 970 X(di)s /Cff { (f) show xsiz pt 20 div neg 0 rmoveto (f) s } bind def Cff(erence.)s 1494 X(Also,)s 1762 X(the)s 1929 X(main)s 2182 X(driver)s 2476 X(program)s 2880 X(should)s 3207 X(be)s 3343 X(in)s 3462 X(a)s 3543 X(separate)s 3933 X(\256le)s 4107 X(so)s 4238 X(that)s 4436 X(array)s 4692 X(references,)s 5199 X(such)s 720 X 7180 Y 970 X(as)s 1089 X(array)s 1340 X(initialization,)s 1953 X(are)s 2113 X(not)s 2282 X(listed)s 2549 X(in)s 2663 X(the)s 2825 X(meta\256le.)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 6 -)s 720 X 880 Y(9.)s 970 X(In)s 1089 X(calls)s 1318 X(to)s 1432 X(subroutine,)s 1952 X(the)s 2114 X(sequence)s 2542 X("CALL)s 2902 X(SUB\(parameter1,",)s 3773 X(should)s 4095 X(be)s 4226 X(on)s 4364 X(the)s 4526 X(same)s 4779 X(line.)s 720 X 1100 Y 970 X(The)s 1168 X(preprocessor)s 1754 X(does)s 1983 X(not)s 2152 X(yet)s 2314 X(catch)s 2572 X(the)s 2734 X(following)s 3190 X(references:)s 720 X 1480 Y 1440 X(1.)s 1718 X(IF\(...A\(I,J\)...\))s 2342 X(...)s 2594 X(array)s 2845 X(reference)s 3276 X(in)s 3390 X(logical)s 3717 X(expression)s 4214 X(of)s 4333 X(IF.)s 720 X 1860 Y 1440 X(2.)s 1718 X(...z\(A\(I,J\)\)...)s 2572 X(array)s 2823 X(reference)s 3254 X(used)s 3483 X(as)s 3602 X(an)s 3733 X(array)s 3984 X(index.)s 720 X 2240 Y 970 X(To)s 1120 X(use)s 1294 X(the)s 1456 X(MAPI)s 1759 X(program)s 2158 X(one)s 2344 X(merely)s 2676 X(types)s 720 X 2560 Y 776 X /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft(mapi)s 1024 X(<)s 1126 X(foo.f)s 1354 X(>)s 1456 X(outfoo.f)s 720 X 2880 Y 
0.0 11 11 f.R ft(at)s 827 X(the)s 989 X(UNIX)s 1290 X(shell)s 1527 X(level.)s 1825 X(The)s 2024 X(MAPI)s 2328 X(program)s 2728 X(will)s 2929 X(take)s 3140 X(as)s 3260 X(input)s 3516 X(a)s 3593 X(Fortran)s 3945 X(program)s 4345 X(and)s 4532 X(generate)s 4930 X(a)s 5007 X(new)s 5218 X(pro-)s 720 X 3040 Y(gram)s 979 X(on)s 1123 X(standard)s 1528 X(output)s 1843 X(containing)s 2340 X(the)s 2507 X(instrumented)s 3114 X(version.)s 3526 X(When)s 3820 X(compiling)s 4300 X(and)s 4491 X(loading)s 4854 X(outfoo.f,)s 5266 X(the)s 720 X 3200 Y(user)s 930 X(should)s 1252 X(reference)s 1683 X(the)s 1845 X(mapilib.a)s 2286 X(\256le)s 2455 X(to)s 2569 X(resolve)s 2913 X(calls)s 3142 X(to)s 3256 X(MAPI)s 3559 X(routines.)s 720 X 3680 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(5.1)s 886 X(Calls)s 1153 X(to)s 1272 X(the)s 1446 X(BLAS)s 720 X 3900 Y 970 X 0.0 11 11 f.R ft(Since)s 1247 X(the)s 1414 X(BLAS)s 1729 X(form)s 1975 X(such)s 2209 X(an)s 2346 X(important)s 2808 X(part)s 3012 X(of)s 3137 X(software)s 3547 X(for)s 3708 X(linear)s 3991 X(algebra)s 4346 X(problems,)s 4817 X(we)s 4978 X(have)s 5218 X(pro-)s 720 X 4060 Y(vided)s 996 X(an)s 1131 X(interface)s 1544 X(for)s 1703 X(them)s 1955 X(to)s 2073 X(our)s 2251 X(package.)s 2696 X(During)s 3039 X(the)s 3205 X(preprocessing)s 3845 X(phase,)s 4153 X(if)s 4251 X(a)s 4330 X(call)s 4519 X(to)s 4636 X(a)s 4715 X(Level)s 4995 X(1,)s 5109 X(2,)s 5223 X(or)s 5345 X(3)s 720 X 4220 Y(BLAS)s 1031 X(is)s 1134 X(present,)s 1507 X(it)s 1598 X(is)s 1701 X(replaced)s 2099 X(by)s 2238 X(a)s 2315 X(call)s 2502 X(to)s 2617 X(one)s 2804 X(of)s 2924 X(our)s 3099 X(MAPI)s 3404 X(routines.)s 3816 X(The)s 4016 X(replaced)s 4415 X(routine)s 4756 X(will)s 4958 X(record)s 5266 X(the)s 720 X 4380 Y(memory)s 1120 X(access)s 1432 X(to)s 1552 X(be)s 1689 X(made,)s 1988 X(as)s 2113 X(well)s 2336 X(as)s 2461 X(the)s 2629 X(number)s 2998 X(of)s 3123 X(\257oating)s 3494 X(point)s 3755 X(operations)s 4246 
X(to)s 4366 X(be)s 4503 X(performed,)s 5020 X(and)s 5211 X(then)s 720 X 4540 Y(call)s 906 X(the)s 1068 X(Level)s 1345 X(1,)s 1456 X(2,)s 1567 X(or)s 1686 X(3)s 1769 X(BLAS)s 2079 X(originally)s 2535 X(intended.)s 720 X 4760 Y 970 X(For)s 1151 X(example,)s 1578 X(a)s 1654 X(call)s 1840 X(such)s 2069 X(as)s 720 X 5080 Y 1085 X(C)s 1155 X(AL)s 1307 X(L)s 1455 X(S)s 1520 X(GE)s 1657 X(M)s 1739 X(V)s 1833 X(\()s 1983 X(.)s 2056 X(.)s 2129 X(.)s 2271 X(\))s 720 X 5400 Y(will)s 920 X(be)s 1051 X(replaced)s 1448 X(by)s 1586 X(a)s 1662 X(call)s 1848 X(to)s 720 X 5720 Y 1085 X(C)s 1155 X(AL)s 1307 X(L)s 1438 X(M)s 1528 X(S)s 1593 X(GE)s 1730 X(M)s 1812 X(V)s 1906 X(\()s 2056 X(.)s 2129 X(.)s 2202 X(.)s 2344 X(\))s 2421 X(.)s 720 X 6040 Y(The)s 922 X(call)s 1112 X(will)s 1316 X(be)s 1452 X(modi\256ed)s 1877 X(by)s 2020 X(additional)s 2493 X(parameters)s 3005 X(to)s 3124 X(resolve)s 3473 X(the)s 3640 X(two-dimensional)s 4412 X(array)s 4668 X(references)s 5147 X(in)s 5266 X(the)s 720 X 6200 Y(call.)s 720 X 6420 Y 970 X(The)s 1168 X(next)s 1385 X(example)s 1784 X(shows)s 2087 X(how)s 2304 X(calls)s 2533 X(to)s 2647 X(the)s 2809 X(BLAS)s 3119 X(are)s 3279 X(translated.)s 3789 X(The)s 3987 X(original)s 4357 X(code)s 4591 X(looks)s 4858 X(like)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 7 -)s 720 X 880 Y(*)s 720 X 1040 Y(*)s 1359 X(Compute)s 1836 X(superdiagonal)s 2524 X(block)s 2841 X(of)s 3005 X(U.)s 720 X 1200 Y(*)s 720 X 1360 Y 1377 X(CALL)s 1737 X(STRSM\()s 2209 X('Left',)s 2564 X('Lower',)s 3022 X('No)s 3265 X(transpose',)s 3816 X('Unit',)s 4185 X(J)s 4301 X(-)s 4410 X(1,)s 720 X 1520 Y 1085 X($)s 2235 X(JB,)s 2453 X(A,)s 2633 X(LDA,)s 2959 X(A\()s 3147 X(1,)s 3303 X(J)s 3419 X(\),)s 3556 X(LDA)s 3854 X(\))s 720 X 1680 Y(*)s 720 X 1840 Y(*)s 1359 X(Update)s 1748 X(diagonal)s 2199 X(and)s 2430 X(subdiagonal)s 
3034 X(blocks.)s 720 X 2000 Y(*)s 720 X 2160 Y 1377 X(CALL)s 1737 X(SGEMM\()s 2250 X('No)s 2493 X(transpose',)s 3044 X('No)s 3287 X(transpose',)s 3838 X(M)s 4009 X(-)s 4118 X(J)s 4234 X(+)s 4369 X(1,)s 4525 X(JB,)s 720 X 2320 Y 1085 X($)s 2235 X(J)s 2351 X(-)s 2460 X(1,)s 2616 X(-ONE,)s 2978 X(A\()s 3166 X(J,)s 3310 X(1)s 3438 X(\),)s 3575 X(LDA,)s 3901 X(A\()s 4089 X(1,)s 4245 X(J)s 4361 X(\),)s 4498 X(LDA,)s 4824 X(ONE,)s 720 X 2480 Y 1085 X($)s 2235 X(A\()s 2423 X(J,)s 2567 X(J)s 2683 X(\),)s 2820 X(LDA)s 3118 X(\))s 720 X 2800 Y(After)s 978 X(the)s 1140 X(preprocessor)s 1726 X(executes,)s 2158 X(it)s 2248 X(is)s 2350 X(transformed)s 2907 X(to)s 720 X 3120 Y(*)s 720 X 3280 Y(*)s 1359 X(Compute)s 1836 X(superdiagonal)s 2524 X(block)s 2841 X(of)s 3005 X(U.)s 720 X 3440 Y(*)s 720 X 3600 Y 1377 X(CALL)s 1737 X(MSTRSM\()s 2307 X('Left',)s 2662 X('Lower',)s 3120 X('No)s 3363 X(transpose',)s 3914 X('Unit',)s 4283 X(J)s 4399 X(-)s 4508 X(1,)s 720 X 3760 Y 1085 X($)s 2235 X(JB,)s 2453 X(A,)s 2633 X(LDA,)s 2959 X(A\()s 3147 X(1,)s 3303 X(J)s 3419 X(\),)s 3556 X(LDA)s 3854 X(,)s 720 X 3920 Y 1085 X($)s 2235 X(1,)s 2391 X(1,)s 2547 X(1,)s 2703 X(1,)s 2859 X(1,)s 3015 X(J)s 3131 X(\))s 720 X 4080 Y(*)s 720 X 4240 Y(*)s 1359 X(Update)s 1748 X(diagonal)s 2199 X(and)s 2430 X(subdiagonal)s 3034 X(blocks.)s 720 X 4400 Y(*)s 720 X 4560 Y 1377 X(CALL)s 1737 X(MSGEMM\()s 2348 X('No)s 2591 X(transpose',)s 3142 X('No)s 3385 X(transpose',)s 3936 X(M)s 4107 X(-)s 4216 X(J)s 4332 X(+)s 4467 X(1,)s 4623 X(JB,)s 720 X 4720 Y 1085 X($)s 2235 X(J)s 2351 X(-)s 2460 X(1,)s 2616 X(-ONE,)s 2978 X(A\()s 3166 X(J,)s 3310 X(1)s 3438 X(\),)s 3575 X(LDA,)s 3901 X(A\()s 4089 X(1,)s 4245 X(J)s 4361 X(\),)s 4498 X(LDA,)s 4824 X(ONE,)s 720 X 4880 Y 1085 X($)s 2235 X(A\()s 2423 X(J,)s 2567 X(J)s 2683 X(\),)s 2820 X(LDA)s 3118 X(,)s 720 X 5040 Y 1085 X($)s 2235 X(1,)s 2391 X(J,)s 2535 X(1,)s 2691 X(1,)s 2847 X(1,)s 3003 X(J,)s 3147 X(1,)s 3303 X(J,)s 3447 X(J)s 3563 X(\))s 720 X 5260 Y 970 X(In)s 1112 
X(the)s 1297 X(instrumentation)s 2039 X(the)s 2224 X(name)s 2512 X(of)s 2655 X(the)s 2841 X(subroutine)s 3357 X(has)s 3555 X(been)s 3813 X(changed.)s 4285 X(Routines)s 4729 X(MSTRSM)s 5242 X(and)s 720 X 5420 Y(MSGEMM)s 1271 X(are)s 1452 X(MAP)s 1740 X(routines)s 2143 X(that)s 2357 X(record)s 2684 X(the)s 2867 X(memory)s 3282 X(references)s 3777 X(and)s 3984 X(call)s 4191 X(the)s 4373 X(corresponding)s 5048 X(Level)s 5345 X(3)s 720 X 5580 Y(BLAS.)s 1064 X(The)s 1268 X(calling)s 1601 X(sequence)s 2035 X(has)s 2215 X(been)s 2455 X(augmented)s 2970 X(to)s 3090 X(add)s 3282 X(the)s 3450 X(starting)s 3814 X(point)s 4075 X(of)s 4200 X(each)s 4433 X(array)s 4690 X(reference.)s 5156 X(Since)s 720 X 5740 Y(internally)s 1174 X(the)s 1341 X(BLAS)s 1656 X(do)s 1799 X(not)s 1973 X(know)s 2250 X(what)s 2496 X(part)s 2699 X(of)s 2823 X(the)s 2990 X(original)s 3365 X(array)s 3621 X(the)s 3788 X(calling)s 4120 X(program)s 4523 X(has)s 4701 X(actually)s 5080 X(passed,)s 720 X 5900 Y(we)s 888 X(need)s 1135 X(to)s 1262 X(supply)s 1597 X(the)s 1772 X(starting)s 2143 X(index)s 2428 X(to)s 2555 X(correctly)s 2984 X(record)s 3304 X(each)s 3545 X(array)s 3810 X(reference.)s 4311 X(Therefore,)s 4810 X(in)s 4938 X(the)s 5114 X(call)s 5314 X(to)s 720 X 6060 Y(MSTRSM,)s 1244 X(the)s 1413 X(last)s 1601 X(six)s 1765 X(arguments)s 2257 X(describe)s 2656 X(the)s 2825 X(starting)s 3190 X(point)s 3452 X(of)s 3578 X(the)s 3747 X(two)s 3947 X(arrays)s 4248 X(involved)s 4668 X(in)s 4789 X(the)s 4958 X(operation.)s 720 X 6220 Y(The)s 932 X(\256rst)s 1146 X(argument)s 1650 X(1)s 1747 X(involves)s 2162 X(the)s 2338 X(array)s 2603 X(A;)s 2755 X(the)s 2931 X(next)s 3162 X(two)s 3369 X(arguments,)s 3896 X(1,1,)s 4104 X(provide)s 4481 X(the)s 4657 X(row)s 4869 X(and)s 5070 X(column)s 720 X 6380 Y(index)s 996 X(for)s 1155 X(the)s 1321 X(starting)s 1683 X(point)s 1942 X(of)s 2065 X(the)s 2231 X(\256rst)s 2435 X(array;)s 2721 X(the)s 2887 X(last)s 3072 X(three)s 3322 X(arguments)s 3811 X(1,1,J)s 4052 X(follow)s 
4371 X(the)s 4537 X(same)s 4794 X(form.)s 5094 X(Within)s 720 X 6540 Y(subroutine)s 1215 X(MSTRSM)s 1707 X(the)s 1872 X(appropriate)s 2401 X(calls)s 2633 X(to)s 2750 X(R)s 2855 X(and)s 3045 X(W)s 3180 X(are)s 3344 X(made)s 3613 X(to)s 3731 X(record)s 4041 X(the)s 4207 X(events,)s 4547 X(and)s 4737 X(then)s 4958 X(the)s 5124 X(call)s 5314 X(to)s 720 X 6700 Y(the)s 882 X(Level)s 1159 X(3)s 1242 X(BLAS)s 1552 X(takes)s 1805 X(place.)s 7920 Y page /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft /lineWidthScale 1.0000 def lineWidthScale xsiz mul 1.7 div setlinewidth 720 X 520 Y 2968 X(- 8 -)s 720 X 880 Y /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 11 11 f.B ft(5.2)s 886 X(Execution)s 1385 X(of)s 1504 X(the)s 1678 X(instrumented)s 2334 X(program)s 720 X 1100 Y 970 X 0.0 11 11 f.R ft(As)s 1121 X(the)s 1284 X(instrumented)s 1966 X(program)s 2366 X(executes,)s 2799 X(it)s 2890 X(generates)s 3331 X(a)s 3408 X(trace)s 3648 X(\256le)s 3818 X(named)s 4139 X /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft(memory..)s 4989 X 0.0 11 11 f.R ft(The)s 5189 X(trace)s 720 X 1260 Y(\256le)s 892 X(is)s 997 X(a)s 1076 X(readable)s 1476 X(ASCII)s 1794 X(\256le)s 1966 X(which)s 2265 X(contains)s 2662 X(an)s 2796 X(encoded)s 3191 X(description)s 3710 X(of)s 3832 X(how)s 4052 X(the)s 4217 X(arrays)s 4513 X(in)s 4629 X(the)s 4793 X(program)s 5194 X(have)s 720 X 1420 Y(been)s 955 X(referenced.)s 1470 X(There)s 1753 X(are)s 1914 X(basically)s 2333 X(three)s 2580 X(types)s 2842 X(of)s 2963 X(trace)s 3204 X(lines)s 3442 X(generated:)s 3955 X(array)s 4208 X(de\256nition,)s 4689 X(read)s 4906 X(access,)s 5242 X(and)s 720 X 1580 Y(write)s 981 X(access.)s 1351 X(For)s 1540 X(compactness)s 2136 X(not)s 2313 X(every)s 2591 X(element)s 2973 X(reference)s 3411 X(generates)s 3858 X(a)s 3941 X(trace)s 4187 X(line.)s 4443 X(If)s 4550 X(a)s 
4633 X(call)s 4826 X(to)s 4947 X(one)s 5140 X(of)s 5266 X(the)s 720 X 1740 Y(BLAS)s 1037 X(has)s 1218 X(been)s 1459 X(made,)s 1759 X(the)s 1928 X(trace)s 2174 X(line)s 2374 X(may)s 2598 X(contain)s 2956 X(the)s 3125 X(information)s 3679 X(about)s 3958 X(a)s 4041 X(row)s 4246 X(or)s 4372 X(column)s 4737 X(access)s 5050 X(or)s 5176 X(both.)s 720 X 1900 Y(In)s 849 X(addition,)s 1276 X(the)s 1447 X(events)s 1764 X(are)s 1933 X(time)s 2166 X(stamped,)s 2597 X(allowing)s 3019 X(the)s 3190 X(MAPA)s 3545 X(program)s 3953 X(to)s 4076 X(merge)s 4386 X(information)s 4942 X(with)s 5175 X(other)s 720 X 2060 Y(trace)s 972 X(\256les)s 1197 X(and)s 1396 X(have)s 1643 X(the)s 1818 X(relative)s 2188 X(order)s 2460 X(of)s 2593 X(operations)s 3092 X(preserved.)s 3614 X(We)s 3807 X(also)s 4026 X(record)s 4346 X(the)s 4522 X(amount)s 4894 X(of)s 5027 X(\257oating-)s 720 X 2220 Y(point)s 984 X(work)s 1246 X(that)s 1448 X(has)s 1630 X(taken)s 1903 X(place)s 2169 X(for)s 2332 X(a)s 2416 X(given)s 2696 X(memory)s 3098 X(reference.)s 3593 X(The)s 3799 X(name)s 4072 X(of)s 4199 X(the)s 4369 X(BLAS)s 4687 X(is)s 4797 X(recorded,)s 5242 X(and)s 720 X 2380 Y(during)s 1035 X(playback)s 1458 X(the)s 1620 X(name)s 1885 X(of)s 2004 X(the)s 2166 X(BLAS)s 2476 X(executed)s 2892 X(will)s 3092 X(be)s 3223 X(displayed.)s 720 X 2600 Y 970 X(The)s 1168 X(trace)s 1407 X(\256le)s 1576 X(has)s 1750 X(the)s 1912 X(following)s 2368 X(format:)s 720 X 2920 Y(Matrix)s 1047 X(de\256nition:)s 720 X 3080 Y 748 X(0)s 831 X()s 1320 X()s 2167 X()s 720 X 3400 Y(Read)s 973 X(access:)s 720 X 3560 Y 748 X(1)s 831 X()s 1320 X()s 1978 X()s 2605 X()s 3423 X()s 4210 X(