prep1 # prep1 - prepare countries by continent and pop. den.
prep1 #   input:  tab-separated lines; $1 country, $2 area, $3 population,
prep1 #           $4 continent (as used by the printf below)
prep1 #   output: continent:country:population:area:density, sorted by
prep1 #           continent, then by density in decreasing numeric order
prep1 
prep1 BEGIN { FS = "\t" }
prep1 {   printf("%s:%s:%d:%d:%.1f\n",
prep1         $4, $1, $3, $2, 1000*$3/$2) | "sort -t: -k1,1 -k5rn"
prep1 }
form1 # form1 - format countries data by continent, pop. den.
form1 #   input:  colon-separated lines as written by prep1
form1 #   output: a heading line followed by one formatted line per record
form1 
form1 BEGIN { FS = ":"
form1         printf("%-15s %-10s %10s %7s %12s\n",
form1            "CONTINENT", "COUNTRY", "POPULATION",
form1            "AREA", "POP. DEN.")
form1       }
form1 {     printf("%-15s %-10s %7d %10d %10.1f\n",
form1            $1, $2, $3, $4, $5)
form1 }
prep2 # prep2 - prepare countries by continent, inverse pop. den.
prep2 #   The continent is padded to a fixed width and followed by 1/density
prep2 #   so that a plain "sort" (no key options) orders the records by
prep2 #   continent and then by decreasing density.
prep2 
prep2 BEGIN { FS = "\t"}
prep2 {   den = 1000*$3/$2
prep2     printf("%-15s:%12.8f:%s:%d:%d:%.1f\n",
prep2         $4, 1/den, $1, $3, $2, den) | "sort"
prep2 }
form2 # form2 - format countries by continent, pop. den.
form2 #   Like form1, but prints the continent name only on the first line
prep2 
form2 BEGIN { FS = ":"
form2         printf("%-15s %-10s %10s %7s %12s\n",
form2            "CONTINENT", "COUNTRY", "POPULATION",
form2            "AREA", "POP. DEN.")
form2       }
form2 {     if ($1 != prev) {   # first record of a new continent
form2           print ""
form2           prev = $1
form2       } else              # same continent: suppress the repeated name
form2           $1 = ""
form2       printf("%-15s %-10s %7d %10d %10.1f\n",
form2            $1, $2, $3, $4, $5)
form2 }
prep3 # prep3 - prepare countries data for form3
prep3 #   The rules are selected by the variable "pass", which this program
prep3 #   never assigns; it has to be set from outside (presumably by
prep3 #   command-line assignments placed before each reading of the input,
prep3 #   e.g.  pass=1 countries pass=2 countries -- confirm against caller).
prep3 #   pass 1 accumulates per-continent and overall totals;
prep3 #   pass 2 emits one colon-separated record per country:
prep3 #     continent:country:pop:pop%:area:area%:density:contpop:contarea
prep3 
prep3 BEGIN { FS = "\t" }
prep3 pass == 1 {
prep3     area[$4] += $2
prep3     areatot += $2
prep3     pop[$4] += $3
prep3     poptot += $3
prep3 }
prep3 pass == 2 {
prep3     den = 1000*$3/$2
prep3     printf("%s:%s:%s:%f:%d:%f:%f:%d:%d\n",
prep3         $4, $1, $3, 100*$3/poptot, $2, 100*$2/areatot,
prep3         den, pop[$4], area[$4]) | "sort -t: -k1,1 -k7rn"
prep3 }
form3 # form3 - format countries report number 3
form3 #   input:  colon-separated records as written by prep3
form3 #   output: report with per-continent totals and a grand total
form3 # NOTE(review): runs of blanks inside the string literals below may have
form3 # been collapsed in the text this file was recovered from; check the
form3 # column alignment against real output.
form3 
form3 BEGIN {
form3     FS = ":"; date = "January 1, 1988"
form3     hfmt = "%36s %8s %12s %7s %12s\n"
form3     tfmt = "%33s %10s %10s %9s\n"
form3     TOTfmt = " TOTAL for %-13s%7d%11.1f%11d%10.1f\n"
form3     printf("%-18s %-40s %19s\n\n", "Report No. 3",
form3       "POPULATION, AREA, POPULATION DENSITY", date)
form3     printf(" %-14s %-14s %-23s %-14s %-11s\n\n",
form3       "CONTINENT", "COUNTRY", "POPULATION", "AREA", "POP. DEN.")
form3     printf(hfmt, "Millions ", "Pct. of", "Thousands ",
form3                  "Pct. of", "People per")
form3     printf(hfmt, "of People", "Total ", "of Sq. Mi.",
form3                  "Total ", "Sq. Mi. ")
form3     printf(hfmt, "---------", "-------", "----------",
form3                  "-------", "----------")
form3 }
form3 
form3 {   if ($1 != prev) {  # new continent
form3         if (NR > 1)
form3             totalprint()
form3         prev = $1      # first entry for continent
form3         poptot = $8;  poppct = $4
form3         areatot = $9; areapct = $6
form3     } else {           # next entry for continent
form3         $1 = ""
form3         poppct += $4; areapct += $6
form3     }
form3     printf(" %-15s%-10s %6d %10.1f %10d %9.1f %10.1f\n",
form3         $1, $2, $3, $4, $5, $6, $7)
form3     gpop += $3;  gpoppct += $4
form3     garea += $5; gareapct += $6
form3 }
form3 
form3 END {
form3     totalprint()
form3     printf(" GRAND TOTAL %20d %10.1f %10d %9.1f\n",
form3         gpop, gpoppct, garea, gareapct)
form3     printf(tfmt, "=====", "======", "=====", "======")
form3 }
form3 
form3 function totalprint() {    # print totals for previous continent
form3     printf(tfmt, "----", "-----", "-----", "-----")
form3     printf(TOTfmt, prev, poptot, poppct, areatot, areapct)
form3     printf(tfmt, "====", "=====", "=====", "=====")
form3 }
form4 # form4 - format countries data for tbl input
form4 #   input:  colon-separated records as written by prep3
form4 #   output: tab-separated table rows wrapped in .TS/.TE with .T& format
form4 #           changes around the total lines
form4 
form4 BEGIN {
form4     FS = ":"; OFS = "\t"; date = "January 1, 1988"
form4     print ".TS\ncenter;"
form4     print "l c s s s r s\nl\nl l c s c s c\nl l c c c c c."
form4     printf("%s\t%s\t%s\n\n", "Report No. 3",
form4        "POPULATION, AREA, POPULATION DENSITY", date)
form4     print "CONTINENT", "COUNTRY", "POPULATION",
form4        "AREA", "POP. DEN."
form4     print "", "", "Millions", "Pct. of", "Thousands",
form4                   "Pct. of", "People per"
form4     print "", "", "of People", "Total", "of Sq. Mi.",
form4                   "Total", "Sq. Mi."
form4     print "\t\t_\t_\t_\t_\t_"
form4     print ".T&\nl l n n n n n."
form4 }
form4 
form4 {   if ($1 != prev) {  # new continent
form4         if (NR > 1)
form4             totalprint()
form4         prev = $1
form4         poptot = $8;  poppct = $4
form4         areatot = $9; areapct = $6
form4     } else {           # next entry for current continent
form4         $1 = ""
form4         poppct += $4; areapct += $6
form4     }
form4     printf("%s\t%s\t%d\t%.1f\t%d\t%.1f\t%.1f\n",
form4         $1, $2, $3, $4, $5, $6, $7)
form4     gpop += $3;  gpoppct += $4
form4     garea += $5; gareapct += $6
form4 }
form4 
form4 END {
form4     totalprint()
form4     print ".T&\nl s n n n n n."
form4     printf("GRAND TOTAL\t%d\t%.1f\t%d\t%.1f\n",
form4         gpop, gpoppct, garea, gareapct)
form4     print "", "=", "=", "=", "=", "="
form4     print ".TE"
form4 }
form4 
form4 function totalprint() { # print totals for previous continent
form4     print ".T&\nl s n n n n n."
form4     print "", "_", "_", "_", "_", "_"
form4     printf(" TOTAL for %s\t%d\t%.1f\t%d\t%.1f\n",
form4         prev, poptot, poppct, areatot, areapct)
form4     print "", "=", "=", "=", "=", "="
form4     print ".T&\nl l n n n n n."
form4 }
table # table - simple table formatter
table #   input:  tab-separated fields
table #   output: columns padded to the widest entry; fields matching the
table #           "number" pattern are right-justified, others left-justified
table # NOTE(review): the column separator string in END may originally have
table # been wider than one blank; blanks were collapsed in the recovered text.
table 
table BEGIN {
table     FS = "\t"; blanks = sprintf("%100s", " ")
table     number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)$"
table }
table 
table {   row[NR] = $0       # keep every line; widths are known only at END
table     for (i = 1; i <= NF; i++) {
table         if ($i ~ number)
table             nwid[i] = max(nwid[i], length($i))   # widest number in column i
table         wid[i] = max(wid[i], length($i))         # widest entry in column i
table     }
table }
table 
table END {
table     for (r = 1; r <= NR; r++) {
table         n = split(row[r], d)
table         for (i = 1; i <= n; i++) {
table             sep = (i < n) ? " " : "\n"
table             if (d[i] ~ number)
table                 printf("%" wid[i] "s%s", numjust(i,d[i]), sep)
table             else
table                 printf("%-" wid[i] "s%s", d[i], sep)
table         }
table     }
table }
table 
table function max(x, y) { return (x > y) ? x : y }
table 
table function numjust(n, s) {   # position s in field n
table     # append half of the slack between column width and widest number
table     return s substr(blanks, 1, int((wid[n]-nwid[n])/2))
table }
table1 # table1 - single column formatter
table1 #   input:  one column of strings and decimal numbers
table1 #   output: aligned column
table1 
table1 BEGIN {
table1     blanks = sprintf("%100s", " ")
table1     number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)$"
table1     left = "^[+-]?[0-9]*"     # sign and integer part
table1     right = "[.][0-9]*"       # decimal point and fraction
table1 }
table1 
table1 {   row[NR] = $1
table1     if ($1 ~ number) {
table1         match($1, left)  # matches the empty string, so RLENGTH>=0
table1         lwid = max(lwid, RLENGTH)
table1         if (!match($1, right))
table1             RLENGTH = 0
table1         rwid = max(rwid, RLENGTH)
table1         wid = max(wid, lwid + rwid)
table1     } else
table1         wid = max(wid, length($1))
table1 }
table1 
table1 END {
table1     for (r = 1; r <= NR; r++) {
table1         if (row[r] ~ number)
table1             printf("%" wid "s\n", numjust(row[r]))
table1         else
table1             printf("%-" wid "s\n", row[r])
table1     }
table1 }
table1 
table1 function max(x, y) { return (x > y) ? x : y }
table1 
table1 function numjust(s) {  # position s
table1     # pad on the right by the missing fraction width plus half the slack
table1     if (!match(s, right))
table1         RLENGTH = 0
table1     return s substr(blanks, 1, int(rwid-RLENGTH+(wid-(lwid+rwid))/2))
table1 }
info0 # info0 - print information about Canada from the file countries
info0 awk '
info0 BEGIN { FS = "\t" }
info0 $1 ~ /Canada/ {
info0     printf("%s:\n", $1)
info0     printf("\t%d million people\n", $3)
info0     printf("\t%.3f million sq. mi.\n", $2/1000)
info0     printf("\t%.1f people per sq. mi.\n", 1000*$3/$2)
info0 }
info0 ' countries
info awk '
info # info - print information about country
info #    usage: info country-name
info #    The shell splices its first argument, unquoted, into the pattern
info #    between the two quoted halves of this program.
info 
info BEGIN { FS = "\t" }
info $1 ~ /'$1'/ {
info     printf("%s:\n", $1)
info     printf("\t%d million people\n", $3)
info     printf("\t%.3f million sq. mi.\n", $2/1000)
info     printf("\t%.1f people per sq. mi.\n", 1000*$3/$2)
info }
info ' countries
info.ans # info.ans - print information about country; the pattern is taken from
info.ans #   ARGV[1] inside awk, and ARGV[1] is then set to "-" (standard input).
info.ans # NOTE(review): the "<countries" redirection on the last line was lost
info.ans # in the recovered text and is reconstructed; confirm.
info.ans awk '
info.ans BEGIN { FS = "\t"; pat = ARGV[1]; ARGV[1] = "-" }
info.ans $1 ~ pat {
info.ans     printf("%s:\n", $1)
info.ans     printf("\t%d million people\n", $3)
info.ans     printf("\t%.3f million sq. mi.\n", $2/1000)
info.ans     printf("\t%.1f people per sq. mi.\n", 1000*$3/$2)
info.ans }
info.ans ' "$1" <countries
form.gen # form.gen - generate form letters
form.gen #   input:  prototype file letter.text; data lines
form.gen #   output: one form letter per data line
form.gen # NOTE(review): everything above the "form[++n] = $0" line was lost in
form.gen # the recovered text and is reconstructed (header, FS, and the name of
form.gen # the prototype file); confirm against the original.
form.gen 
form.gen BEGIN {
form.gen     FS = ":"
form.gen     while ((getline <"letter.text") > 0) # read form letter
form.gen         form[++n] = $0
form.gen }
form.gen 
form.gen {   for (i = 1; i <= n; i++) { # read data lines
form.gen         temp = form[i]         # each line generates a letter
form.gen         for (j = 1; j <= NF; j++)
form.gen             gsub("#" j, $j, temp)   # replace #j by field j
form.gen         print temp
form.gen     }
form.gen }
capitals USSR	Moscow
capitals Canada	Ottawa
capitals China	Beijing
capitals USA	Washington
capitals Brazil	Brasilia
capitals India	New Delhi
capitals Mexico	Mexico City
capitals France	Paris
capitals Japan	Tokyo
capitals Germany	Bonn
capitals England	London
merge.awk # merge.awk - print name, population and capital of the Asian countries
merge.awk awk ' BEGIN { FS = "\t" }
merge.awk FILENAME == "capitals" {
merge.awk     cap[$1] = $2
merge.awk }
merge.awk FILENAME == "countries" && $4 == "Asia" {
merge.awk     print $1, $3, cap[$1]
merge.awk }
merge.awk ' capitals countries
join.awk # join - join file1 file2 on first field
join.awk #   input:  two sorted files, tab-separated fields
join.awk #   output: natural join of lines with common first field
join.awk # NOTE(review): the closing lines of getone, the helpers unget, prefix
join.awk # and suffix, and the opening lines of the relfile section that
join.awk # follows this program in the bundle were lost in the recovered text
join.awk # and are reconstructed from how the surviving code uses them; confirm.
join.awk 
join.awk BEGIN {
join.awk     OFS = sep = "\t"
join.awk     file2 = ARGV[2]
join.awk     ARGV[2] = ""  # read file1 implicitly, file2 explicitly
join.awk     eofstat = 1   # end of file status for file2
join.awk     if ((ng = getgroup()) <= 0)
join.awk         exit      # file2 is empty
join.awk }
join.awk 
join.awk {   while (prefix($0) > prefix(gp[1]))
join.awk         if ((ng = getgroup()) <= 0)
join.awk             exit  # file2 exhausted
join.awk     if (prefix($0) == prefix(gp[1]))  # 1st attributes in file1
join.awk         for (i = 1; i <= ng; i++)     #     and file2 match
join.awk             print $0, suffix(gp[i])   # print joined line
join.awk }
join.awk 
join.awk function getgroup() { # put equal prefix group into gp[1..ng]
join.awk     if (getone(file2, gp, 1) <= 0)    # end of file
join.awk         return 0
join.awk     for (ng = 2; getone(file2, gp, ng) > 0; ng++)
join.awk         if (prefix(gp[ng]) != prefix(gp[1])) {
join.awk             unget(gp[ng])    # went too far
join.awk             return ng-1
join.awk         }
join.awk     return ng-1
join.awk }
join.awk 
join.awk function getone(f, gp, n) {   # get next line in gp[n]
join.awk     if (eofstat <= 0) # eof or error has occurred
join.awk         return 0
join.awk     if (ungot) {      # return lookahead line if it exists
join.awk         gp[n] = ungotline
join.awk         ungot = 0
join.awk         return 1
join.awk     }
join.awk     return eofstat = (getline gp[n] <f)
join.awk }
join.awk 
join.awk function unget(s)  { ungotline = s; ungot = 1 }   # push back one line
join.awk function prefix(s) { return substr(s, 1, index(s, sep) - 1) }  # first field
join.awk function suffix(s) { return substr(s, index(s, sep) + 1) }     # the rest
relfile countries:
relfile 	country
relfile 	area
relfile 	population
relfile 	continent
relfile capitals:
relfile 	country
relfile 	capital
relfile cc:
relfile 	country
relfile 	area
relfile 	population
relfile 	continent
relfile 	capital
relfile 	!sort countries >temp.countries
relfile 	!sort capitals >temp.capitals
relfile 	!join temp.countries temp.capitals >cc
avgarea.awk { area += $area }; END { print area/NR }
qawk # qawk - awk relational database query processor
qawk #   Reads table descriptions from "relfile", then treats every non-empty
qawk #   input line as a query: $name references are replaced by field numbers
qawk #   of the first table that has all the named attributes, the commands
qawk #   attached to that table are run, and the query is run with awk.
qawk 
qawk BEGIN { readrel("relfile") }
qawk /./   { doquery($0) }
qawk 
qawk # readrel - parse description file f into
qawk #   relname[1..nrel]   table names
qawk #   attr[t, name]      position of attribute name in table t
qawk #   cmd[t, 1..ncmd[t]] commands (lines starting with !) for table t
qawk function readrel(f) {
qawk     while ((getline <f) > 0)   # parse relfile
qawk         if ($0 ~ /^[A-Za-z]+ *:/) {     # name:
qawk             gsub(/[^A-Za-z]+/, "", $0)  # remove all but name
qawk             relname[++nrel] = $0
qawk         } else if ($0 ~ /^[ \t]*!/)     # !command...
qawk             cmd[nrel, ++ncmd[nrel]] = substr($0,index($0,"!")+1)
qawk         else if ($0 ~ /^[ \t]*[A-Za-z]+[ \t]*$/)  # attribute
qawk             attr[nrel, $1] = ++nattr[nrel]
qawk         else if ($0 !~ /^[ \t]*$/)      # not white space
qawk             print "bad line in relfile:", $0
qawk }
qawk 
qawk # doquery - translate query s and run it; i and j are locals
qawk function doquery(s,   i,j) {
qawk     for (i in qattr)    # clean up for next query
qawk         delete qattr[i]
qawk     query = s    # put $names in query into qattr, without $
qawk     while (match(s, /\$[A-Za-z]+/)) {
qawk         qattr[substr(s, RSTART+1, RLENGTH-1)] = 1
qawk         # continue right after the match so that an immediately
qawk         # following $name (as in $a$b) is not skipped
qawk         s = substr(s, RSTART+RLENGTH)
qawk     }
qawk     for (i = 1; i <= nrel && !subset(qattr, attr, i); )
qawk         i++
qawk     if (i > nrel)    # didn't find a table with all attributes
qawk         missing(qattr)
qawk     else {           # table i contains attributes in query
qawk         for (j in qattr)   # create awk program
qawk             gsub("\\$" j, "$" attr[i,j], query)
qawk         for (j = 1; j <= ncmd[i]; j++)  # create table i
qawk             if (system(cmd[i, j]) != 0) {
qawk                 print "command failed, query skipped\n", cmd[i,j]
qawk                 return
qawk             }
qawk         awkcmd = sprintf("awk -F'\t' '%s' %s", query, relname[i])
qawk         printf("query: %s\n", awkcmd)   # for debugging
qawk         system(awkcmd)
qawk     }
qawk }
qawk 
qawk function subset(q, a, r,   i) {  # is q a subset of a[r]?
qawk     for (i in q)
qawk         if (!((r,i) in a))
qawk             return 0
qawk     return 1
qawk }
qawk 
qawk function missing(x,   i) {   # report the attributes no table supplies
qawk     print "no table contains all of the following attributes:"
qawk     for (i in x)
qawk         print i
qawk }
qawk1.ans # qawk - awk relational database query processor
qawk1.ans #   Variant of qawk that runs the commands of a table as one shell
qawk1.ans #   script and only the first time the table is needed (exists[]).
qawk1.ans 
qawk1.ans BEGIN { readrel("relfile") }
qawk1.ans /./   { doquery($0) }
qawk1.ans 
qawk1.ans # readrel - parse description file f into
qawk1.ans #   relname[1..nrel]   table names
qawk1.ans #   attr[t, name]      position of attribute name in table t
qawk1.ans #   cmd[t, 1..ncmd[t]] commands (lines starting with !) for table t
qawk1.ans function readrel(f) {
qawk1.ans     while ((getline <f) > 0)   # parse relfile
qawk1.ans         if ($0 ~ /^[A-Za-z]+ *:/) {     # name:
qawk1.ans             gsub(/[^A-Za-z]+/, "", $0)  # remove all but name
qawk1.ans             relname[++nrel] = $0
qawk1.ans         } else if ($0 ~ /^[ \t]*!/)     # !command...
qawk1.ans             cmd[nrel, ++ncmd[nrel]] = substr($0,index($0,"!")+1)
qawk1.ans         else if ($0 ~ /^[ \t]*[A-Za-z]+[ \t]*$/)  # attribute
qawk1.ans             attr[nrel, $1] = ++nattr[nrel]
qawk1.ans         else if ($0 !~ /^[ \t]*$/)      # not white space
qawk1.ans             print "bad line in relfile:", $0
qawk1.ans }
qawk1.ans 
qawk1.ans # doquery - translate query s and run it; i, j and x are locals
qawk1.ans function doquery(s,   i,j,x) {
qawk1.ans     for (i in qattr)    # clean up for next query
qawk1.ans         delete qattr[i]
qawk1.ans     query = s    # put $names in query into qattr, without $
qawk1.ans     while (match(s, /\$[A-Za-z]+/)) {
qawk1.ans         qattr[substr(s, RSTART+1, RLENGTH-1)] = 1
qawk1.ans         # continue right after the match so that an immediately
qawk1.ans         # following $name (as in $a$b) is not skipped
qawk1.ans         s = substr(s, RSTART+RLENGTH)
qawk1.ans     }
qawk1.ans     for (i = 1; i <= nrel && !subset(qattr, attr, i); )
qawk1.ans         i++
qawk1.ans     if (i > nrel)    # didn't find a table with all attributes
qawk1.ans         missing(qattr)
qawk1.ans     else {           # table i contains attributes in query
qawk1.ans         for (j in qattr)   # create awk program
qawk1.ans             gsub("\\$" j, "$" attr[i,j], query)
qawk1.ans         if (!exists[i] && ncmd[i] > 0) {
qawk1.ans             for (j = 1; j <= ncmd[i]; j++)
qawk1.ans                 x = x cmd[i, j] "\n"
qawk1.ans             print "executing\n" x   # for debugging
qawk1.ans             if (system(x) != 0) {   # create table i
qawk1.ans                 print "command failed, query skipped\n", x
qawk1.ans                 return
qawk1.ans             }
qawk1.ans             exists[i]++
qawk1.ans         }
qawk1.ans         awkcmd = sprintf("awk -F'\t' '%s' %s", query, relname[i])
qawk1.ans         printf("query: %s\n", awkcmd)   # for debugging
qawk1.ans         system(awkcmd)
qawk1.ans     }
qawk1.ans }
qawk1.ans 
qawk1.ans function subset(q, a, r,   i) {  # is q a subset of a[r]?
qawk1.ans     for (i in q)
qawk1.ans         if (!((r,i) in a))
qawk1.ans             return 0
qawk1.ans     return 1
qawk1.ans }
qawk1.ans 
qawk1.ans function missing(x,   i) {   # report the attributes no table supplies
qawk1.ans     print "no table contains all of the following attributes:"
qawk1.ans     for (i in x)
qawk1.ans         print i
qawk1.ans }