prep1 # prep1 - prepare countries by continent and pop. den.
prep1 #   input:  tab-separated lines: country, area, population, continent
prep1 #   output: continent:country:population:area:density, sorted by
prep1 #           continent and, within a continent, by decreasing density
prep1 
prep1 BEGIN { FS = "\t" }
prep1       { # The sort keys use the POSIX -k form; the obsolete "+0 -1 +4rn"
prep1         # spelling is rejected by current sort implementations.
prep1         printf("%s:%s:%d:%d:%.1f\n",
prep1             $4, $1, $3, $2, 1000*$3/$2) | "sort -t: -k1,1 -k5rn"
prep1       }
form1 # form1 - format countries data by continent, pop. den.
form1 #   input:  colon-separated lines: continent, country, population,
form1 #           area, density (presumably the output of prep1)
form1 #   output: a heading line, then one aligned row per input line
form1 
form1 BEGIN {
form1     FS = ":"
form1     heading = "%-15s %-10s %10s %7s %12s\n"
form1     detail = "%-15s %-10s %7d %10d %10.1f\n"
form1     printf(heading, "CONTINENT", "COUNTRY", "POPULATION",
form1         "AREA", "POP. DEN.")
form1 }
form1 
form1 { printf(detail, $1, $2, $3, $4, $5) }
prep2 # prep2 - prepare countries by continent, inverse pop. den.
prep2 #   output fields: continent padded to 15 columns, 1/density in a
prep2 #   fixed-width column, country, population, area, density;
prep2 #   the lines are piped through a plain sort
prep2 
prep2 BEGIN { FS = "\t" }
prep2 
prep2 {
prep2     density = 1000 * $3 / $2
prep2     line = sprintf("%-15s:%12.8f:%s:%d:%d:%.1f",
prep2         $4, 1/density, $1, $3, $2, density)
prep2     printf("%s\n", line) | "sort"
prep2 }
form2 # form2 - format countries by continent, pop. den.
form2 #   input:  colon-separated lines: continent, country, population,
form2 #           area, density
form2 #   output: heading, then one row per line; the continent is shown
form2 #           only on the first row of each run of equal continents,
form2 #           and a blank line precedes each such run
form2 
form2 BEGIN {
form2     FS = ":"
form2     heading = "%-15s %-10s %10s %7s %12s\n"
form2     detail = "%-15s %-10s %7d %10d %10.1f\n"
form2     printf(heading, "CONTINENT", "COUNTRY", "POPULATION",
form2         "AREA", "POP. DEN.")
form2 }
form2 
form2 {
form2     continent = $1
form2     if (continent == prev)
form2         continent = ""      # same continent as the previous row
form2     else {
form2         print ""            # blank line before a new continent
form2         prev = continent
form2     }
form2     printf(detail, continent, $2, $3, $4, $5)
form2 }
prep3 # prep3 - prepare countries data for form3
prep3 #   input:  tab-separated lines: country, area, population, continent
prep3 #   output: continent:country:population:pct of total population:
prep3 #           area:pct of total area:density:continent population:
prep3 #           continent area, sorted by continent and, within a
prep3 #           continent, by decreasing density
prep3 #
prep3 #   The variable pass is not set in this file; it has to be supplied
prep3 #   from outside so that the data is read twice, presumably as
prep3 #       awk -f prep3 pass=1 countries pass=2 countries
prep3 
prep3 BEGIN  { FS = "\t" }
prep3 pass == 1 {                 # first reading: accumulate totals
prep3     area[$4] += $2
prep3     areatot += $2
prep3     pop[$4] += $3
prep3     poptot += $3
prep3 }
prep3 pass == 2 {                 # second reading: emit one line per country
prep3     den = 1000*$3/$2
prep3     # The sort keys use the POSIX -k form; the obsolete "+0 -1 +6rn"
prep3     # spelling is rejected by current sort implementations.
prep3     printf("%s:%s:%s:%f:%d:%f:%f:%d:%d\n",
prep3         $4, $1, $3, 100*$3/poptot, $2, 100*$2/areatot,
prep3         den, pop[$4], area[$4]) | "sort -t: -k1,1 -k7rn"
prep3 }
form3 # form3 - format countries report number 3
form3 #   input:  colon-separated lines with nine fields, used here as:
form3 #           $1 continent, $2 country, $3 population, $4 pct of total
form3 #           population, $5 area, $6 pct of total area, $7 density,
form3 #           $8 continent population, $9 continent area
form3 #   output: report heading, one row per country, a subtotal after
form3 #           each run of equal continents, and a grand total
form3 
form3 BEGIN  {
form3     FS = ":"; date = "January 1, 1988"
form3     # hfmt: column sub-headings; tfmt: rule lines; TOTfmt: subtotal line
form3     hfmt = "%36s %8s %12s %7s %12s\n"
form3     tfmt = "%33s %10s %10s %9s\n"
form3     TOTfmt = "   TOTAL for %-13s%7d%11.1f%11d%10.1f\n" 
form3     printf("%-18s %-40s %19s\n\n", "Report No. 3",
form3       "POPULATION, AREA, POPULATION DENSITY", date)
form3     printf(" %-14s %-14s %-23s %-14s %-11s\n\n",
form3       "CONTINENT", "COUNTRY", "POPULATION", "AREA", "POP. DEN.")
form3     printf(hfmt, "Millions ", "Pct. of", "Thousands ",
form3                  "Pct. of", "People per")
form3     printf(hfmt, "of People", "Total ", "of Sq. Mi.",
form3                  "Total ", "Sq. Mi. ")
form3     printf(hfmt, "---------", "-------", "----------",
form3                  "-------", "----------")
form3 }
form3 {   if ($1 != prev) { # new continent
form3         if (NR > 1)   # close the previous continent before prev changes
form3             totalprint()
form3         prev = $1     # first entry for continent
form3         poptot = $8;  poppct = $4
form3         areatot = $9; areapct = $6
form3     } else {          # next entry for continent
form3         $1 = ""       # blank the repeated continent name in the row
form3         poppct += $4; areapct += $6
form3     }
form3     printf(" %-15s%-10s %6d %10.1f %10d %9.1f %10.1f\n",
form3         $1, $2, $3, $4, $5, $6, $7)
form3     # running grand totals over all rows
form3     gpop += $3;  gpoppct += $4
form3     garea += $5; gareapct += $6
form3 }
form3 
form3 END {
form3     totalprint()          # subtotal for the last continent
form3     printf(" GRAND TOTAL %20d %10.1f %10d %9.1f\n",
form3         gpop, gpoppct, garea, gareapct)
form3     printf(tfmt, "=====", "======", "=====", "======")
form3 }
form3 
form3 # totalprint reads the globals prev, poptot, poppct, areatot and areapct
form3 # and prints them between a dashed rule and a double rule.
form3 function totalprint() {	# print totals for previous continent
form3     printf(tfmt, "----", "-----", "-----", "-----")
form3     printf(TOTfmt, prev, poptot, poppct, areatot, areapct)
form3     printf(tfmt, "====", "=====", "=====", "=====")
form3 }
form4 # form4 - format countries data for tbl input
form4 #   input:  colon-separated lines with nine fields, used here as:
form4 #           $1 continent, $2 country, $3 population, $4 pct of total
form4 #           population, $5 area, $6 pct of total area, $7 density,
form4 #           $8 continent population, $9 continent area
form4 #   output: tab-separated table text bracketed by .TS and .TE lines
form4 #           (presumably the tbl table start and end requests)
form4 
form4 BEGIN  {
form4     FS = ":"; OFS = "\t"; date = "January 1, 1988"
form4     print ".TS\ncenter;"
form4     # column layout lines for the heading rows
form4     print "l c s s s r s\nl\nl l c s c s c\nl l c c c c c."
form4     printf("%s\t%s\t%s\n\n", "Report No. 3",
form4         "POPULATION, AREA, POPULATION DENSITY", date)
form4     print "CONTINENT", "COUNTRY", "POPULATION",
form4           "AREA", "POP. DEN."
form4     print "", "", "Millions", "Pct. of", "Thousands",
form4           "Pct. of", "People per"
form4     print "", "", "of People", "Total", "of Sq. Mi.",
form4           "Total", "Sq. Mi."
form4     print "\t\t_\t_\t_\t_\t_"
form4     # switch to the layout used for the country rows
form4     print ".T&\nl l n n n n n."
form4 }
form4 
form4 {    if ($1 != prev) {  # new continent
form4         if (NR > 1)     # close the previous continent before prev changes
form4             totalprint()
form4         prev = $1
form4         poptot = $8;  poppct = $4
form4         areatot = $9; areapct = $6
form4     } else {            # next entry for current continent
form4         $1 = ""         # blank the repeated continent name in the row
form4         poppct += $4; areapct += $6
form4     }
form4     printf("%s\t%s\t%d\t%.1f\t%d\t%.1f\t%.1f\n",
form4         $1, $2, $3, $4, $5, $6, $7)
form4     # running grand totals over all rows
form4     gpop += $3;  gpoppct += $4
form4     garea += $5; gareapct += $6
form4 }
form4 
form4 END {
form4     totalprint()        # subtotal for the last continent
form4     print ".T&\nl s n n n n n."
form4     printf("GRAND TOTAL\t%d\t%.1f\t%d\t%.1f\n",
form4         gpop, gpoppct, garea, gareapct)
form4     print "", "=", "=", "=", "=", "="
form4     print ".TE"
form4 }
form4 
form4 # totalprint reads the globals prev, poptot, poppct, areatot and areapct.
form4 # It switches to the subtotal layout, prints the subtotal between two
form4 # rule rows, and then restores the country-row layout.
form4 function totalprint() {    # print totals for previous continent
form4     print ".T&\nl s n n n n n."
form4     print "", "_", "_", "_", "_", "_"
form4     printf("   TOTAL for %s\t%d\t%.1f\t%d\t%.1f\n",
form4         prev, poptot, poppct, areatot, areapct)
form4     print "", "=", "=", "=", "=", "="
form4     print ".T&\nl l n n n n n."
form4 }
table # table - simple table formatter
table #   input:  lines of tab-separated fields
table #   output: the same lines with every column padded to a common
table #           width; text cells are left-justified, numeric cells are
table #           right-justified after numjust has appended its padding
table 
table BEGIN {
table     FS = "\t"
table     blanks = sprintf("%100s", " ")
table     number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)$"
table }
table 
table # first pass: keep every line, record the widest cell per column
table # (wid) and the widest numeric cell per column (nwid)
table {
table     row[NR] = $0
table     for (col = 1; col <= NF; col++) {
table         len = length($col)
table         if ($col ~ number)
table             nwid[col] = max(nwid[col], len)
table         wid[col] = max(wid[col], len)
table     }
table }
table 
table # second pass: print the stored lines using the collected widths
table END {
table     for (r = 1; r <= NR; r++) {
table         n = split(row[r], d)
table         for (col = 1; col <= n; col++) {
table             cell = d[col]
table             if (col < n)
table                 sep = "   "
table             else
table                 sep = "\n"
table             if (cell ~ number)
table                 printf("%" wid[col] "s%s", numjust(col, cell), sep)
table             else
table                 printf("%-" wid[col] "s%s", cell, sep)
table         }
table     }
table }
table 
table # max - the larger of x and y
table function max(x, y) {
table     if (x > y)
table         return x
table     return y
table }
table 
table # numjust - s followed by half of the width difference between
table #   column n as a whole and its numeric cells, as blanks
table function numjust(n, s) {
table     return (s) (substr(blanks, 1, int((wid[n] - nwid[n]) / 2)))
table }
table1 # table1 - single column formatter
table1 #   input:  one column of strings and decimal numbers
table1 #   output: aligned column
table1 #
table1 #   Globals: lwid is the widest match of the pattern left (optional
table1 #   sign and leading digits), rwid the widest match of the pattern
table1 #   right (the point and the digits after it), wid the column width.
table1 
table1 BEGIN {
table1     blanks = sprintf("%100s", " ")
table1     number = "^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)$"
table1     left = "^[+-]?[0-9]*"
table1     right = "[.][0-9]*"
table1 }
table1 
table1 # first pass: store each entry and update the three widths
table1 {   row[NR] = $1
table1     if ($1 ~ number) {
table1         match($1, left) # matches the empty string, so RLENGTH>=0
table1         lwid = max(lwid, RLENGTH)
table1         if (!match($1, right))
table1             RLENGTH = 0     # no fractional part: count it as width 0
table1         rwid = max(rwid, RLENGTH)
table1         wid = max(wid, lwid + rwid)
table1     } else
table1         wid = max(wid, length($1))
table1 }
table1 
table1 # second pass: numbers right-justified after padding, strings left-justified
table1 END {
table1     for (r = 1; r <= NR; r++) {
table1         if (row[r] ~ number)
table1             printf("%" wid "s\n", numjust(row[r]))
table1         else
table1             printf("%-" wid "s\n", row[r])
table1     }
table1 }
table1 
table1 # max - the larger of x and y
table1 function max(x, y) { return (x > y) ? x : y }
table1 
table1 # numjust - append blanks to s: rwid minus the length of the fractional
table1 #   part of s, plus half of the width not used by the widest number
table1 function numjust(s) {   # position s
table1     if (!match(s, right))
table1         RLENGTH = 0     # no fractional part in s
table1     return s substr(blanks, 1, int(rwid-RLENGTH+(wid-(lwid+rwid))/2))
table1 }
info0 awk '
info0 # info0 - print information about Canada from the countries file
info0 BEGIN { FS = "\t" }
info0 $1 ~ /Canada/ {
info0     name = $1
info0     area = $2
info0     people = $3
info0     printf("%s:\n", name)
info0     printf("\t%d million people\n", people)
info0     printf("\t%.3f million sq. mi.\n", area/1000)
info0     printf("\t%.1f people per sq. mi.\n", 1000*people/area)
info0 }
info0 ' countries
info awk -v pat="$1" '
info # info - print information about country
info #    usage: info country-name
info #
info # The country name reaches awk as the variable pat instead of being
info # spliced into the program text, so names containing blanks or
info # slashes work and the argument cannot inject awk code.
info 
info BEGIN { FS = "\t" }
info $1 ~ pat {
info     printf("%s:\n", $1)
info     printf("\t%d million people\n", $3)
info     printf("\t%.3f million sq. mi.\n", $2/1000)
info     printf("\t%.1f people per sq. mi.\n", 1000*$3/$2)
info }
info ' countries
info.ans awk '
info.ans # info.ans - print information about the countries matching a pattern
info.ans #   the pattern is the first shell argument; data comes from standard input
info.ans BEGIN {
info.ans     FS = "\t"
info.ans     pat = ARGV[1]     # take the pattern out of the argument list
info.ans     ARGV[1] = "-"     # and name standard input in its place
info.ans }
info.ans $1 ~ pat {
info.ans     name = $1
info.ans     area = $2
info.ans     people = $3
info.ans     printf("%s:\n", name)
info.ans     printf("\t%d million people\n", people)
info.ans     printf("\t%.3f million sq. mi.\n", area/1000)
info.ans     printf("\t%.1f people per sq. mi.\n", 1000*people/area)
info.ans }
info.ans ' "$1" <countries
info1.ans awk '
info1.ans # info1.ans - print information about the countries matching pat
info1.ans #   pat is set by a command-line assignment built from the first
info1.ans #   shell argument; data comes from standard input
info1.ans BEGIN { FS = "\t" }
info1.ans $1 ~ pat {
info1.ans     name = $1
info1.ans     area = $2
info1.ans     people = $3
info1.ans     printf("%s:\n", name)
info1.ans     printf("\t%d million people\n", people)
info1.ans     printf("\t%.3f million sq. mi.\n", area/1000)
info1.ans     printf("\t%.1f people per sq. mi.\n", 1000*people/area)
info1.ans }
info1.ans ' pat="$1" <countries
letter.text Subject: Demographic Information About #1
letter.text From: AWK Demographics, Inc.
letter.text 
letter.text In response to your request for information about #1,
letter.text our latest research has revealed that its population is #2
letter.text million people and its area is #3 million square miles.
letter.text This gives #1 a population density of #4 people per
letter.text square mile.
form.gen # form.gen - generate form letters
form.gen #   input:  prototype file letter.text; data lines
form.gen #   output: one form letter per data line
form.gen #
form.gen #   In the prototype, #1, #2, ... stand for the colon-separated
form.gen #   fields of the current data line.
form.gen 
form.gen BEGIN {
form.gen     FS = ":"
form.gen     while ((getline line <"letter.text") > 0) # read form letter
form.gen         form[++n] = line
form.gen     close("letter.text")
form.gen }
form.gen 
form.gen {   for (j = 1; j <= NF; j++) {   # prepare replacement texts once
form.gen         repl[j] = $j
form.gen         # & and \ are special in a gsub replacement; escape them so
form.gen         # that field text is inserted literally
form.gen         gsub(/[\\&]/, "\\\\&", repl[j])
form.gen     }
form.gen     for (i = 1; i <= n; i++) { # each data line generates a letter
form.gen         temp = form[i]
form.gen         # highest number first, so that #1 cannot consume the
form.gen         # front of #10, #11, ...
form.gen         for (j = NF; j >= 1; j--)
form.gen             gsub("#" j, repl[j], temp)
form.gen         print temp
form.gen     }
form.gen }
capitals USSR	Moscow
capitals Canada	Ottawa
capitals China	Beijing
capitals USA	Washington
capitals Brazil	Brasilia
capitals India	New Delhi
capitals Mexico	Mexico City
capitals France	Paris
capitals Japan	Tokyo
capitals Germany	Bonn
capitals England	London
merge.awk awk '
merge.awk # merge.awk - for each line of countries whose fourth field is Asia,
merge.awk #   print its first and third fields and the capital recorded for it
merge.awk BEGIN { FS = "\t" }
merge.awk 
merge.awk # capitals is read first: remember the capital of each country
merge.awk FILENAME == "capitals" { cap[$1] = $2 }
merge.awk 
merge.awk FILENAME == "countries" && $4 == "Asia" {
merge.awk     country = $1
merge.awk     print country, $3, cap[country]
merge.awk }
merge.awk ' capitals countries
join.awk # join - join file1 file2 on first field
join.awk #   input:  two sorted files, tab-separated fields
join.awk #   output: natural join of lines with common first field
join.awk #
join.awk #   file1 is read by the normal input loop; file2 is read one group
join.awk #   of equal-key lines at a time into gp[1..ng] by getgroup.
join.awk 
join.awk BEGIN {
join.awk     OFS = sep = "\t"
join.awk     file2 = ARGV[2]
join.awk     ARGV[2] = ""      # keep awk from reading file2 as main input
join.awk     eofstat = 1       # last getline status seen on file2
join.awk     ng = getgroup()
join.awk     if (ng <= 0)
join.awk         exit          # file2 is empty
join.awk }
join.awk 
join.awk {
join.awk     key = prefix($0)
join.awk     while (key > prefix(gp[1])) {     # advance file2 up to this key
join.awk         ng = getgroup()
join.awk         if (ng <= 0)
join.awk             exit                      # file2 exhausted
join.awk     }
join.awk     if (key == prefix(gp[1])) {       # keys match in both files
join.awk         for (i = 1; i <= ng; i++)
join.awk             print $0, suffix(gp[i])   # print joined line
join.awk     }
join.awk }
join.awk 
join.awk # getgroup - read the next run of file2 lines that share a first
join.awk #   field into gp[1..ng]; returns the run length, 0 at end of file
join.awk function getgroup() {
join.awk     if (getone(file2, gp, 1) <= 0)
join.awk         return 0                  # nothing left in file2
join.awk     ng = 2
join.awk     while (getone(file2, gp, ng) > 0) {
join.awk         if (prefix(gp[ng]) != prefix(gp[1])) {
join.awk             unget(gp[ng])         # first line of the next run
join.awk             break
join.awk         }
join.awk         ng++
join.awk     }
join.awk     return ng - 1
join.awk }
join.awk 
join.awk # getone - store the next line of f in gp[n], taking the pushed-back
join.awk #   line first when there is one; returns 1 on success, otherwise
join.awk #   0 once end of file or an error has been seen, or the getline
join.awk #   status at the moment that happens
join.awk function getone(f, gp, n) {
join.awk     if (eofstat <= 0)
join.awk         return 0
join.awk     if (!ungot) {
join.awk         eofstat = (getline gp[n] <f)
join.awk         return eofstat
join.awk     }
join.awk     gp[n] = ungotline
join.awk     ungot = 0
join.awk     return 1
join.awk }
join.awk 
join.awk # unget - push back one lookahead line for the next getone call
join.awk function unget(s)  { ungotline = s; ungot = 1 }
join.awk 
join.awk # prefix - the first field of s, always as a string; a line without
join.awk #   a separator consists of its key only, so all of s is returned
join.awk function prefix(s,   p) {
join.awk     p = index(s, sep)
join.awk     return (p > 0) ? substr(s, 1, p - 1) : (s "")
join.awk }
join.awk 
join.awk # suffix - everything after the first separator of s; empty when s
join.awk #   has no separator
join.awk function suffix(s,   p) {
join.awk     p = index(s, sep)
join.awk     return (p > 0) ? substr(s, p + 1) : ""
join.awk }
relfile countries:
relfile 	country
relfile 	area
relfile 	population
relfile 	continent
relfile capitals:
relfile 	country
relfile 	capital
relfile cc:
relfile 	country
relfile 	area
relfile 	population
relfile 	continent
relfile 	capital
relfile 	!sort countries >temp.countries
relfile 	!sort capitals >temp.capitals
relfile 	!join temp.countries temp.capitals >cc
avgarea.awk { area += $area }; END { if (NR > 0) print area/NR }
qawk # qawk - awk relational database query processor
qawk #   relation descriptions are read from the file "relfile" at startup;
qawk #   every input line containing at least one character is then handed
qawk #   to doquery as one query
qawk 
qawk BEGIN { readrel("relfile") }
qawk /./   { doquery($0) }
qawk 
qawk # readrel - load relation descriptions from file f
qawk #   "name:" starts relation number ++nrel (stored in relname);
qawk #   a line whose first non-blank character is "!" adds a command to
qawk #   cmd[nrel, 1..ncmd[nrel]]; a line holding a single alphabetic word
qawk #   adds an attribute, numbered in order of appearance, to attr.
qawk #   Other non-blank lines are reported. The file is closed afterwards
qawk #   and a read failure is reported instead of being ignored.
qawk function readrel(f,   status) {
qawk     while ((status = (getline <f)) > 0)   # parse relfile
qawk         if ($0 ~ /^[A-Za-z]+ *:/) {     # name:
qawk             gsub(/[^A-Za-z]+/, "", $0)  # remove all but name
qawk             relname[++nrel] = $0
qawk         } else if ($0 ~ /^[ \t]*!/)     # !command...
qawk             cmd[nrel, ++ncmd[nrel]] = substr($0,index($0,"!")+1)
qawk         else if ($0 ~ /^[ \t]*[A-Za-z]+[ \t]*$/)  # attribute
qawk             attr[nrel, $1] = ++nattr[nrel]
qawk         else if ($0 !~ /^[ \t]*$/)      # not white space
qawk             print "bad line in relfile:", $0
qawk     if (status < 0)
qawk         print "cannot read relfile:", f
qawk     close(f)
qawk }
qawk # doquery - run one query against the first relation that has every
qawk #   attribute the query mentions
qawk #   s: awk program text in which fields are written as $attributename
qawk #   The commands attached to the chosen relation are run first; the
qawk #   query, with each $name replaced by its field number, is then run
qawk #   by a separate awk on the file named after the relation.
qawk function doquery(s,   i,j,rest,name,out) {
qawk     for (i in qattr)  # clean up for next query
qawk         delete qattr[i]
qawk     query = s    # put $names in query into qattr, without $
qawk     rest = s
qawk     while (match(rest, /\$[A-Za-z]+/)) {
qawk         qattr[substr(rest, RSTART+1, RLENGTH-1)] = 1
qawk         # resume directly after the name, so that the second of two
qawk         # adjacent names ($a$b) is not skipped
qawk         rest = substr(rest, RSTART+RLENGTH)
qawk     }
qawk     for (i = 1; i <= nrel && !subset(qattr, attr, i); )
qawk         i++
qawk     if (i > nrel)     # didn't find a table with all attributes
qawk         missing(qattr)
qawk     else {            # table i contains attributes in query
qawk         # create awk program: replace every $name as a whole, so that
qawk         # a name which is a prefix of another one cannot damage it
qawk         out = ""
qawk         rest = query
qawk         while (match(rest, /\$[A-Za-z]+/)) {
qawk             name = substr(rest, RSTART+1, RLENGTH-1)
qawk             out = out substr(rest, 1, RSTART-1) "$" attr[i, name]
qawk             rest = substr(rest, RSTART+RLENGTH)
qawk         }
qawk         query = out rest
qawk         for (j = 1; j <= ncmd[i]; j++)  # create table i
qawk             if (system(cmd[i, j]) != 0) {
qawk                 print "command failed, query skipped\n", cmd[i,j]
qawk                 return
qawk             }
qawk         awkcmd = sprintf("awk -F'\t' '%s' %s", query, relname[i])
qawk         printf("query: %s\n", awkcmd)   # for debugging
qawk         system(awkcmd)
qawk     }
qawk }
qawk # subset - 1 when every index of q appears in a paired with r, else 0
qawk function subset(q, a, r,   i) {
qawk     for (i in q) {
qawk         if ((r, i) in a)
qawk             continue
qawk         return 0          # attribute i is not in relation r
qawk     }
qawk     return 1
qawk }
qawk # missing - report that no relation covers the attributes indexing x
qawk function missing(x,     i) {
qawk     printf("%s\n", "no table contains all of the following attributes:")
qawk     for (i in x)
qawk         printf("%s\n", i)
qawk }
qawk1.ans # qawk - awk relational database query processor
qawk1.ans #   relation descriptions are read from the file "relfile" at startup;
qawk1.ans #   every input line containing at least one character is then handed
qawk1.ans #   to doquery as one query
qawk1.ans 
qawk1.ans BEGIN { readrel("relfile") }
qawk1.ans /./   { doquery($0) }
qawk1.ans 
qawk1.ans # readrel - load relation descriptions from file f
qawk1.ans #   "name:" starts relation number ++nrel (stored in relname);
qawk1.ans #   a line whose first non-blank character is "!" adds a command to
qawk1.ans #   cmd[nrel, 1..ncmd[nrel]]; a line holding a single alphabetic word
qawk1.ans #   adds an attribute, numbered in order of appearance, to attr.
qawk1.ans #   Other non-blank lines are reported. The file is closed afterwards
qawk1.ans #   and a read failure is reported instead of being ignored.
qawk1.ans function readrel(f,   status) {
qawk1.ans     while ((status = (getline <f)) > 0)   # parse relfile
qawk1.ans         if ($0 ~ /^[A-Za-z]+ *:/) {     # name:
qawk1.ans             gsub(/[^A-Za-z]+/, "", $0)  # remove all but name
qawk1.ans             relname[++nrel] = $0
qawk1.ans         } else if ($0 ~ /^[ \t]*!/)     # !command...
qawk1.ans             cmd[nrel, ++ncmd[nrel]] = substr($0,index($0,"!")+1)
qawk1.ans         else if ($0 ~ /^[ \t]*[A-Za-z]+[ \t]*$/)  # attribute
qawk1.ans             attr[nrel, $1] = ++nattr[nrel]
qawk1.ans         else if ($0 !~ /^[ \t]*$/)      # not white space
qawk1.ans             print "bad line in relfile:", $0
qawk1.ans     if (status < 0)
qawk1.ans         print "cannot read relfile:", f
qawk1.ans     close(f)
qawk1.ans }
qawk1.ans # doquery - run one query against the first relation that has every
qawk1.ans #   attribute the query mentions
qawk1.ans #   s: awk program text in which fields are written as $attributename
qawk1.ans #   The commands attached to the chosen relation are run as one
qawk1.ans #   shell script, and only the first time that relation is used
qawk1.ans #   (tracked in exists); the query, with each $name replaced by its
qawk1.ans #   field number, is then run by a separate awk on the file named
qawk1.ans #   after the relation.
qawk1.ans function doquery(s,   i,j,x,rest,name,out) {
qawk1.ans     for (i in qattr)  # clean up for next query
qawk1.ans         delete qattr[i]
qawk1.ans     query = s    # put $names in query into qattr, without $
qawk1.ans     rest = s
qawk1.ans     while (match(rest, /\$[A-Za-z]+/)) {
qawk1.ans         qattr[substr(rest, RSTART+1, RLENGTH-1)] = 1
qawk1.ans         # resume directly after the name, so that the second of two
qawk1.ans         # adjacent names ($a$b) is not skipped
qawk1.ans         rest = substr(rest, RSTART+RLENGTH)
qawk1.ans     }
qawk1.ans     for (i = 1; i <= nrel && !subset(qattr, attr, i); )
qawk1.ans         i++
qawk1.ans     if (i > nrel)     # didn't find a table with all attributes
qawk1.ans         missing(qattr)
qawk1.ans     else {            # table i contains attributes in query
qawk1.ans         # create awk program: replace every $name as a whole, so that
qawk1.ans         # a name which is a prefix of another one cannot damage it
qawk1.ans         out = ""
qawk1.ans         rest = query
qawk1.ans         while (match(rest, /\$[A-Za-z]+/)) {
qawk1.ans             name = substr(rest, RSTART+1, RLENGTH-1)
qawk1.ans             out = out substr(rest, 1, RSTART-1) "$" attr[i, name]
qawk1.ans             rest = substr(rest, RSTART+RLENGTH)
qawk1.ans         }
qawk1.ans         query = out rest
qawk1.ans         if (!exists[i] && ncmd[i] > 0) {
qawk1.ans             for (j = 1; j <= ncmd[i]; j++)
qawk1.ans                 x = x cmd[i, j] "\n"
qawk1.ans             print "executing\n" x  # for debugging
qawk1.ans             if (system(x) != 0) { # create table i
qawk1.ans                 print "command failed, query skipped\n", x
qawk1.ans                 return
qawk1.ans             }
qawk1.ans             exists[i]++
qawk1.ans         }
qawk1.ans         awkcmd = sprintf("awk -F'\t' '%s' %s", query, relname[i])
qawk1.ans         printf("query: %s\n", awkcmd)   # for debugging
qawk1.ans         system(awkcmd)
qawk1.ans     }
qawk1.ans }
qawk1.ans # subset - 1 when every index of q appears in a paired with r, else 0
qawk1.ans function subset(q, a, r,   i) {
qawk1.ans     for (i in q) {
qawk1.ans         if ((r, i) in a)
qawk1.ans             continue
qawk1.ans         return 0          # attribute i is not in relation r
qawk1.ans     }
qawk1.ans     return 1
qawk1.ans }
qawk1.ans # missing - report that no relation covers the attributes indexing x
qawk1.ans function missing(x,     i) {
qawk1.ans     printf("%s\n", "no table contains all of the following attributes:")
qawk1.ans     for (i in x)
qawk1.ans         printf("%s\n", i)
qawk1.ans }