# Carolyn Tolstoshev, NCBI
# 1/10/90
#
# Scans GenBank entries and writes a FASTA-format sequence file.
# For each entry the output is one header line
#
#     >LOCUS_NAME (GenBank) definition text
#
# followed by the sequence lines, converted to upper case.

# LOCUS line: start the FASTA header with the locus name ($2).
# The header line is deliberately left unterminated here; the DEFINITION
# rule normally completes it.  header_open records that a newline is
# still owed in case the entry has no DEFINITION line.
$1 == "LOCUS" {
    printf(">%s (GenBank) ", $2)
    header_open = 1
}

# DEFINITION line(s): the text starts at column 12 of the keyword line and
# may continue on following lines that begin with a blank.  Continuation
# lines are appended field by field, separated by single spaces.
$1 == "DEFINITION" {
    definition = substr($0, 12)

    # Check getline's return value: at end of input (0) or on a read
    # error (-1) $0 is left unchanged, so testing $0 alone would loop
    # forever on a truncated file.
    while ((status = getline) > 0 && substr($0, 1, 1) == " ") {
        for (f = 1; f <= NF; f++)
            definition = definition " " $f
    }

    printf("%s\n", definition)
    header_open = 0

    # Input ran out inside the definition: $0 is stale, so do not let it
    # fall through to the rules below.
    if (status <= 0)
        exit

    # Otherwise $0 now holds the first line after the definition and is
    # still tested against the remaining rules.
}

# ORIGIN line: immediately precedes the sequence.  Print every following
# line up to the one starting with "/" (the end-of-entry marker).
$1 == "ORIGIN" {
    # Entry had a LOCUS but no DEFINITION: finish the header line so the
    # sequence does not end up on it.
    if (header_open) {
        printf("\n")
        header_open = 0
    }

    # The loop also stops when getline reports end of input or an error,
    # so an entry with a missing terminator cannot hang the program.
    while ((getline) > 0 && substr($0, 1, 1) != "/") {
        # Fields 2..7 are concatenated; field 1 is skipped (presumably the
        # base-position counter -- confirm against the input format).
        sline = $2 $3 $4 $5 $6 $7
        printf("%s\n", upper(sline))
    }
}

######################################################################
# upper(sline, n)
#   Returns sline with every lower-case letter converted to upper case.
#   All other characters are passed through unchanged.
#   sline - the string to convert
#   n     - unused; kept so the original signature stays the same
function upper(sline, n)
{
    return toupper(sline)
}