# Carolyn Tolstoshev, NCBI
# 1/10/90
#
# Scans a PIR file and writes its sequences in FASTA format.
#
# For every entry (a record whose first field is "ENTRY", terminated by a
# record whose first field is "///") that has both a TITLE and a SEQUENCE
# record, the program prints
#
#     >title text[ | placement fields 2-6]
#     sequence, wrapped at 60 characters per line
#
# Entries without a TITLE or without a SEQUENCE record produce no output.
# If the input ends in the middle of an entry, whatever has been collected
# so far is handled as described below and the program exits instead of
# waiting forever for a terminator that will never arrive.

$1 == "ENTRY" {
    # --- Locate the TITLE record ------------------------------------------
    # getline returns 0 at end of input (and -1 on error) while leaving $0
    # untouched, so every scan loop must test it or it would never end.
    while ($1 != "TITLE" && $1 != "///")
        if ((getline) <= 0) exit

    # Entry ended before any TITLE was seen: nothing to print for it.
    if ($1 != "TITLE") next

    # --- Build the definition line ----------------------------------------
    # Start with the words after the TITLE keyword ...
    definition = $2
    for (i = 3; i <= NF; i++)
        definition = definition " " $i

    # ... then append continuation lines, i.e. the following lines whose
    # first character is a blank.
    more = ((getline) > 0)
    while (more && substr($0, 1, 1) == " ") {
        for (i = 1; i <= NF; i++)
            definition = definition " " $i
        more = ((getline) > 0)
    }
    if (!more) exit

    # --- Optional PLACEMENT record ----------------------------------------
    # Stop at "///" as well, so that an entry lacking both PLACEMENT and
    # SEQUENCE cannot run on into the entry that follows it.
    while ($1 != "PLACEMENT" && $1 != "SEQUENCE" && $1 != "///")
        if ((getline) <= 0) exit

    # Fields 2-6 of the PLACEMENT record are appended after a " | " marker.
    # Missing fields still contribute their separating blanks.
    if ($1 == "PLACEMENT")
        definition = definition " | " $2 " " $3 " " $4 " " $5 " " $6

    # --- Locate the SEQUENCE record ---------------------------------------
    while ($1 != "SEQUENCE" && $1 != "///")
        if ((getline) <= 0) exit

    # Entry has no sequence section: skip it.
    if ($1 == "///") next

    printf(">%s\n", definition)

    # --- Collect the residues ---------------------------------------------
    # The line directly after SEQUENCE is skipped (presumably the column
    # ruler -- confirm against the PIR format description).  Only move on
    # to the next line if that skipped line is not already the terminator.
    seq = ""
    more = ((getline) > 0)
    if (more && $1 != "///")
        more = ((getline) > 0)

    # On each remaining line the residues are the single characters found
    # at columns 9, 11, 13, ... up to the end of the line.
    while (more && $1 != "///") {
        width = length($0)
        for (i = 9; i <= width; i += 2)
            seq = seq substr($0, i, 1)
        more = ((getline) > 0)
    }

    # --- Print the sequence, 60 characters per line -----------------------
    lseq = length(seq)
    if (lseq == 0)
        printf("\n")        # an empty sequence is written as one blank line
    for (p = 1; p <= lseq; p += 60)
        printf("%s\n", substr(seq, p, 60))

    # Input ran out inside this entry; it has been flushed, so stop here.
    if (!more) exit
}