/* 13 October 2003 */

/*
 * Sample easy-to-use main to call the function string in order to search
 * Tandem Repeats in a nucleotide sequence (contained in a file with
 * extension .seq, written either in GenBank or in FASTA format).
 *
 * The user must provide the sequence file name without extension and
 * optionally the score threshold value.  In a unix-like environment the
 * user can provide them as two optional parameters (if both parameters are
 * missing they are both requested; if only the first parameter is given, a
 * default value is used for the second).
 *
 * The two output file names are automatically created by completing the
 * file name with the extensions .ris and .tab .
 *
 * Exit status: 2 for a bad command line / prompt answer, 3 when the
 * sequence cannot be read or is empty, 4 and 5 when the .ris / .tab file
 * cannot be created; otherwise the value returned by string ().
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main (int argc, char** argv);
int read_f (char*, unsigned long*, char**);
int string (unsigned long, char*, FILE*, FILE*, int);

static int skip_header (FILE *fp);

#define len_name 81         /* longest accepted base name, including the NUL */
#define score_default 500   /* threshold used when none is supplied */

/* fi: sequence file; fo1: detailed results (.ris); fo2: tabled results (.tab) */
FILE *fi, *fo1, *fo2;

int main (int argc, char** argv)
{
  char name [len_name];
  char name_s [len_name+4];   /* sequence file name */
  char name_r [len_name+4];   /* detailed results file name */
  char name_t [len_name+4];   /* tabled results file name */
  char *s = NULL;
  unsigned long bp_no = 0;
  int sc = score_default;
  int err;

  /* Base name: first argument, or asked interactively. */
  if (argc >= 2) {
    if (strlen (argv [1]) >= len_name) {
      printf ("FILE NAME TOO LONG (max 80)!\n");
      exit (2);
    }
    strcpy (name, argv [1]);
  } else {
    printf ("sequence name? ");
    fflush (stdout);
    /* Without this check a failed read would leave name uninitialized. */
    if (scanf ("%80s", name) != 1) {
      printf ("FILE NAME MISSING!\n");
      exit (2);
    }
  }

  /* Each buffer holds the base name (max 80) + 4-char extension + NUL. */
  snprintf (name_s, sizeof name_s, "%s.seq", name);
  snprintf (name_r, sizeof name_r, "%s.ris", name);
  snprintf (name_t, sizeof name_t, "%s.tab", name);

  /* Threshold: second argument, or asked only when no argument was given. */
  if (argc > 2) {
    char *end;
    long value;

    errno = 0;
    value = strtol (argv [2], &end, 10);
    /* Reject "no digits at all" and values that do not fit an int;
       trailing characters after the number are tolerated. */
    if (end == argv [2] || errno == ERANGE
        || value < INT_MIN || value > INT_MAX) {
      printf ("WRONG SCORE VALUE!\n");
      exit (2);
    }
    sc = (int) value;
  } else if (argc < 2) {
    printf ("score? ");
    fflush (stdout);
    if (scanf ("%d", &sc) != 1)
      sc = score_default;     /* unreadable answer: keep the default */
  }

  if (!read_f (name_s, &bp_no, &s))
    exit (3);
  if (!bp_no) {
    printf ("WRONG FILE LENGTH!\n");
    free (s);
    exit (3);
  }

  if ((fo1 = fopen (name_r, "w")) == NULL) {
    printf ("IMPOSSIBLE TO OPEN %s!\n", name_r);
    free (s);
    exit (4);
  }
  if ((fo2 = fopen (name_t, "w")) == NULL) {
    printf ("IMPOSSIBLE TO OPEN %s!\n", name_t);
    fclose (fo1);
    free (s);
    exit (5);
  }

  fprintf (fo1, "sequence name: %s with threshold %d\n\n", name_s, sc);
  printf ("\n--------sequence length %lu\n\n", bp_no);
  printf ("threshold %d\n", sc);
  printf ("%10s %10s %6s %7s %s\n",
          "TR start", "TR end", "length", "score", "word");

  err = string (bp_no, s, fo1, fo2, sc);
  free (s);

  if (!err) {
    printf ("DONE!\n");
    fprintf (fo1, "\n******* DONE!\n\n\n\n\n\n");
  } else {
    printf ("WARNING(S).........DONE!\n");
    fprintf (fo1, "\nWARNING(S).........DONE!\n\n\n\n\n\n");
  }

  fclose (fo1);
  fclose (fo2);
  return (err);
}

/*
 * Advances fp past the line that introduces the sequence data: a line
 * starting with '>' (FASTA) or with "ORIGIN" (GenBank).  A "line" ends at
 * the first non-printable character.  Only the first few characters of
 * each line are kept, so header lines of any length are accepted.
 *
 * Returns 1 when such a line was found and consumed, 0 (after printing
 * "UNKNOWN FORMAT") when the end of the file is reached first.
 */
static int skip_header (FILE *fp)
{
  static const char origin [] = "ORIGIN";
  char head [sizeof origin - 1];
  size_t kept;
  int c;

  for (;;) {
    memset (head, 0, sizeof head);
    kept = 0;
    /* fgetc yields an unsigned char value or EOF, both valid for isprint. */
    while ((c = fgetc (fp)) != EOF && isprint (c)) {
      if (kept < sizeof head)
        head [kept++] = (char) c;
    }
    if (c == EOF) {
      printf ("UNKNOWN FORMAT\n");
      return 0;
    }
    if (head [0] == '>' || !memcmp (head, origin, sizeof head))
      return 1;
  }
}

/*
 * Unsophisticated function that:
 *  - reads from a file (name_s) a nucleotide sequence (either in GenBank
 *    or in FASTA format), storing every alphabetic character found after
 *    the header line into a newly allocated buffer returned through *s
 *    (not NUL-terminated; the caller must free it);
 *  - sets *bp_no to the length of the sequence;
 *  - returns 1 (or 0) to flag the success (or failure).
 *
 * On failure a message is printed, *bp_no is 0 and *s is NULL.  An empty
 * sequence is reported as success with *bp_no == 0.  The file is scanned
 * twice through the global stream fi: once to count the bases and once to
 * store them; fi is closed again before returning.
 */
int read_f (char *name_s, unsigned long *bp_no, char** s)
{
  unsigned long num_bases = 0;
  unsigned long stored = 0;
  char *seq;
  int c;

  *bp_no = 0;
  *s = NULL;

  if ((fi = fopen (name_s, "r")) == NULL) {
    printf ("IMPOSSIBLE TO OPEN %s!\n", name_s);
    return 0;
  }

  /* First pass: count the bases so the buffer can be sized exactly. */
  if (!skip_header (fi)) {
    fclose (fi);
    return 0;
  }
  while ((c = fgetc (fi)) != EOF)
    if (isalpha (c))
      num_bases++;

  /* Never ask for 0 bytes: malloc (0) may legitimately return NULL. */
  if ((seq = malloc (num_bases ? num_bases : 1)) == NULL) {
    printf ("MEMORY ALLOCATION FAILURE\n");
    fclose (fi);
    return 0;
  }

  /* Second pass over the same stream: store the bases. */
  rewind (fi);
  if (!skip_header (fi)) {
    free (seq);
    fclose (fi);
    return 0;
  }
  /* Bounded by the first count in case the file changed in between. */
  while (stored < num_bases && (c = fgetc (fi)) != EOF)
    if (isalpha (c))
      seq [stored++] = (char) c;

  fclose (fi);
  *s = seq;
  *bp_no = stored;
  return 1;
}