/* --------------------------------------------------------------------------- MCMC thinning utlity for use with IM surface files (*.surf). Code written by C R Young Last updated: 10 Oct 2005 Copyright (C) 2005 C. R. Young This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Input files: "filelist.txt" contains the names of the files that you want to read outfile.txt_0000.surf all of the surface files listed in "filelist.txt" ... Output files: "MCMCdata.out" Contains the thinned dataset and immigration estimates rescaled to 2Nem Useage: MCMCtest Options: integer > 0. Ignores this many samples at the beginning of files Subsample every x samples from file. numbers thinned samples for plotting traces Examples of usage for 500000 burnin and sampling every 5000 iterations with iteration numbering turned on: ./MCMCthin 500000 5000 1 Compilation (linux): gcc MCMCthin.c -oMCMCthin Also compiled and checked on Visual C++ 2005 Express Edition Beta ---------------------------------------------------------------------------*/ #include #include #define MAXSTR 80 #define MAXFILES 50 /* ---------------------------------------------------------------------------*/ /* Runtime errors */ /* */ /* ---------------------------------------------------------------------------*/ /* My error message */ void MyError(error_text) char error_text[]; { fprintf(stderr,"\nRY's run-time error...\n"); fprintf(stderr,"\n\t%s\n",error_text); fprintf(stderr,"\n...now exiting to system...\n\n"); exit(1); } main (int argc, char *argv[]) { FILE *fptr, *Files, *MCMCdata; char fname[MAXSTR], currentfile[MAXSTR]; char filelist[MAXFILES][MAXSTR]; long int count_check; int end, num, subsample, subdiff, subcount, printcount, numfiles, file, count, diff, burnin, number_switch; float q1, q2, qA, m1, m2, t; float q1prev, q2prev, qAprev, m1prev, m2prev, tprev; char check; int *filesize; /*********************/ /* Begin read files */ /*********************/ if( argc < 4) { printf("usage: MCMCtest \n"); exit(1) ; } burnin = atoi(argv[1]); if (burnin < 0) { printf ( "Burnin doesn't make sense\n"); MyError("usage: MCMCtest = 0> \n"); } subsample = atoi(argv[2]); if (subsample < 1) { printf ( "Thinning interval < 1... must be >= 1\n"); MyError("usage: MCMCtest = 1> \n"); } number_switch = atoi(argv[3]); if (number_switch < 0 || number_switch > 1) { printf ( "Do not understand whether you want to print iteration number\n"); MyError("usage: MCMCtest \n"); } /* Read file lookup table ("filelist.txt") and keep names in filelist */ Files = fopen ("filelist.txt", "r"); if (!Files) { printf ( "Error opening file!\nCould not read filelist.txt...\n"); MyError("Required file not found!\n"); } end = 0; numfiles = 0; printf("\nFiles to read:\n"); while (end == 0) { if(end == 0){ fscanf(Files,"%s", &filelist[numfiles]); end = feof(Files); if(end == 0){ printf( "%s\n", filelist[numfiles]); } numfiles += 1; } } printf("\n"); numfiles -= 1; if(fclose(Files) != 0) MyError("Could not close file: filelist.txt.\n"); /* memory allocation trap */ if(numfiles > MAXFILES){ printf ( "Error, too many files! (max = %d)\n", MAXFILES); MyError("Recompile with MAXFILES set to a higher number.\n"); } printf("Burnin = %d\nThinning = %d\n", burnin, subsample); printf("Number of files to read = %d\n\n", numfiles); /* Initializations */ count = 0; subcount = 0; subdiff = 0; printcount = 0; count_check = 0; end = 0; /* allocate array that holds number of lines in each file */ filesize = (int *) malloc( (size_t) ( (numfiles) * sizeof(int) ) ); MCMCdata = fopen ("MCMCdata.out", "w"); if (!MCMCdata) { printf ( "Could not open MCMCdata.out for writing...\n"); MyError("Error opening file for writing!\n"); } if(number_switch == 1){ fprintf (MCMCdata, "Iter\t"); } fprintf (MCMCdata, "q1\tq2\tqA\tt\tm1\tm2\n"); for (file = 0; file < numfiles; file++){ sprintf(currentfile, "%s", filelist[file]); //open file out of file list (read from "filelist.txt") fptr = fopen (currentfile, "r"); if (!fptr) { printf ( "Error opening file %d!\n", file); MyError("Could not read infile\n"); } printf ( "Opened file %d: %s...\n", file, currentfile); /* read the first line of the file and store this as a previous value (zero freq) */ fscanf(fptr,"%d%f%f%f%f%f%f", &num, &q1prev, &q2prev, &qAprev, &tprev, &m1prev, &m2prev); /* keeping track of the total samples in the file */ filesize[file] = 0; while( end == 0){ fscanf(fptr,"%d", &num); // frequency of the line fscanf(fptr,"%*c"); // get rid of tab char (fixed error 10/10/05 linux-win bug) end = feof(fptr); // check to see if end of file has been reached if( end == 0 ) { count += num; // counts total number of MCMC iterations in all files count_check += 1; // counts number of lines in all files for error check filesize[file] += num; // counts total number of MCMC iterations in current file check = getc(fptr); // pull out a char to check for tab if( check != '\t'){ // ungetc(check,fptr); // put char back in stack fscanf(fptr,"%f", &q1); // pull out float for update getc(fptr); // pull out extra tab } else { q1 = q1prev; // was tab so value did not change (no extra tab here) } check = getc(fptr); if( check != '\t'){ ungetc(check,fptr); fscanf(fptr,"%f", &q2); getc(fptr); } else { q2 = q2prev; } check = getc(fptr); if( check != '\t'){ ungetc(check,fptr); fscanf(fptr,"%f", &qA); getc(fptr); } else { qA = qAprev; } check = getc(fptr); if( check != '\t'){ ungetc(check,fptr); fscanf(fptr,"%f", &t); getc(fptr); } else { t = tprev; } check = getc(fptr); if( check != '\t'){ ungetc(check,fptr); fscanf(fptr,"%f", &m1); getc(fptr); } else { m1 = m1prev; } check = getc(fptr); if( check != '\t'){ ungetc(check,fptr); fscanf(fptr,"%f", &m2); getc(fptr); } else { m2 = m2prev; } /* assign values read this line to prev. holders for next read */ q1prev = q1; q2prev = q2; qAprev = qA; m1prev = m1; m2prev = m2; tprev = t; diff = count - burnin; // diff is difference in total count so far (including current line, 'num') and burnin if(diff < num) { num = diff; // if cross the burnin threshold during this line, toss out some of num not satisfying burnin } if(diff > 0){ // check to see if past the burnin subdiff = num - subcount; // see if num (freq of this line) is larger than thinning remaining if(subdiff < 0) { subcount -= num; // keep counting down if it is not there yet } else { subcount = subsample - subdiff; // take extra samples off of next thinning count printcount += 1; if(number_switch == 1){ fprintf (MCMCdata, "%d\t", printcount); } fprintf (MCMCdata, "%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n", q1, q2, qA, t, m1, m2); if (subcount < 0){ // added to catch error when thinning freq is less than total count of line while (subcount < 0){ printcount += 1; if(number_switch == 1){ fprintf (MCMCdata, "%d\t", printcount); } fprintf (MCMCdata, "%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n", q1, q2, qA, t, m1, m2); subcount += subsample; // keep printing until thinning catches up with number on line } } } } } } if(fclose(fptr) != 0) MyError("Could not close file %d\n", file); printf ( "Closed file %d... filesize is %d\n", file, filesize[file]); end = 0; } if(fclose(MCMCdata) != 0) MyError("Could not close file: MCMCdata.out\n"); printf ( "subsampled every %d samples for print...\n", subsample); printf ( "printed=%d\n\n", printcount); }