/* ---------------------------------------------------------------------------
MCMC thinning utlity for use with IM surface files (*.surf).
Code written by C R Young
Last updated: 10 Oct 2005
Copyright (C) 2005 C. R. Young
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
Input files:
"filelist.txt" contains the names of the files that you want to read
outfile.txt_0000.surf all of the surface files listed in "filelist.txt"
...
Output files:
"MCMCdata.out" Contains the thinned dataset and immigration estimates rescaled to 2Nem
Useage:
MCMCtest
Options:
integer > 0. Ignores this many samples at the beginning of files
Subsample every x samples from file.
numbers thinned samples for plotting traces
Examples of usage for 500000 burnin and sampling every 5000 iterations with iteration numbering turned on:
./MCMCthin 500000 5000 1
Compilation (linux):
gcc MCMCthin.c -oMCMCthin
Also compiled and checked on Visual C++ 2005 Express Edition Beta
---------------------------------------------------------------------------*/
#include
#include
#define MAXSTR 80
#define MAXFILES 50
/* ---------------------------------------------------------------------------*/
/* Runtime errors */
/* */
/* ---------------------------------------------------------------------------*/
/* My error message */
void MyError(error_text)
char error_text[];
{
fprintf(stderr,"\nRY's run-time error...\n");
fprintf(stderr,"\n\t%s\n",error_text);
fprintf(stderr,"\n...now exiting to system...\n\n");
exit(1);
}
main (int argc, char *argv[])
{
FILE *fptr, *Files, *MCMCdata;
char fname[MAXSTR], currentfile[MAXSTR];
char filelist[MAXFILES][MAXSTR];
long int count_check;
int end, num, subsample, subdiff, subcount, printcount, numfiles, file, count, diff, burnin, number_switch;
float q1, q2, qA, m1, m2, t;
float q1prev, q2prev, qAprev, m1prev, m2prev, tprev;
char check;
int *filesize;
/*********************/
/* Begin read files */
/*********************/
if( argc < 4) {
printf("usage: MCMCtest \n");
exit(1) ;
}
burnin = atoi(argv[1]);
if (burnin < 0) {
printf ( "Burnin doesn't make sense\n");
MyError("usage: MCMCtest = 0> \n");
}
subsample = atoi(argv[2]);
if (subsample < 1) {
printf ( "Thinning interval < 1... must be >= 1\n");
MyError("usage: MCMCtest = 1> \n");
}
number_switch = atoi(argv[3]);
if (number_switch < 0 || number_switch > 1) {
printf ( "Do not understand whether you want to print iteration number\n");
MyError("usage: MCMCtest \n");
}
/* Read file lookup table ("filelist.txt") and keep names in filelist */
Files = fopen ("filelist.txt", "r");
if (!Files)
{
printf ( "Error opening file!\nCould not read filelist.txt...\n");
MyError("Required file not found!\n");
}
end = 0;
numfiles = 0;
printf("\nFiles to read:\n");
while (end == 0) {
if(end == 0){
fscanf(Files,"%s", &filelist[numfiles]);
end = feof(Files);
if(end == 0){
printf( "%s\n", filelist[numfiles]);
}
numfiles += 1;
}
}
printf("\n");
numfiles -= 1;
if(fclose(Files) != 0) MyError("Could not close file: filelist.txt.\n");
/* memory allocation trap */
if(numfiles > MAXFILES){
printf ( "Error, too many files! (max = %d)\n", MAXFILES);
MyError("Recompile with MAXFILES set to a higher number.\n");
}
printf("Burnin = %d\nThinning = %d\n", burnin, subsample);
printf("Number of files to read = %d\n\n", numfiles);
/* Initializations */
count = 0;
subcount = 0;
subdiff = 0;
printcount = 0;
count_check = 0;
end = 0;
/* allocate array that holds number of lines in each file */
filesize = (int *) malloc( (size_t) ( (numfiles) * sizeof(int) ) );
MCMCdata = fopen ("MCMCdata.out", "w");
if (!MCMCdata)
{
printf ( "Could not open MCMCdata.out for writing...\n");
MyError("Error opening file for writing!\n");
}
if(number_switch == 1){
fprintf (MCMCdata, "Iter\t");
}
fprintf (MCMCdata, "q1\tq2\tqA\tt\tm1\tm2\n");
for (file = 0; file < numfiles; file++){
sprintf(currentfile, "%s", filelist[file]); //open file out of file list (read from "filelist.txt")
fptr = fopen (currentfile, "r");
if (!fptr)
{
printf ( "Error opening file %d!\n", file);
MyError("Could not read infile\n");
}
printf ( "Opened file %d: %s...\n", file, currentfile);
/* read the first line of the file and store this as a previous value (zero freq) */
fscanf(fptr,"%d%f%f%f%f%f%f", &num, &q1prev, &q2prev, &qAprev, &tprev, &m1prev, &m2prev);
/* keeping track of the total samples in the file */
filesize[file] = 0;
while( end == 0){
fscanf(fptr,"%d", &num); // frequency of the line
fscanf(fptr,"%*c"); // get rid of tab char (fixed error 10/10/05 linux-win bug)
end = feof(fptr); // check to see if end of file has been reached
if( end == 0 ) {
count += num; // counts total number of MCMC iterations in all files
count_check += 1; // counts number of lines in all files for error check
filesize[file] += num; // counts total number of MCMC iterations in current file
check = getc(fptr); // pull out a char to check for tab
if( check != '\t'){ //
ungetc(check,fptr); // put char back in stack
fscanf(fptr,"%f", &q1); // pull out float for update
getc(fptr); // pull out extra tab
} else {
q1 = q1prev; // was tab so value did not change (no extra tab here)
}
check = getc(fptr);
if( check != '\t'){
ungetc(check,fptr);
fscanf(fptr,"%f", &q2);
getc(fptr);
} else {
q2 = q2prev;
}
check = getc(fptr);
if( check != '\t'){
ungetc(check,fptr);
fscanf(fptr,"%f", &qA);
getc(fptr);
} else {
qA = qAprev;
}
check = getc(fptr);
if( check != '\t'){
ungetc(check,fptr);
fscanf(fptr,"%f", &t);
getc(fptr);
} else {
t = tprev;
}
check = getc(fptr);
if( check != '\t'){
ungetc(check,fptr);
fscanf(fptr,"%f", &m1);
getc(fptr);
} else {
m1 = m1prev;
}
check = getc(fptr);
if( check != '\t'){
ungetc(check,fptr);
fscanf(fptr,"%f", &m2);
getc(fptr);
} else {
m2 = m2prev;
}
/* assign values read this line to prev. holders for next read */
q1prev = q1;
q2prev = q2;
qAprev = qA;
m1prev = m1;
m2prev = m2;
tprev = t;
diff = count - burnin; // diff is difference in total count so far (including current line, 'num') and burnin
if(diff < num) {
num = diff; // if cross the burnin threshold during this line, toss out some of num not satisfying burnin
}
if(diff > 0){ // check to see if past the burnin
subdiff = num - subcount; // see if num (freq of this line) is larger than thinning remaining
if(subdiff < 0) {
subcount -= num; // keep counting down if it is not there yet
} else {
subcount = subsample - subdiff; // take extra samples off of next thinning count
printcount += 1;
if(number_switch == 1){
fprintf (MCMCdata, "%d\t", printcount);
}
fprintf (MCMCdata, "%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n", q1, q2, qA, t, m1, m2);
if (subcount < 0){ // added to catch error when thinning freq is less than total count of line
while (subcount < 0){
printcount += 1;
if(number_switch == 1){
fprintf (MCMCdata, "%d\t", printcount);
}
fprintf (MCMCdata, "%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n", q1, q2, qA, t, m1, m2);
subcount += subsample; // keep printing until thinning catches up with number on line
}
}
}
}
}
}
if(fclose(fptr) != 0) MyError("Could not close file %d\n", file);
printf ( "Closed file %d... filesize is %d\n", file, filesize[file]);
end = 0;
}
if(fclose(MCMCdata) != 0) MyError("Could not close file: MCMCdata.out\n");
printf ( "subsampled every %d samples for print...\n", subsample);
printf ( "printed=%d\n\n", printcount);
}