Functions
utils.c File Reference

Miscellaneous general utility and helper functions. More...

Functions

void storeExecuteMaskInTraversalDescriptor (pllInstance *tr, partitionList *pr)
 
void storeValuesInTraversalDescriptor (pllInstance *tr, partitionList *pr, double *value)
 
void printBothOpen (const char *format,...)
 
const unsigned int * getBitVector (int dataType)
 
int getUndetermined (int dataType)
 
const partitionLengthsgetPartitionLengths (pInfo *p)
 
size_t discreteRateCategories (int rateHetModel)
 
double gettime (void)
 
int gettimeSrand (void)
 
double randum (long *seed)
 
FILE * myfopen (const char *path, const char *mode)
 
boolean isTip (int number, int maxTips)
 Check whether a node is a tip. More...
 
void getxnode (nodeptr p)
 Set the orientation of a node. More...
 
void hookup (nodeptr p, nodeptr q, double *z, int numBranches)
 Connect two nodes and assign branch lengths. More...
 
void hookupFull (nodeptr p, nodeptr q, double *z)
 
void hookupDefault (nodeptr p, nodeptr q)
 
boolean whitechar (int ch)
 
void printLog (pllInstance *tr)
 
nodeptr pllGetRandomSubtree (pllInstance *tr)
 Get a random subtree. More...
 
void computeAllAncestralVectors (nodeptr p, pllInstance *tr, partitionList *pr)
 
void initializePartitionData (pllInstance *localTree, partitionList *localPartitions)
 
int virtual_width (int n)
 
void initMemorySavingAndRecom (pllInstance *tr, partitionList *pr)
 
double pllGetBranchLength (pllInstance *tr, nodeptr p, int partition_id)
 Get the length of a specific branch. More...
 
void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl)
 Set the length of a specific branch. More...
 
void pllPartitionsDestroy (pllInstance *tr, partitionList **partitions)
 free all data structures associated to a partition More...
 
int pllPartitionsValidate (pllQueue *parts, pllAlignmentData *alignmentData)
 Correspondance check between partitions and alignment. More...
 
partitionListpllPartitionsCommit (pllQueue *parts, pllAlignmentData *alignmentData)
 Constructs the proposed partition scheme. More...
 
void pllAlignmentRemoveDups (pllAlignmentData *alignmentData, partitionList *pl)
 Remove duplicate sites from alignment and update weights vector. More...
 
double ** pllBaseFrequenciesGTR (partitionList *pl, pllAlignmentData *alignmentData)
 Compute the empirical base frequencies for all partitions. More...
 
void pllEmpiricalFrequenciesDestroy (double ***empiricalFrequencies, int models)
 
int pllLoadAlignment (pllInstance *tr, pllAlignmentData *alignmentData, partitionList *partitions, int bDeep)
 Load alignment to the PLL instance. More...
 
pllInstancepllCreateInstance (pllInstanceAttr *attr)
 Create the main instance of PLL. More...
 
void pllTreeInitTopologyNewick (pllInstance *tr, pllNewickTree *nt, int useDefaultz)
 Initializes the PLL tree topology according to a parsed newick tree. More...
 
void pllTreeInitTopologyRandom (pllInstance *tr, int tips, char **nameList)
 Initialize PLL tree with a random topology. More...
 
void pllTreeInitTopologyForAlignment (pllInstance *tr, pllAlignmentData *alignmentData)
 Initialize a tree that corresponds to a given (already parsed) alignment. More...
 
void pllComputeRandomizedStepwiseAdditionParsimonyTree (pllInstance *tr, partitionList *partitions)
 Compute a randomized stepwise addition oder parsimony tree. More...
 
void pllBaseSubstitute (pllAlignmentData *alignmentData, partitionList *partitions)
 Encode the alignment data to the PLL numerical representation. More...
 
void pllClearRearrangeHistory (pllInstance *tr)
 
void pllDestroyInstance (pllInstance *tr)
 Deallocate the PLL instance. More...
 
int pllSetSubstitutionRateMatrixSymmetries (char *string, partitionList *pr, int model)
 Set symmetries among parameters in the Q matrix. More...
 
void pllSetFixedAlpha (double alpha, int model, partitionList *pr, pllInstance *tr)
 Set the alpha parameter of the Gamma model to a fixed value for a partition. More...
 
void pllSetFixedBaseFrequencies (double *f, int length, int model, partitionList *pr, pllInstance *tr)
 Set all base freuqncies to a fixed value for a partition. More...
 
int pllSetOptimizeBaseFrequencies (int model, partitionList *pr, pllInstance *tr)
 Set that the base freuqencies are optimized under ML. More...
 
void pllSetFixedSubstitutionMatrix (double *q, int length, int model, partitionList *pr, pllInstance *tr)
 Set all substitution rates to a fixed value for a specific partition. More...
 
linkageListinitLinkageList (int *linkList, partitionList *pr)
 
int pllLinkAlphaParameters (char *string, partitionList *pr)
 Link alpha parameters across partitions. More...
 
int pllLinkFrequencies (char *string, partitionList *pr)
 Link base frequency parameters across partitions. More...
 
int pllLinkRates (char *string, partitionList *pr)
 Link Substitution matrices across partitions. More...
 
int pllInitModel (pllInstance *tr, partitionList *partitions, pllAlignmentData *alignmentData)
 Initialize partitions according to model parameters. More...
 
int pllOptimizeModelParameters (pllInstance *tr, partitionList *pr, double likelihoodEpsilon)
 Optimize all free model parameters of the likelihood model. More...
 
char * pllReadFile (const char *filename, long *filesize)
 Read the contents of a file. More...
 

Static functions

static void pllTreeInitDefaults (pllInstance *tr, int tips)
 Initialize PLL tree structure with default values. More...
 
static void initializePartitionsSequential (pllInstance *tr, partitionList *pr)
 
static char * my_strtok_r (char *s, const char *delim, char **save_ptr)
 
static void freeLinkageList (linkageList *ll)
 
static void swapSite (unsigned char **buf, int s1, int s2, int nTaxa)
 Swap two sites in a buffer. More...
 
static partitionListcreatePartitions (pllQueue *parts, int *bounds)
 Constructs the list of partitions according to the proposed partition scheme. More...
 
static void copySite (unsigned char **dst, unsigned char **src, int to, int from, int nTaxa)
 Copy a site to another buffer. More...
 
static void genericBaseFrequencies (pInfo *partition, pllAlignmentData *alignmentData, boolean smoothFrequencies, const unsigned int *bitMask, double *pfreqs)
 Compute the empirical frequencies of a partition. More...
 
static int checkTreeInclusion (pllInstance *pInst, pllNewickTree *nTree)
 
static void updateBranchLength (nodeptr p, double old_fracchange, double new_fracchange)
 
static void updateAllBranchLengthsRecursive (nodeptr p, int tips, double old_fracchange, double new_fracchange)
 
static void updateAllBranchLengths (pllInstance *tr, double old_fracchange, double new_fracchange)
 
static int linkTaxa (pllInstance *pInst, pllNewickTree *nTree, int taxaExist)
 Relink the taxa. More...
 
static int init_Q_MatrixSymmetries (char *linkageString, partitionList *pr, int model)
 
static int checkLinkageConsistency (partitionList *pr)
 Check parameter linkage across partitions for consistency. More...
 
static linkageListinitLinkageListString (char *linkageString, partitionList *pr)
 

Detailed Description

Miscellaneous general utility and helper functions.

Function Documentation

static int checkLinkageConsistency ( partitionList pr)
static

Check parameter linkage across partitions for consistency.

Checks that linked alpha, substitution rate and frequency model parameters across several partitions are consistent. E.g., when two partitions are linked via the alpha parameter, the alpha parameter should either be set to the same fixed value or it should be estimated!

Parameters
prList of partitions
Todo:
Call this in more functions, right now it's only invoked in the wrapper for modOpt()

+ Here is the caller graph for this function:

static void copySite ( unsigned char **  dst,
unsigned char **  src,
int  to,
int  from,
int  nTaxa 
)
inlinestatic

Copy a site to another buffer.

Copies site from from buffer src to to in buffer dst. Both buffers must consist of nTaxa + 1 taxa and the first row contains no information, i.e. it is not accessed.

Parameters
dstDestination buffer
srcSource buffer
toAt which position in dst to copy the site to
fromWhich site from src to copy
nTaxaNumber of taxa, i.e. size of site
static partitionList* createPartitions ( pllQueue parts,
int *  bounds 
)
static

Constructs the list of partitions according to the proposed partition scheme.

A static function that construcs the partitionList structure according to the partition scheme AFTER the sites have been repositioned in contiguous regions according to the partition scheme.

Parameters
boundsAn array of the new starting and ending posititons of sites in the alignment for each partition. This array is of size 2 * nparts. The elements are always couples (lower,upper). The upper bounds is a site that is not included in the partition
Todo:
Fix the bug in PLL
Parameters
npartsThe number of partitions to be created

+ Here is the caller graph for this function:

static void genericBaseFrequencies ( pInfo partition,
pllAlignmentData alignmentData,
boolean  smoothFrequencies,
const unsigned int *  bitMask,
double *  pfreqs 
)
static

Compute the empirical frequencies of a partition.

Compute the empirical frequencies of partition partition and store them in pfreqs.

Parameters
partitionThe partition for which to compute empirical frequencies
alignmentDataThe multiple sequence alignment
smoothFrequenciesNot needed?
bitMaskThe bitmask
pfreqsArray of size partition->states where the empirical frequencies for this partition are stored

+ Here is the caller graph for this function:

void getxnode ( nodeptr  p)

Set the orientation of a node.

Sets the orientation of node p. That means, it will reset the orientation p->next->x and p->next->next->x to 0 and of p->x to 1, meaning that the conditional likelihood vector for that node is oriented on p, i.e. the conditional likelihood vector represents the subtree rooted at p and not any other of the two nodes.

Parameters
pNode which we want to orient

+ Here is the caller graph for this function:

void hookup ( nodeptr  p,
nodeptr  q,
double *  z,
int  numBranches 
)

Connect two nodes and assign branch lengths.

Connect the two nodes p and q in each partition i with a branch of length z[i]

Parameters
pNode p
qNode q
numBranchesNumber of partitions

+ Here is the caller graph for this function:

boolean isTip ( int  number,
int  maxTips 
)

Check whether a node is a tip.

Checks whether the node with number number is a tip.

Parameters
numberNode number to be checked
maxTipsNumber of tips in the tree
Returns
PLL_TRUE if tip, PLL_FALSE otherwise

+ Here is the caller graph for this function:

static int linkTaxa ( pllInstance pInst,
pllNewickTree nTree,
int  taxaExist 
)
static

Relink the taxa.

Relink the taxa by performing a preorder traversal of the unrooted binary tree. We assume that the tree is rooted such that the root is the only node of out-degree 3 and in-degree 0, while all the other inner nodes have in-degree 1 and out-degree 2. Finally, the leaves have in-degree 1 and out-degree 0.

Parameters
pInstPLL instance
nTreeParsed newick tree structure
taxaExistIs the set of taxa of nTree a subset of the taxa of the current tree
Returns

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void pllAlignmentRemoveDups ( pllAlignmentData alignmentData,
partitionList pl 
)

Remove duplicate sites from alignment and update weights vector.

Removes duplicate sites from the alignment given the partitions list and updates the weight vector of the alignment and the boundaries (upper, lower, width) for each partition.

Parameters
alignmentDataThe multiple sequence alignment
plList of partitions
double** pllBaseFrequenciesGTR ( partitionList pl,
pllAlignmentData alignmentData 
)

Compute the empirical base frequencies for all partitions.

Compute the empirical base frequencies for all partitions in the list pl.

Parameters
plPartition list
alignmentDataMultiple sequence alignment
Returns
A list of pl->numberOfPartitions arrays each of size pl->partitionData[i]->states, where i is the i-th partition

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void pllBaseSubstitute ( pllAlignmentData alignmentData,
partitionList partitions 
)

Encode the alignment data to the PLL numerical representation.

Transforms the alignment to the PLL internal representation by substituting each base with a specific digit.

Parameters
alignmentDataMultiple sequence alignment
partitionsList of partitions

+ Here is the caller graph for this function:

void pllClearRearrangeHistory ( pllInstance tr)

Clears the rearrangements history from PLL instance

Clears the rearrangements rollback information (history) from the PLL instance tr.

Parameters
trPLL instance

+ Here is the caller graph for this function:

void pllComputeRandomizedStepwiseAdditionParsimonyTree ( pllInstance tr,
partitionList partitions 
)

Compute a randomized stepwise addition oder parsimony tree.

Implements the RAxML randomized stepwise addition order algorithm

Todo:
check functions that are invoked for potential memory leaks!
Parameters
trThe PLL instance
partitionsThe partitions
pllInstance* pllCreateInstance ( pllInstanceAttr attr)

Create the main instance of PLL.

Create an instance of the phylogenetic likelihood library

Parameters
rateHetModelRate heterogeneity model
fastScalingexplain fastScaling here
saveMemoryexplain saveMemory here
useRecomIf set to PLL_TRUE, enables ancestral state recomputation
Todo:
Document fastScaling, rate heterogeneity and saveMemory and useRecom
Note
Do not set saveMemory to when using useRecom as memory saving techniques are not yet implemented for ancestral state recomputation.
Returns
On success returns an instance to PLL, otherwise NULL

+ Here is the call graph for this function:

void pllDestroyInstance ( pllInstance tr)

Deallocate the PLL instance.

Deallocates the library instance and all its elements.

Parameters
trThe PLL instance

+ Here is the call graph for this function:

double pllGetBranchLength ( pllInstance tr,
nodeptr  p,
int  partition_id 
)

Get the length of a specific branch.

Get the length of the branch specified by node p and p->back of partition partition_id. The branch length is decoded from the PLL representation.

Parameters
trPLL instance
pSpecifies one end-point of the branch. The other one is p->back
partition_idSpecifies the partition
Returns
The branch length
nodeptr pllGetRandomSubtree ( pllInstance tr)

Get a random subtree.

Returns the root node of a randomly picked subtree of the tree in PLL instance tr. The picked subtree is guaranteed to have height over 1, that is, the direct descendents of the returned (root) node are not tips.

Parameters
trPLL instance
Returns
The root node of the randomly picked subtree

+ Here is the call graph for this function:

int pllInitModel ( pllInstance tr,
partitionList partitions,
pllAlignmentData alignmentData 
)

Initialize partitions according to model parameters.

Initializes partitions according to model parameters.

Parameters
trThe PLL instance
partitionsList of partitions
alignmentDataThe parsed alignment
Returns
Returns PLL_TRUE in case of success, otherwise PLL_FALSE

+ Here is the call graph for this function:

int pllLinkAlphaParameters ( char *  string,
partitionList pr 
)

Link alpha parameters across partitions.

Links alpha paremeters across partitions (GAMMA model of rate heterogeneity)

Parameters
stringstring describing the linkage pattern
prList of partitions
Todo:
test behavior/impact/mem-leaks of this when PSR model is used it shouldn't do any harm, but it would be better to check!
int pllLinkFrequencies ( char *  string,
partitionList pr 
)

Link base frequency parameters across partitions.

Links base frequency paremeters across partitions

Parameters
stringstring describing the linkage pattern
prList of partitions
Todo:
semantics of this function not clear yet: right now this only has an effect when we do a ML estimate of base frequencies when we use empirical or model-defined (protein data) base frequencies, one could maybe average over the per-partition frequencies, but the averages would need to be weighted accodring on the number of patterns per partition
int pllLinkRates ( char *  string,
partitionList pr 
)

Link Substitution matrices across partitions.

Links substitution matrices (Q matrices) across partitions

Parameters
stringstring describing the linkage pattern
prList of partitions
Todo:
re-think/re-design how this is done for protein models
int pllOptimizeModelParameters ( pllInstance tr,
partitionList pr,
double  likelihoodEpsilon 
)

Optimize all free model parameters of the likelihood model.

Initializes partitions according to model parameters.

Parameters
trThe PLL instance
prList of partitions
likelihoodEpsilonSpecifies up to which epsilon in likelihood values the iterative routine will be optimizing the parameters

+ Here is the call graph for this function:

void pllPartitionsDestroy ( pllInstance tr,
partitionList **  partitions 
)

free all data structures associated to a partition

frees all data structures allocated for this partition

Parameters
partitionsthe pointer to the partition list
tipsnumber of tips in the tree

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

char* pllReadFile ( const char *  filename,
long *  filesize 
)

Read the contents of a file.

Reads the ile filename and return its content. In addition the size of the file is stored in the input variable filesize. The content of the variable filesize can be anything and will be overwritten.

Parameters
filenameName of the input file
filesizeInput parameter where the size of the file (in bytes) will be stored
Returns
Contents of the file

+ Here is the caller graph for this function:

void pllSetBranchLength ( pllInstance tr,
nodeptr  p,
int  partition_id,
double  bl 
)

Set the length of a specific branch.

Set the length of the branch specified by node p and p->back of partition partition_id. The function encodes the branch length to the PLL representation.

Parameters
trPLL instance
pSpecifies one end-point of the branch. The other one is p->back
partition_idSpecifies the partition
blBranch length
void pllSetFixedAlpha ( double  alpha,
int  model,
partitionList pr,
pllInstance tr 
)

Set the alpha parameter of the Gamma model to a fixed value for a partition.

Sets the alpha parameter of the gamma model of rate heterogeneity to a fixed value and disables the optimization of this parameter

Parameters
alphaalpha value
modelIndex of the partition for which we want to set the alpha value
prList of partitions
trLibrary instance for which we want to fix alpha
Todo:
test if this works with the parallel versions

+ Here is the call graph for this function:

void pllSetFixedBaseFrequencies ( double *  f,
int  length,
int  model,
partitionList pr,
pllInstance tr 
)

Set all base freuqncies to a fixed value for a partition.

Sets all base freuqencies of a partition to fixed values and disables ML optimization of these parameters

Parameters
farray containing the base frequencies
lengthlength of array f, this needs to be as long as the number of states in the model, otherwise an assertion will fail!
modelIndex of the partition for which we want to set the frequencies
prList of partitions
trLibrary instance for which we want to fix the base frequencies
Todo:
test if this works with the parallel versions

+ Here is the call graph for this function:

void pllSetFixedSubstitutionMatrix ( double *  q,
int  length,
int  model,
partitionList pr,
pllInstance tr 
)

Set all substitution rates to a fixed value for a specific partition.

Sets all substitution rates of a partition to fixed values and disables ML optimization of these parameters. It will automatically re-scale the relative rates such that the last rate is 1.0

Parameters
farray containing the substitution rates
lengthlength of array f, this needs to be as long as: (s * s - s) / 2, i.e., the number of upper diagonal entries of the Q matrix
modelIndex of the partition for which we want to set/fix the substitution rates
prList of partitions
trLibrary instance for which we want to fix the substitution rates
Todo:
test if this works with the parallel versions

+ Here is the call graph for this function:

int pllSetOptimizeBaseFrequencies ( int  model,
partitionList pr,
pllInstance tr 
)

Set that the base freuqencies are optimized under ML.

The base freuqencies for partition model will be optimized under ML

Parameters
modelIndex of the partition for which we want to optimize base frequencies
prList of partitions
trLibrary instance for which we want to fix the base frequencies
Todo:
test if this works with the parallel versions

+ Here is the call graph for this function:

int pllSetSubstitutionRateMatrixSymmetries ( char *  string,
partitionList pr,
int  model 
)

Set symmetries among parameters in the Q matrix.

Allows to link some or all rate parameters in the Q-matrix for obtaining simpler models than GTR

Parameters
stringstring describing the symmetry pattern among the rates in the Q matrix
prList of partitions
modelIndex of the partition for which we want to set the Q matrix symmetries
Todo:
nothing
static void pllTreeInitDefaults ( pllInstance tr,
int  tips 
)
static

Initialize PLL tree structure with default values.

Initialize PLL tree structure with default values and allocate memory for its elements.

Todo:
STILL NOT FINISHED

+ Here is the caller graph for this function:

void pllTreeInitTopologyForAlignment ( pllInstance tr,
pllAlignmentData alignmentData 
)

Initialize a tree that corresponds to a given (already parsed) alignment.

Initializes the PLL tree such that it corresponds to the given alignment

Todo:
nothing
Parameters
trThe PLL instance
alignmentDataParsed alignment

+ Here is the call graph for this function:

void pllTreeInitTopologyRandom ( pllInstance tr,
int  tips,
char **  nameList 
)

Initialize PLL tree with a random topology.

*Initializes the PLL tree with a randomly created topology

Todo:
Perhaps pass a seed?
Parameters
trThe PLL instance
tipsNumber of tips
nameListA set of tips names representing the taxa labels

+ Here is the call graph for this function:

static void swapSite ( unsigned char **  buf,
int  s1,
int  s2,
int  nTaxa 
)
inlinestatic

Swap two sites in a buffer.

Swaps sites s1 and s2 in buffer buf which consists of nTaxa + 1 taxa (i.e. rows), and the first row contains no information, i.e. it is not accessed.

Parameters
bufferMemory buffer
s1First site
s2Second site
nTaxaNumber of taxa, i.e. size of site

+ Here is the caller graph for this function: