Source code for brokilon.metrics.generalized_robinson_foulds

from brokilon.ccd.clades import DemeClade


[docs] def _get_deme_clades(tree, annotation_str): clades = set() for node in tree.traverse("levelorder"): # if len(node) > 1: # this would ignore all leaf and internal nodes if len(node.children) != 1: # this only ingores internal nodes, technically we care # about leaf nodes with this distance as they could contribute to it. if not hasattr(node, annotation_str): try: probs = [x for x in getattr(node, f"{annotation_str}_set_prob").strip("[]\"\'").split( ",")] types = [x for x in getattr(node, f"{annotation_str}_set").strip("[]\"\'").split(",")] except Exception: raise ValueError("Something is wrong with the tree file, likely inconsistent " "labels from treeannotator which is infuriatingly stupid!") cur_clade = {int(leaf.name) for leaf in node} # this is a bypass, if there is no attribute of the required annotation it will # try to use the set and set prob to extract the most likely one, this is stupid # and all because treeannotoator doesn't annotate at least the leaves (maybe more) clades.add(DemeClade(frozenset(cur_clade), deme=types[probs.index(max(probs))])) else: cur_clade = {int(leaf.name) for leaf in node} clades.add(DemeClade(frozenset(cur_clade), deme=getattr(node, annotation_str))) return clades
[docs] def deme_robinson_foulds(tree_t1, tree_t2, annotation_str): # this is half as fast as the function above (which makes sense, its based on a weird function) clades1 = _get_deme_clades(tree_t1, annotation_str) clades2 = _get_deme_clades(tree_t2, annotation_str) return len(clades1.symmetric_difference(clades2))
[docs] def deme_robinson_foulds_reference(tree, reference_tree, annotation_str): # Essentially this is half the RF distance, all clades in the reference that are not in the tree clades_ref = _get_deme_clades(reference_tree, annotation_str) clades_tree = _get_deme_clades(tree, annotation_str) # All clades that are in the reference but not in the given tree (i.e. wrong clades if # reference is the truth) return len(clades_ref.difference(clades_tree))
if __name__ == '__main__': from pathlib import Path from brokilon.core import read_nexus_trees tree_file = ( f"{Path(__file__).parent.absolute().parent.parent.parent}" f"/examples/data/h3n2-bdmm.h3n2_2deme.trees" ) trees = read_nexus_trees(tree_file) t1 = trees[10] t2 = trees[11] d = deme_robinson_foulds(t1, t2, annotation_str="type") from brokilon.metrics import robinson_foulds, robinson_foulds_ccd regular_rf = robinson_foulds(t1, t2) ccd_rf = robinson_foulds_ccd(t1, t2) print(f"Annotated: {d} ||| Regular: {regular_rf} ||| CCD: {ccd_rf}")