Source code for brokilon.metrics.robinson_foulds
from brokilon.ccd.domain.topology.ccd import get_clades
from brokilon.core.tree import TreeError, extract_edge_set
[docs]
def robinson_foulds(tree_t1, tree_t2, attr_t1="name", attr_t2="name"):
"""
Returns the Robinson-Foulds symmetric distance between current
tree and a different tree instance.
:param tree_t1: The first input tree for distance computation
:param tree_t2: The second input tree for distance computation
:param name attr_t1: Compare trees using a custom node
attribute as a node name.
:param name attr_t2: Compare trees using a custom node
attribute as a node name in target tree.
:returns: (rf, rf_max, common_attrs, names, edges_t1, edges_t2)
"""
ref_t = tree_t1
if len(ref_t.children) > 2 or len(tree_t2.children) > 2:
raise TreeError(
"Unrooted tree found! Not supported in this package...")
attrs_t1 = {getattr(n, attr_t1) for n in ref_t.iter_leaves() if hasattr(n, attr_t1)}
attrs_t2 = {getattr(n, attr_t2) for n in tree_t2.iter_leaves() if hasattr(n, attr_t2)}
if attrs_t1 != attrs_t2:
raise TreeError("Trees have different taxa sets, not supported...")
edges1 = extract_edge_set(ref_t, attr_t1)
edges2 = extract_edge_set(tree_t2, attr_t2)
# the two root edges are never counted here, as they are always
# present in both trees because of the common attr filters
rf = len(edges1.symmetric_difference(edges2))
# Otherwise we need to count the actual number of valid
# partitions in each tree -2 is to avoid counting the root
# partition of the two trees (only needed in rooted trees)
# max_parts = (sum(1 for p in edges1 if len(p) > 1) +
# sum(1 for p in edges2 if len(p) > 1) - 2)
# min_comparison = (rf, max_parts, edges1, edges2)
return rf
[docs]
def robinson_foulds_ccd(tree_t1, tree_t2):
# this is half as fast as the function above (which makes sense, its based on a weird function)
clades_t1 = get_clades(tree_t1)
clades_t2 = get_clades(tree_t2)
return len(clades_t1.symmetric_difference(clades_t2))
if __name__ == '__main__':
from pathlib import Path
from brokilon.core import read_nexus_trees
tree_file = (
f"{Path(__file__).parent.absolute().parent.parent.parent}"
f"/examples/data/400Taxa.trees"
)
trees = read_nexus_trees(tree_file)
d1 = robinson_foulds(trees[0], trees[1], attr_t1="name", attr_t2="name")
d2 = robinson_foulds_ccd(trees[0], trees[1])
print(d1, d2)