#!/usr/bin/env python # coding: utf-8 # # Trees # Open In Colab # # http://openbookproject.net/thinkcs/python/english3e/trees.html # - like linked lists, trees are made up of nodes # # ## Binary Tree # - is a commonly used tree in which each node contains a reference to atmost two other nodes (possibly None) # - these references are referred to as the left and right subtrees # - like the node of linked list, each node also contains data/cargo # - like linked lists, trees are recursive data structures that are defined recursively: # 1. the empty tree, represented by None, or # 2. a node that contains a data and two tree references (left and right subtree) # ## Building trees # - similar to building linked-list # In[1]: class Tree: def __init__(self, data, left=None, right=None): self.cargo = data self.left = left self.right = right def __str__(self): return "{}".format(self.cargo) # ### bottom-up way to build-trees # - first create children and link them to the parent # In[4]: left = Tree(2) right = Tree(3) tree = Tree(1, left, right) # In[5]: tree1 = Tree(10, Tree(20), Tree(30)) # In[6]: print(tree) # In[7]: print(tree1) # ## traversing tree # - natural way to traverse a tree is recursively! # In[8]: def findSum(tree): if not tree: return 0 return tree.cargo + findSum(tree.left) + findSum(tree.right) # In[9]: findSum(tree) # In[10]: findSum(tree1) # ## Expression trees # - trees are natural way to represent the structure of an expression unambigigously. # - infix expression 1 + 2 * 3 is ambigious unless we know the order of operation that * happens before + # - we can use tree to represent the same expression # - operands are leaf nodes # - operator nodes contain references to their operands; operators are binary (two operands) # # # # - applications: # - translate expressions to postfix, prefix, and infix # - compilers use expression trees to parse, optimize, and translate programs # # - three ways to traverse trees: pre-order, in-order and post-order # In[18]: expression = Tree('+', Tree(1), Tree('*', Tree(2), Tree(3))) # ### pre-order tree traversal # - contents of the root appear before the contents of the children # - recursive algorithm: # - visit the node # - visit left subtree # - visit right subtree # In[19]: def preorder(tree): if not tree: return print(tree.cargo, end=' ') preorder(tree.left) preorder(tree.right) # In[20]: preorder(expression) # ### in-order tree traversal # - contents of the tree appear in order # - recursive algorithm: # - visit left subtree # - visit node # - visit right subtree # In[21]: def inorder(tree): if not tree: return inorder(tree.left) print(tree.cargo, end=' ') inorder(tree.right) # In[22]: inorder(expression) # In[29]: def inorderIndented(tree, level=0): if not tree: return inorderIndented(tree.right, level+1) print(' '*level + str(tree.cargo)) inorderIndented(tree.left, level+1) # In[30]: inorderIndented(expression) # ### post-order traversal # - recursive algorithm: # 1. visit left subtree # 2. visit right subtree # 3. visit node # In[33]: def postorder(tree): if not tree: return postorder(tree.left) postorder(tree.right) print(tree.cargo, end=' ') # In[34]: postorder(expression) # ## building an expression tree # - parse an infix expression and build the corresponding expression tree # - e.g., (3 + 7) * 9 yields the following tree: # # 1. tokenize expression into python list? How (left as an exercise) # - (3 + 7) * 9 = ["(", 3, "+", 7, ")", "*", 9, "end"] # 2. "end" token is useful for preventing the parser from reading pas the end of the list # In[31]: def get_token(token_list, expected): if token_list[0] == expected: del token_list[0] return True return False # In[36]: # handles operands def get_number(token_list): x = token_list[0] if not isinstance(x, int): return None del token_list[0] return Tree(x, None, None) # leaf node # In[37]: token_list = [9, 11, 'end'] x = get_number(token_list) postorder(x) # In[38]: print(token_list) # In[39]: def get_product(token_list): a = get_number(token_list) if get_token(token_list, '*'): b = get_number(token_list) return Tree('*', a, b) return a # In[40]: token_list = [9, '*', 11, 'end'] tree = get_product(token_list) # In[42]: postorder(tree) # In[44]: token_list = [9, '+', 11, 'end'] tree = get_product(token_list) postorder(tree) # In[45]: # adapt the function for compound product such as 3 * (5 * (7 * 9)) def get_product(token_list): a = get_number(token_list) if get_token(token_list, '*'): b = get_product(token_list) return Tree('*', a, b) return a # In[46]: token_list = [2, "*", 3, "*", 5 , "*", 7, "end"] tree = get_product(token_list) postorder(tree) # In[47]: # a sum can be a tree with + at the root, a product on the left, and a sum on the right. # Or, a sum can be just a product. def get_sum(token_list): a = get_product(token_list) if get_token(token_list, "+"): b = get_sum(token_list) return Tree("+", a, b) return a # In[48]: token_list = [9, "*", 11, "+", 5, "*", 7, "end"] tree = get_sum(token_list) # In[49]: postorder(tree) # In[52]: # handle parenthesis def get_number(token_list): if get_token(token_list, "("): x = get_sum(token_list) # Get the subexpression get_token(token_list, ")") # Remove the closing parenthesis return x else: x = token_list[0] if not isinstance(x, int): return None del token_list[0] return Tree(x, None, None) # In[53]: # 9 * (11 + 5) * 7 token_list = [9, "*", "(", 11, "+", 5, ")", "*", 7, "end"] tree = get_sum(token_list) postorder(tree) # ## handling errors on malformed expressions # In[54]: # handle parenthesis def get_number(token_list): if get_token(token_list, "("): x = get_sum(token_list) # Get the subexpression if not get_token(token_list, ")"): # Remove the closing parenthesis raise ValueError('Missing close parenthesis!') return x else: x = token_list[0] if not isinstance(x, int): return None del token_list[0] return Tree(x, None, None) # In[55]: token_list = [9, "*", "(", 11, "+", 5, "*", 7, "end"] tree = get_sum(token_list) postorder(tree) # ## exercises: # 1. Modify inorder function so that it puts parentheses around every operator and pair of operands. Is the output correct and unambiguous? Are the parentheses always necessary? # 2. Write a function that takes an expression string and returns a token list. # 3. Find other places in the expression tree functions where errors can occur and add appropriate raise statements. Test your code with improperly formed expressions. # In[ ]: