# Unit 4 Parsing Code

The code for parsing in unit 4 presented in unit 4-31. You can also find this code in Homework 4 Challenge.

``````work_count = 0      # track one notion of "time taken"

if not (elt in theset[index]):
theset[index] = [elt] + theset[index]
return True
return False

def parse(tokens,grammar):
global work_count
work_count = 0
tokens = tokens + [ "end_of_input_marker" ]
chart = {}
start_rule = grammar[0]
for i in range(len(tokens)+1):
chart[i] = [ ]
start_state = (start_rule[0], [], start_rule[1], 0)
chart[0] = [ start_state ]
for i in range(len(tokens)):
while True:
changes = False
for state in chart[i]:
# State ===   x -> a b . c d , j
x = state[0]
ab = state[1]
cd = state[2]
j = state[3]

# Current State ==   x -> a b . c d , j
# Option 1: For each grammar rule c -> p q r
# (where the c's match)
# make a next state               c -> . p q r , i
# English: We're about to start parsing a "c", but
#  "c" may be something like "exp" with its own
#  production rules. We'll bring those production rules in.
next_states = [ (rule[0],[],rule[1],i)
for rule in grammar if cd <> [] and cd[0] == rule[0] ]
work_count = work_count + len(grammar)
for next_state in next_states:

# Current State ==   x -> a b . c d , j
# Option 2: If tokens[i] == c,
# make a next state               x -> a b c . d , j
# in chart[i+1]
# English: We're looking for to parse token c next
#  and the current token is exactly c! Aren't we lucky!
#  So we can parse over it and move to j+1.
if cd <> [] and tokens[i] == cd[0]:
next_state = (x, ab + [cd[0]], cd[1:], j)

# Current State ==   x -> a b . c d , j
# Option 3: If cd is [], the state is just x -> a b . , j
# for each p -> q . x r , l in chart[j]
# make a new state                p -> q x . r , l
# in chart[i]
# English: We just finished parsing an "x" with this token,
#  but that may have been a sub-step (like matching "exp -> 2"
#  in "2+3"). We should update the higher-level rules as well.
next_states = [ (jstate[0], jstate[1] + [x], (jstate[2])[1:],
jstate[3] )
for jstate in chart[j]
if cd == [] and jstate[2] <> [] and (jstate[2])[0] == x ]
work_count = work_count + len(chart[j])
for next_state in next_states:

# We're done if nothing changed!
if not changes:
break

## Uncomment this block if you'd like to see the chart printed.
#
#  for i in range(len(tokens)):
#   print "== chart " + str(i)
#   for state in chart[i]:
#     x = state[0]
#     ab = state[1]
#     cd = state[2]
#     j = state[3]
#     print "    " + x + " ->",
#     for sym in ab:
#       print " " + sym,
#     print " .",
#     for sym in cd:
#       print " " + sym,
#     print "  from " + str(j)

# Uncomment this block if you'd like to see the chart printed
# in cases where it's important to see quotes in the grammar
# for i in range(len(tokens)):
#   print "== chart " + str(i)
#   for state in chart[i]:
#     x = state[0]
#     ab = state[1]
#     cd = state[2]
#     j = state[3]
#     print "    " + x.__repr__() + " ->",
#     for sym in ab:
#       print " " + sym.__repr__(),
#     print " .",
#     for sym in cd:
#       print " " + sym.__repr__(),
#     print "  from " + str(j)

accepting_state = (start_rule[0], start_rule[1], [], 0)
return accepting_state in chart[len(tokens)-1]

grammar = [
("S", ["P" ]) ,
("P", ["(" , "P", ")" ]),
("P", [ ]) ,
]
tokens = [ "(", "(", ")", ")"]
result=parse(tokens, grammar)
print result
``````