Modify get_name_by_grand_child function
tyt3 opened this issue · comments
Modify the function so that matches for creator include, and matches for contributor exclude, the following roleTerm values: 'author', 'aut', 'composer', 'cmp', 'creator', 'cre', 'dubious author', 'dub', 'editor', 'edt', 'screenwriter', 'aus'.
And so that there are two ways to handle instances when there is no role subelement:
- grand_child_element_value='creator' and 'name[usage="primary"]'
- grand_child_element_value='contributor' and not( 'name[usage="primary"]' )
def get_name_by_grand_child(bs_object, grand_child_exp, grand_child_element_value, children='namePart'):
    """Return the joined text of the ``children`` elements when the role matches.

    The names are returned when either of the following holds:

    * at least one element selected by ``grand_child_exp`` has text equal
      to ``grand_child_element_value``; or
    * ``grand_child_element_value`` is ``'contributor'`` and ``bs_object``
      contains no ``role`` element at all.

    Args:
        bs_object: Object exposing a CSS-style ``select(selector)`` method
            that returns elements with a ``.text`` attribute (the original
            docstring describes it as a BeautifulSoup object).
        grand_child_exp: Selector for the elements whose text is compared
            (the original docstring gives ``role > roleTerm`` as the
            intended target).
        grand_child_element_value: Text to look for, e.g. ``"creator"``,
            ``"depositor"`` or ``"contributor"``.
        children: Selector for the elements whose text is returned.

    Returns:
        The text of every ``children`` element joined with ``", "``, or an
        empty string when neither condition holds.
    """
    role_matches = any(
        node.text == grand_child_element_value
        for node in bs_object.select(grand_child_exp)
    )

    # A name carrying no role element is treated as a contributor. This is
    # only evaluated when the role did not already match, so the names are
    # never emitted twice (the previous code appended them a second time
    # with no separator when both conditions held).
    no_role_contributor = (
        not role_matches
        and grand_child_element_value == 'contributor'
        and not bs_object.select('role')
    )

    if not (role_matches or no_role_contributor):
        return ""

    return ", ".join(node.text for node in bs_object.select(children))