def clean_hrefs(allHrefs): links = {\'links\' : allHrefs} df = pd.DataFrame(links).drop_duplicates() df = df[df[\'links\'].str.contains(\'financial|inve