add_is_cross_host_like.py (1001B)
from gus import constants
from gus.lib.db_model import init_db, Link, Page
from gus.lib.gemini import GeminiResource, GeminiRobotFileParser


def main():
    """Recompute and store ``is_cross_host_like`` for every link row.

    Opens the database found under ``index.new/``, walks each ``Link``
    joined to its "from" and "to" pages, asks
    ``Link.get_is_cross_host_like`` for the flag value based on the two
    pages' fetchable URLs, saves the row, and prints one progress line
    per link ("T" when the flag is truthy, "F" otherwise).
    """
    database = init_db(f"index.new/{constants.DB_FILENAME}")

    # Page is joined twice (once per end of the link), so each side
    # needs its own alias.
    source_page = Page.alias()
    target_page = Page.alias()

    joined_links = Link.select(Link, source_page, target_page)
    joined_links = joined_links.join(
        source_page, on=(Link.from_page_id == source_page.id)
    )
    joined_links = joined_links.join(
        target_page, on=(Link.to_page_id == target_page.id)
    )

    for row in joined_links.iterator():
        source = GeminiResource(row.from_page.fetchable_url)
        target = GeminiResource(row.to_page.fetchable_url)

        cross_host = Link.get_is_cross_host_like(source, target)
        row.is_cross_host_like = cross_host
        row.save()

        if cross_host:
            marker = "T"
        else:
            marker = "F"
        print("[{}] {} -> {}".format(marker, source.fetchable_url, target.fetchable_url))

    print("\nDone!")


if __name__ == "__main__":
    main()