remove_domain.py (1123B)
"""Remove documents matching a domain query from the Whoosh index.

Usage: remove_domain.py <domain>

The first command-line argument is parsed as a query against the
``domain`` field of the index stored in the ``index`` directory.
"""
import sys

from whoosh.qparser import QueryParser
from whoosh.index import open_dir


def main():
    """Find documents for the given domain, confirm, then delete them.

    Reads the domain query from ``sys.argv[1]``, lists the ``url`` of every
    matching document, and asks for confirmation on stdin. Only an answer
    starting with "y" (case-insensitive) proceeds with the deletion; any
    other answer, including an empty one or a closed stdin, aborts.

    Returns None in all cases; progress and errors are reported on stdout.
    """
    if len(sys.argv) < 2:
        print("Please specify a domain...")
        return

    ix = open_dir("index")
    with ix.searcher() as searcher:
        query_parser = QueryParser("domain", ix.schema)
        query = query_parser.parse(sys.argv[1])
        # limit=None asks for every match rather than a capped page.
        results = searcher.search(query, limit=None)

        if len(results) == 0:
            print("No documents found for domain.")
            return

        # confirm removal before proceeding
        print("Documents facing removal")
        print("------------------------")
        for result in results:
            print(result["url"])

        try:
            answer = input("\nPlease confirm removal [y/n]:")
        except EOFError:
            # No interactive stdin: treat as "not confirmed" rather than
            # crashing, since deletion is destructive.
            answer = ""

        # startswith() is safe on an empty answer, unlike indexing [0],
        # which raised IndexError when the user just pressed Enter.
        if not answer.strip().lower().startswith("y"):
            print("Aborting removal.")
            return

        # Capture the document numbers while the searcher is still open so
        # nothing below depends on the results object after it is closed.
        docnums = [result.docnum for result in results]

    with ix.writer() as writer:
        for docnum in docnums:
            writer.delete_document(docnum)

    print("{} documents removed from index.".format(len(docnums)))


if __name__ == "__main__":
    main()