geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

remove_domain.py (1123B)


      1 import sys
      2 
      3 from whoosh.qparser import QueryParser
      4 from whoosh.index import open_dir
      5 
      6 def main():
      7     if len(sys.argv) < 2:
      8         print("Please specify a domain...")
      9         return
     10 
     11     ix = open_dir("index")
     12     with ix.searcher() as searcher:
     13         query_parser = QueryParser("domain", ix.schema)
     14         query = query_parser.parse(sys.argv[1])
     15         results = searcher.search(query, limit=None)
     16 
     17         if len(results) == 0:
     18             print("No documents found for domain.")
     19             return
     20 
     21         # confirm removal before proceeding
     22         print("Documents facing removal")
     23         print("------------------------")
     24         for result in results:
     25             print(result["url"])
     26         answer = input("\nPlease confirm removal [y/n]:")
     27         if answer.lower()[0] != "y":
     28             print("Aborting removal.")
     29             return
     30         docnums = [result.docnum for result in results]
     31 
     32     with ix.writer() as writer:
     33         for docnum in docnums:
     34             writer.delete_document(docnum)
     35 
     36     print("{} documents removed from index.".format(len(results)))
     37 
     38 
     39 if __name__ == "__main__":
     40     main()