geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit b8f73c5617bd502d6d952b2714eee404d8132eca
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Thu, 30 Jan 2020 08:47:38 -0500

Initial commit

Diffstat:
A.gitignore | 132+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agus/__init__.py | 1+
Agus/crawl.py | 133+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agus/serve.py | 138+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apoetry.lock | 594+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apyproject.toml | 26++++++++++++++++++++++++++
6 files changed, 1024 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,132 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Search index files +/index/ diff --git a/gus/__init__.py b/gus/__init__.py @@ -0,0 +1 @@ +__version__ = '0.1.0' diff --git a/gus/crawl.py b/gus/crawl.py @@ -0,0 +1,133 @@ +import pathlib +import re +import shutil +from urllib.parse import urlparse, urlunparse + +import gusmobile as gemini +from whoosh.fields import Schema, TEXT +from whoosh.index import create_in + +INDEX_DIR = "index" + +KNOWN_URLS = [ + "gemini://gemini.conman.org/", + "gemini://zaibatsu.circumlunar.space:1965", + "gemini://carcosa.net:1965", + "gemini://heavysquare.com:1965", + "gemini://mozz.us:1965", + "gemini://dgold.eu:1965", + "gemini://typed-hole.org:1965", + "gemini://consensus.circumlunar.space:1965", + "gemini://tilde.black:1965", + "gemini://tilde.pink:1965", + "gemini://vger.cloud:1965", + "gemini://yam655.com:1965", +] + + +def create_index(index_dir): + shutil.rmtree(index_dir) + pathlib.Path(index_dir).mkdir(parents=True, exist_ok=True) + schema = Schema( + url=TEXT(stored=True), + content=TEXT(stored=True), + ) + index = create_in("index", schema) + index_writer = index.writer() + return index_writer + + +def clean_links(links): + clean_links = [] + for link in links: + clean_link = link + u = urlparse(link) + if u.scheme is not None and u.scheme != "gemini": + continue + if u.port is None: + clean_link = clean_link.replace(u.hostname, u.hostname+":1965") + if u.scheme is None: + clean_link = clean_link.replace(u.hostname, "gemini://"+u.hostname) + clean_links.append(clean_link) + return clean_links + + +def normalize_gemini_url(url): + u = urlparse(url.lower().strip().rstrip('/'), 'gemini') + url_normalized = urlunparse(u) + if u.port is None: + url_normalized = url_normalized.replace(u.hostname, u.hostname+":1965") + if u.scheme is None: + url_normalized = url_normalized.replace(u.hostname, "gemini://"+u.hostname) + return url_normalized + +def extract_gemini_links(content): + link_pattern = "=>\s(\S+)" + links = re.findall(link_pattern, content) + gemini_links = clean_links(links) + return gemini_links + + +def index_content(response): + print("INDEXING...") + index_writer.add_document( + url=response.url, + content=response.content, + ) + + +def crawl_url(url): + u = urlparse(url, 'gemini') + url = urlunparse(u) + path = u.path.lower().rstrip().rstrip('/') + if path.endswith(".mp3") or path.endswith(".png") or path.endswith(".jpg") or path.endswith(".jpeg") or path.endswith(".zip"): + print("BINARY SKIP : %s" % url) + print("--------------------------") + return + normalized_url = normalize_gemini_url(url) + if normalized_url in visited_urls: + print("ALREADY SEEN : %s" % url) + print("--------------------------") + return + else: + visited_urls.append(normalized_url) + r = gemini.fetch(url) + if r is None: + # problem before getting a response + print("ERROR : %s" % url) + print("--------------------------") + elif r.status.startswith("3"): + # redirect status + print("REDIRECT : %s -> %s" % (url, r.url)) + crawl_url(r.url) + elif r.status.startswith("2"): + # success status + print("URL : %s" % r.url) + print("STATUS : %s" % r.status) + print("STATUS META : %s" % r.status_meta) + print("CONTENT TYPE : %s" % r.content_type) + index_content(r) + print("--------------------------") + gemini_links = extract_gemini_links(r.content) + for link in gemini_links: + crawl_url(link) + else: + # input, error, etc (all other statuses) + print("UNHANDLED : %s" % url) + print("--------------------------") + + +def main(): + global index_writer + index_writer = create_index(INDEX_DIR) + global visited_urls + visited_urls = [] + try: + for url in KNOWN_URLS: + crawl_url(url) + finally: + index_writer.commit() + + +if __name__ == "__main__": + main() diff --git a/gus/serve.py b/gus/serve.py @@ -0,0 +1,138 @@ +""" +A gemini search engine frontend. +""" +import asyncio +import math +import sys + +import jetforce +from jetforce import Response, Status +from whoosh.index import open_dir +from whoosh.qparser import QueryParser + + +app = jetforce.JetforceApplication() + + +def _render_header(): + return [ + "=> / -----------GUS------------", + "|Gemini Universal Search |", + "==========================", + "" + ] + + +def _render_footer(): + return [ + ] + + +@app.route("") +def index(request): + data = _render_header() + data.extend([ + "Welcome to GUS, the Gemini search engine!", + "", + "=> /search Search GUS", + "=> /known-servers List of known Gemini servers", + "=> /about About the indexer", + "=> gemini://gemini.circumlunar.space Gemini Project information" + ]) + data.extend(_render_footer()) + return Response(Status.SUCCESS, "text/gemini", "\n".join(data)) + + +@app.route("/about") +def index(request): + data = _render_header() + data.extend([ + "GUS has many features to help make a relevant", + "index. It will only index content within Geminispace,", + "and will not index links out to other protocols.", + "", + "To control crawling of your site, you can use a", + "robots.txt file, Place it in your document root", + "directory such that a request for \"robots.txt\" will", + "fetch it.", + "", + "GUS obeys User-agent of \"gus\" and \"*\"." + ]) + data.extend(_render_footer()) + return Response(Status.SUCCESS, "text/gemini", "\n".join(data)) + + +def _search_index(query): + query = QueryParser("content", ix.schema).parse(query) + results = searcher.search(query) + return ( + len(results), + [(result.score, result["url"]) for result in results] + ) + + +def _render_results(results): + data = [] + for i, result in enumerate(results): + if i > 0: + data.append("") + data.append("# {} (score: {:.2f})".format(i + 1, result[0])) + data.append("=> {}".format(result[1])) + return data + + +def _render_results_header(query, num_results): + return [ + "| You searched for: \"{}\"".format(query), + "| Number of hits: {}".format(num_results), + "==========================", + "" + ] + + +def _render_results_footer(num_results): + return [ + "", + "==========================", + "Page 1 of {} (paging coming later)".format(math.ceil(num_results / 10)) + ] + + +@app.route("/search") +def search(request): + data = _render_header() + if request.query: + num_results, results = _search_index(request.query) + print(num_results) + print(results) + data.extend(_render_results_header(request.query, num_results)) + data.extend(_render_results(results)) + data.extend(_render_results_footer(num_results)) + data.extend(_render_footer()) + + return Response(Status.SUCCESS, "text/gemini", "\n".join(data)) + else: + return Response(Status.INPUT, "Search query") + + +def main(): + args = jetforce.command_line_parser().parse_args() + ssl_context = jetforce.make_ssl_context( + args.hostname, args.certfile, args.keyfile, args.cafile, args.capath + ) + server = jetforce.GeminiServer( + host=args.host, + port=args.port, + ssl_context=ssl_context, + hostname=args.hostname, + app=app, + ) + global ix + ix = open_dir("index") + global searcher + with ix.searcher() as searcher: + asyncio.run(server.run()) + + +if __name__ == "__main__": + main() diff --git a/poetry.lock b/poetry.lock @@ -0,0 +1,594 @@ +[[package]] +category = "dev" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +name = "appdirs" +optional = false +python-versions = "*" +version = "1.4.3" + +[[package]] +category = "dev" +description = "Disable App Nap on OS X 10.9" +marker = "sys_platform == \"darwin\"" +name = "appnope" +optional = false +python-versions = "*" +version = "0.1.0" + +[[package]] +category = "dev" +description = "Atomic file writes." +marker = "sys_platform == \"win32\"" +name = "atomicwrites" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "1.3.0" + +[[package]] +category = "dev" +description = "Classes Without Boilerplate" +name = "attrs" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "19.3.0" + +[package.extras] +azure-pipelines = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "pytest-azurepipelines"] +dev = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "pre-commit"] +docs = ["sphinx", "zope.interface"] +tests = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] + +[[package]] +category = "dev" +description = "Specifications for callback functions passed in to an API" +name = "backcall" +optional = false +python-versions = "*" +version = "0.1.0" + +[[package]] +category = "dev" +description = "The uncompromising code formatter." +name = "black" +optional = false +python-versions = ">=3.6" +version = "19.10b0" + +[package.dependencies] +appdirs = "*" +attrs = ">=18.1.0" +click = ">=6.5" +pathspec = ">=0.6,<1" +regex = "*" +toml = ">=0.9.4" +typed-ast = ">=1.4.0" + +[package.extras] +d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] + +[[package]] +category = "dev" +description = "Composable command line interface toolkit" +name = "click" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "7.0" + +[[package]] +category = "dev" +description = "Cross-platform colored terminal text." +marker = "sys_platform == \"win32\"" +name = "colorama" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "0.4.3" + +[[package]] +category = "dev" +description = "Decorators for Humans" +name = "decorator" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +version = "4.4.1" + +[[package]] +category = "main" +description = "A simple library for requesting resources using the gemini protocol" +develop = true +name = "gusmobile" +optional = false +python-versions = "*" +version = "0.1.0" + +[package.source] +reference = "" +type = "directory" +url = "../nbh/gusmobile" + +[[package]] +category = "dev" +description = "Read metadata from Python packages" +marker = "python_version < \"3.8\"" +name = "importlib-metadata" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +version = "1.5.0" + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["sphinx", "rst.linker"] +testing = ["packaging", "importlib-resources"] + +[[package]] +category = "dev" +description = "IPython: Productive Interactive Computing" +name = "ipython" +optional = false +python-versions = ">=3.6" +version = "7.11.1" + +[package.dependencies] +appnope = "*" +backcall = "*" +colorama = "*" +decorator = "*" +jedi = ">=0.10" +pexpect = "*" +pickleshare = "*" +prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" +pygments = "*" +setuptools = ">=18.5" +traitlets = ">=4.2" + +[package.extras] +all = ["ipywidgets", "ipyparallel", "qtconsole", "ipykernel", "nbconvert", "notebook", "nbformat", "testpath", "pygments", "requests", "numpy (>=1.14)", "nose (>=0.10.1)", "Sphinx (>=1.3)"] +doc = ["Sphinx (>=1.3)"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["notebook", "ipywidgets"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.14)"] + +[[package]] +category = "dev" +description = "Vestigial utilities from IPython" +name = "ipython-genutils" +optional = false +python-versions = "*" +version = "0.2.0" + +[[package]] +category = "dev" +description = "An autocompletion tool for Python that can be used for text editors." +name = "jedi" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "0.16.0" + +[package.dependencies] +parso = ">=0.5.2" + +[package.extras] +qa = ["flake8 (3.7.9)"] +testing = ["colorama (0.4.1)", "docopt", "pytest (>=3.9.0,<5.0.0)"] + +[[package]] +category = "main" +description = "An Experimental Gemini Server" +name = "jetforce" +optional = false +python-versions = ">=3.7" +version = "0.2.0" + +[[package]] +category = "dev" +description = "More routines for operating on iterables, beyond itertools" +name = "more-itertools" +optional = false +python-versions = ">=3.5" +version = "8.2.0" + +[[package]] +category = "dev" +description = "Core utilities for Python packages" +name = "packaging" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "20.1" + +[package.dependencies] +pyparsing = ">=2.0.2" +six = "*" + +[[package]] +category = "dev" +description = "A Python Parser" +name = "parso" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "0.6.0" + +[package.extras] +testing = ["docopt", "pytest (>=3.0.7)"] + +[[package]] +category = "dev" +description = "Utility library for gitignore style pattern matching of file paths." +name = "pathspec" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "0.7.0" + +[[package]] +category = "dev" +description = "Pexpect allows easy control of interactive console applications." +marker = "sys_platform != \"win32\"" +name = "pexpect" +optional = false +python-versions = "*" +version = "4.8.0" + +[package.dependencies] +ptyprocess = ">=0.5" + +[[package]] +category = "dev" +description = "Tiny 'shelve'-like database with concurrency support" +name = "pickleshare" +optional = false +python-versions = "*" +version = "0.7.5" + +[[package]] +category = "dev" +description = "plugin and hook calling mechanisms for python" +name = "pluggy" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "0.13.1" + +[package.dependencies] +[package.dependencies.importlib-metadata] +python = "<3.8" +version = ">=0.12" + +[package.extras] +dev = ["pre-commit", "tox"] + +[[package]] +category = "dev" +description = "Library for building powerful interactive command lines in Python" +name = "prompt-toolkit" +optional = false +python-versions = ">=3.6" +version = "3.0.3" + +[package.dependencies] +wcwidth = "*" + +[[package]] +category = "dev" +description = "Run a subprocess in a pseudo terminal" +marker = "sys_platform != \"win32\"" +name = "ptyprocess" +optional = false +python-versions = "*" +version = "0.6.0" + +[[package]] +category = "dev" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +name = "py" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +version = "1.8.1" + +[[package]] +category = "dev" +description = "Pygments is a syntax highlighting package written in Python." +name = "pygments" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "2.5.2" + +[[package]] +category = "dev" +description = "Python parsing module" +name = "pyparsing" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +version = "2.4.6" + +[[package]] +category = "dev" +description = "pytest: simple powerful testing with Python" +name = "pytest" +optional = false +python-versions = ">=3.5" +version = "5.3.4" + +[package.dependencies] +atomicwrites = ">=1.0" +attrs = ">=17.4.0" +colorama = "*" +more-itertools = ">=4.0.0" +packaging = "*" +pluggy = ">=0.12,<1.0" +py = ">=1.5.0" +wcwidth = "*" + +[package.dependencies.importlib-metadata] +python = "<3.8" +version = ">=0.12" + +[package.extras] +checkqa-mypy = ["mypy (v0.761)"] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +category = "dev" +description = "Alternative regular expression module, to replace re." +name = "regex" +optional = false +python-versions = "*" +version = "2020.1.8" + +[[package]] +category = "dev" +description = "Python 2 and 3 compatibility utilities" +name = "six" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +version = "1.14.0" + +[[package]] +category = "dev" +description = "Python Library for Tom's Obvious, Minimal Language" +name = "toml" +optional = false +python-versions = "*" +version = "0.10.0" + +[[package]] +category = "dev" +description = "Traitlets Python config system" +name = "traitlets" +optional = false +python-versions = "*" +version = "4.3.3" + +[package.dependencies] +decorator = "*" +ipython-genutils = "*" +six = "*" + +[package.extras] +test = ["pytest", "mock"] + +[[package]] +category = "dev" +description = "a fork of Python 2 and 3 ast modules with type comment support" +name = "typed-ast" +optional = false +python-versions = "*" +version = "1.4.1" + +[[package]] +category = "dev" +description = "Measures number of Terminal column cells of wide-character codes" +name = "wcwidth" +optional = false +python-versions = "*" +version = "0.1.8" + +[[package]] +category = "main" +description = "Fast, pure-Python full text indexing, search, and spell checking library." +name = "whoosh" +optional = false +python-versions = "*" +version = "2.7.4" + +[[package]] +category = "dev" +description = "Backport of pathlib-compatible object wrapper for zip files" +marker = "python_version < \"3.8\"" +name = "zipp" +optional = false +python-versions = ">=3.6" +version = "2.1.0" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] +testing = ["jaraco.itertools"] + +[metadata] +content-hash = "2690e73f653c79739d62917bd68913471b55534500deeb03d298937cdb7bf651" +python-versions = "^3.7" + +[metadata.files] +appdirs = [ + {file = "appdirs-1.4.3-py2.py3-none-any.whl", hash = "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"}, + {file = "appdirs-1.4.3.tar.gz", hash = "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92"}, +] +appnope = [ + {file = "appnope-0.1.0-py2.py3-none-any.whl", hash = "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0"}, + {file = "appnope-0.1.0.tar.gz", hash = "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"}, +] +atomicwrites = [ + {file = "atomicwrites-1.3.0-py2.py3-none-any.whl", hash = "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4"}, + {file = "atomicwrites-1.3.0.tar.gz", hash = "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"}, +] +attrs = [ + {file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"}, + {file = "attrs-19.3.0.tar.gz", hash = "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"}, +] +backcall = [ + {file = "backcall-0.1.0.tar.gz", hash = "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4"}, + {file = "backcall-0.1.0.zip", hash = "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"}, +] +black = [ + {file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"}, + {file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"}, +] +click = [ + {file = "Click-7.0-py2.py3-none-any.whl", hash = "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13"}, + {file = "Click-7.0.tar.gz", hash = "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"}, +] +colorama = [ + {file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"}, + {file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"}, +] +decorator = [ + {file = "decorator-4.4.1-py2.py3-none-any.whl", hash = "sha256:5d19b92a3c8f7f101c8dd86afd86b0f061a8ce4540ab8cd401fa2542756bce6d"}, + {file = "decorator-4.4.1.tar.gz", hash = "sha256:54c38050039232e1db4ad7375cfce6748d7b41c29e95a081c8a6d2c30364a2ce"}, +] +gusmobile = [] +importlib-metadata = [ + {file = "importlib_metadata-1.5.0-py2.py3-none-any.whl", hash = "sha256:b97607a1a18a5100839aec1dc26a1ea17ee0d93b20b0f008d80a5a050afb200b"}, + {file = "importlib_metadata-1.5.0.tar.gz", hash = "sha256:06f5b3a99029c7134207dd882428a66992a9de2bef7c2b699b5641f9886c3302"}, +] +ipython = [ + {file = "ipython-7.11.1-py3-none-any.whl", hash = "sha256:387686dd7fc9caf29d2fddcf3116c4b07a11d9025701d220c589a430b0171d8a"}, + {file = "ipython-7.11.1.tar.gz", hash = "sha256:0f4bcf18293fb666df8511feec0403bdb7e061a5842ea6e88a3177b0ceb34ead"}, +] +ipython-genutils = [ + {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, + {file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"}, +] +jedi = [ + {file = "jedi-0.16.0-py2.py3-none-any.whl", hash = "sha256:b4f4052551025c6b0b0b193b29a6ff7bdb74c52450631206c262aef9f7159ad2"}, + {file = "jedi-0.16.0.tar.gz", hash = "sha256:d5c871cb9360b414f981e7072c52c33258d598305280fef91c6cae34739d65d5"}, +] +jetforce = [ + {file = "Jetforce-0.2.0-py3-none-any.whl", hash = "sha256:c7112f8daad67369920be4cbe7a6a1b6d2e2ba165c5d75660410622722dda9f4"}, + {file = "Jetforce-0.2.0.tar.gz", hash = "sha256:06fddac811fc7891a902ca523ab2fd706201ce82c7d843dbefe2f83342e1874e"}, +] +more-itertools = [ + {file = "more-itertools-8.2.0.tar.gz", hash = "sha256:b1ddb932186d8a6ac451e1d95844b382f55e12686d51ca0c68b6f61f2ab7a507"}, + {file = "more_itertools-8.2.0-py3-none-any.whl", hash = "sha256:5dd8bcf33e5f9513ffa06d5ad33d78f31e1931ac9a18f33d37e77a180d393a7c"}, +] +packaging = [ + {file = "packaging-20.1-py2.py3-none-any.whl", hash = "sha256:170748228214b70b672c581a3dd610ee51f733018650740e98c7df862a583f73"}, + {file = "packaging-20.1.tar.gz", hash = "sha256:e665345f9eef0c621aa0bf2f8d78cf6d21904eef16a93f020240b704a57f1334"}, +] +parso = [ + {file = "parso-0.6.0-py2.py3-none-any.whl", hash = "sha256:1376bdc8cb81377ca481976933773295218a2df47d3e1182ba76d372b1acb128"}, + {file = "parso-0.6.0.tar.gz", hash = "sha256:597f36de5102a8db05ffdf7ecdc761838b86565a4a111604c6e78beaedf1b045"}, +] +pathspec = [ + {file = "pathspec-0.7.0-py2.py3-none-any.whl", hash = "sha256:163b0632d4e31cef212976cf57b43d9fd6b0bac6e67c26015d611a647d5e7424"}, + {file = "pathspec-0.7.0.tar.gz", hash = "sha256:562aa70af2e0d434367d9790ad37aed893de47f1693e4201fd1d3dca15d19b96"}, +] +pexpect = [ + {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, + {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, +] +pickleshare = [ + {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, + {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, +] +pluggy = [ + {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, + {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, +] +prompt-toolkit = [ + {file = "prompt_toolkit-3.0.3-py3-none-any.whl", hash = "sha256:c93e53af97f630f12f5f62a3274e79527936ed466f038953dfa379d4941f651a"}, + {file = "prompt_toolkit-3.0.3.tar.gz", hash = "sha256:a402e9bf468b63314e37460b68ba68243d55b2f8c4d0192f85a019af3945050e"}, +] +ptyprocess = [ + {file = "ptyprocess-0.6.0-py2.py3-none-any.whl", hash = "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"}, + {file = "ptyprocess-0.6.0.tar.gz", hash = "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0"}, +] +py = [ + {file = "py-1.8.1-py2.py3-none-any.whl", hash = "sha256:c20fdd83a5dbc0af9efd622bee9a5564e278f6380fffcacc43ba6f43db2813b0"}, + {file = "py-1.8.1.tar.gz", hash = "sha256:5e27081401262157467ad6e7f851b7aa402c5852dbcb3dae06768434de5752aa"}, +] +pygments = [ + {file = "Pygments-2.5.2-py2.py3-none-any.whl", hash = "sha256:2a3fe295e54a20164a9df49c75fa58526d3be48e14aceba6d6b1e8ac0bfd6f1b"}, + {file = "Pygments-2.5.2.tar.gz", hash = "sha256:98c8aa5a9f778fcd1026a17361ddaf7330d1b7c62ae97c3bb0ae73e0b9b6b0fe"}, +] +pyparsing = [ + {file = "pyparsing-2.4.6-py2.py3-none-any.whl", hash = "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"}, + {file = "pyparsing-2.4.6.tar.gz", hash = "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f"}, +] +pytest = [ + {file = "pytest-5.3.4-py3-none-any.whl", hash = "sha256:c13d1943c63e599b98cf118fcb9703e4d7bde7caa9a432567bcdcae4bf512d20"}, + {file = "pytest-5.3.4.tar.gz", hash = "sha256:1d122e8be54d1a709e56f82e2d85dcba3018313d64647f38a91aec88c239b600"}, +] +regex = [ + {file = "regex-2020.1.8-cp27-cp27m-win32.whl", hash = "sha256:4e8f02d3d72ca94efc8396f8036c0d3bcc812aefc28ec70f35bb888c74a25161"}, + {file = "regex-2020.1.8-cp27-cp27m-win_amd64.whl", hash = "sha256:e6c02171d62ed6972ca8631f6f34fa3281d51db8b326ee397b9c83093a6b7242"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:4eae742636aec40cf7ab98171ab9400393360b97e8f9da67b1867a9ee0889b26"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:bd25bb7980917e4e70ccccd7e3b5740614f1c408a642c245019cff9d7d1b6149"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:3e77409b678b21a056415da3a56abfd7c3ad03da71f3051bbcdb68cf44d3c34d"}, + {file = "regex-2020.1.8-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:07b39bf943d3d2fe63d46281d8504f8df0ff3fe4c57e13d1656737950e53e525"}, + {file = "regex-2020.1.8-cp36-cp36m-win32.whl", hash = "sha256:23e2c2c0ff50f44877f64780b815b8fd2e003cda9ce817a7fd00dea5600c84a0"}, + {file = "regex-2020.1.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27429b8d74ba683484a06b260b7bb00f312e7c757792628ea251afdbf1434003"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0e182d2f097ea8549a249040922fa2b92ae28be4be4895933e369a525ba36576"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e3cd21cc2840ca67de0bbe4071f79f031c81418deb544ceda93ad75ca1ee9f7b"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:ecc6de77df3ef68fee966bb8cb4e067e84d4d1f397d0ef6fce46913663540d77"}, + {file = "regex-2020.1.8-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:26ff99c980f53b3191d8931b199b29d6787c059f2e029b2b0c694343b1708c35"}, + {file = "regex-2020.1.8-cp37-cp37m-win32.whl", hash = "sha256:7bcd322935377abcc79bfe5b63c44abd0b29387f267791d566bbb566edfdd146"}, + {file = "regex-2020.1.8-cp37-cp37m-win_amd64.whl", hash = "sha256:10671601ee06cf4dc1bc0b4805309040bb34c9af423c12c379c83d7895622bb5"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux1_i686.whl", hash = "sha256:98b8ed7bb2155e2cbb8b76f627b2fd12cf4b22ab6e14873e8641f266e0fb6d8f"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6a6ba91b94427cd49cd27764679024b14a96874e0dc638ae6bdd4b1a3ce97be1"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:6a6ae17bf8f2d82d1e8858a47757ce389b880083c4ff2498dba17c56e6c103b9"}, + {file = "regex-2020.1.8-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:0932941cdfb3afcbc26cc3bcf7c3f3d73d5a9b9c56955d432dbf8bbc147d4c5b"}, + {file = "regex-2020.1.8-cp38-cp38-win32.whl", hash = "sha256:d58e4606da2a41659c84baeb3cfa2e4c87a74cec89a1e7c56bee4b956f9d7461"}, + {file = "regex-2020.1.8-cp38-cp38-win_amd64.whl", hash = "sha256:e7c7661f7276507bce416eaae22040fd91ca471b5b33c13f8ff21137ed6f248c"}, + {file = "regex-2020.1.8.tar.gz", hash = "sha256:d0f424328f9822b0323b3b6f2e4b9c90960b24743d220763c7f07071e0778351"}, +] +six = [ + {file = "six-1.14.0-py2.py3-none-any.whl", hash = "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"}, + {file = "six-1.14.0.tar.gz", hash = "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a"}, +] +toml = [ + {file = "toml-0.10.0-py2.7.egg", hash = "sha256:f1db651f9657708513243e61e6cc67d101a39bad662eaa9b5546f789338e07a3"}, + {file = "toml-0.10.0-py2.py3-none-any.whl", hash = "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"}, + {file = "toml-0.10.0.tar.gz", hash = "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c"}, +] +traitlets = [ + {file = "traitlets-4.3.3-py2.py3-none-any.whl", hash = "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44"}, + {file = "traitlets-4.3.3.tar.gz", hash = "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"}, +] +typed-ast = [ + {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3"}, + {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb"}, + {file = "typed_ast-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919"}, + {file = "typed_ast-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01"}, + {file = "typed_ast-1.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75"}, + {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652"}, + {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"}, + {file = "typed_ast-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1"}, + {file = "typed_ast-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa"}, + {file = "typed_ast-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614"}, + {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41"}, + {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b"}, + {file = "typed_ast-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe"}, + {file = "typed_ast-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355"}, + {file = "typed_ast-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6"}, + {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907"}, + {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d"}, + {file = "typed_ast-1.4.1-cp38-cp38-win32.whl", hash = "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c"}, + {file = "typed_ast-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4"}, + {file = "typed_ast-1.4.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34"}, + {file = "typed_ast-1.4.1.tar.gz", hash = "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b"}, +] +wcwidth = [ + {file = "wcwidth-0.1.8-py2.py3-none-any.whl", hash = "sha256:8fd29383f539be45b20bd4df0dc29c20ba48654a41e661925e612311e9f3c603"}, + {file = "wcwidth-0.1.8.tar.gz", hash = "sha256:f28b3e8a6483e5d49e7f8949ac1a78314e740333ae305b4ba5defd3e74fb37a8"}, +] +whoosh = [ + {file = "Whoosh-2.7.4-py2.py3-none-any.whl", hash = "sha256:aa39c3c3426e3fd107dcb4bde64ca1e276a65a889d9085a6e4b54ba82420a852"}, + {file = "Whoosh-2.7.4.tar.gz", hash = "sha256:7ca5633dbfa9e0e0fa400d3151a8a0c4bec53bd2ecedc0a67705b17565c31a83"}, + {file = "Whoosh-2.7.4.zip", hash = "sha256:e0857375f63e9041e03fedd5b7541f97cf78917ac1b6b06c1fcc9b45375dda69"}, +] +zipp = [ + {file = "zipp-2.1.0-py3-none-any.whl", hash = "sha256:ccc94ed0909b58ffe34430ea5451f07bc0c76467d7081619a454bf5c98b89e28"}, + {file = "zipp-2.1.0.tar.gz", hash = "sha256:feae2f18633c32fc71f2de629bfb3bd3c9325cd4419642b1f1da42ee488d9b98"}, +] diff --git a/pyproject.toml b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "gus" +version = "0.1.0" +description = "Geminispace Universal Search" +authors = ["Natalie Pendragon <natpen@natpen.net>"] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.7" +gusmobile = { path = "../nbh/gusmobile/" } +whoosh = "^2.7.4" +jetforce = "^0.2.0" + +[tool.poetry.dev-dependencies] +black = "^19.10b0" +ipython = "^7.11.1" +pytest = "^5.2" + +[build-system] +requires = ["poetry>=0.12"] +build-backend = "poetry.masonry.api" + +[tool.poetry.scripts] +crawl = "gus.crawl:main" +search_index = "gus.search_index:main" +serve = "gus.serve:main"