init
This commit is contained in:
commit
21e3ae3b1c
13
Pipfile
Normal file
13
Pipfile
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[[source]]
|
||||||
|
url = "https://pypi.org/simple"
|
||||||
|
verify_ssl = true
|
||||||
|
name = "pypi"
|
||||||
|
|
||||||
|
[packages]
|
||||||
|
bottle = "*"
|
||||||
|
gunicorn = "*"
|
||||||
|
|
||||||
|
[dev-packages]
|
||||||
|
|
||||||
|
[requires]
|
||||||
|
python_version = "3.10"
|
45
Pipfile.lock
generated
Normal file
45
Pipfile.lock
generated
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"_meta": {
|
||||||
|
"hash": {
|
||||||
|
"sha256": "6cf82d4043d4c4bf722b99766ec56d49b04447a5e89726986ac957c46efc2196"
|
||||||
|
},
|
||||||
|
"pipfile-spec": 6,
|
||||||
|
"requires": {
|
||||||
|
"python_version": "3.10"
|
||||||
|
},
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"name": "pypi",
|
||||||
|
"url": "https://pypi.org/simple",
|
||||||
|
"verify_ssl": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"bottle": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea",
|
||||||
|
"sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==0.12.25"
|
||||||
|
},
|
||||||
|
"gunicorn": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e",
|
||||||
|
"sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==20.1.0"
|
||||||
|
},
|
||||||
|
"setuptools": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b",
|
||||||
|
"sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"
|
||||||
|
],
|
||||||
|
"markers": "python_version >= '3.7'",
|
||||||
|
"version": "==67.7.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"develop": {}
|
||||||
|
}
|
40
README.md
Normal file
40
README.md
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# Cursed API for ArchiveBox
|
||||||
|
|
||||||
|
ArchiveBox [doesn't have a web API](https://github.com/ArchiveBox/ArchiveBox/issues/496) yet. This is a shitty single-endpoint API to automate page archiving. It uses `subprocess` to run the archivebox CLI. The CLI runs in a separate process so that the HTTP request handler is not blocked while a page is being archived.
|
||||||
|
|
||||||
|
# Install and run
|
||||||
|
|
||||||
|
Install dependencies:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install bottle gunicorn
|
||||||
|
```
|
||||||
|
|
||||||
|
Start the API on the server where the ArchiveBox container is running. Replace the path below with the actual path to your docker-compose.yml.
|
||||||
|
|
||||||
|
```
|
||||||
|
ARCHIVEBOX_BIN="docker compose -f /opt/archivebox/docker-compose.yml run archivebox" python cursed_archivebox_api.py
|
||||||
|
```
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
|
||||||
|
| Variable | Default |
|
||||||
|
| ----------------- | --------------------- |
|
||||||
|
| `ARCHIVEBOX_BIN` | `/usr/bin/archivebox` (default for non-Docker installations) |
|
||||||
|
| `CURSED_PORT` | `9998` |
|
||||||
|
| `CURSED_HOST` | `0.0.0.0` |
|
||||||
|
| `CURSED_SERVER` | `gunicorn` See [server backends](https://bottlepy.org/docs/dev/deployment.html#switching-the-server-backend) |
|
||||||
|
|
||||||
|
# GET /add
|
||||||
|
|
||||||
|
Query parameters:
|
||||||
|
|
||||||
|
* `url`. Resource URL
|
||||||
|
* `depth`. Archive depth. Default: 0 (current page)
|
||||||
|
* `tag`. List of comma separated tags e.g. `my_tag`, `my_tag,another_one`.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
curl -i 'http://localhost:9998/add?url=https://example.com&depth=0&tag=api,example'
|
||||||
|
```
|
67
cursed_archivebox_api.py
Normal file
67
cursed_archivebox_api.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
from multiprocessing import Process
|
||||||
|
|
||||||
|
from bottle import run, get, request, response
|
||||||
|
|
||||||
|
|
||||||
|
# Runtime configuration, read once at import time. An unset or empty
# environment variable falls back to the default on the right.
ARCHIVEBOX_BIN = os.getenv('ARCHIVEBOX_BIN') or '/usr/bin/archivebox'
# Environment values are always strings; convert so the port is an int
# whether it comes from the environment or from the default. An invalid
# value fails here with ValueError instead of later at server start.
CURSED_PORT = int(os.getenv('CURSED_PORT') or 9998)
CURSED_HOST = os.getenv('CURSED_HOST') or '0.0.0.0'
CURSED_SERVER = os.getenv('CURSED_SERVER') or 'gunicorn'


# Root logger: everything from DEBUG up, prefixed with time, level and
# logger name.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s:%(levelname)s:%(name)s: %(message)s"
)
|
||||||
|
|
||||||
|
|
||||||
|
def shell_exec(command: list, to_stdin: str = None) -> str:
    """Run *command* and return its standard output.

    The command is executed directly from the argument list (no shell).

    Args:
        command: Program and arguments, one list item per argument.
        to_stdin: Optional text sent to the child's standard input,
            followed by a newline. ``None`` or an empty string sends
            nothing; stdin is then closed immediately.

    Returns:
        The child's standard output, decoded as UTF-8 with leading and
        trailing whitespace stripped.

    Raises:
        RuntimeError: The child exited with a non-zero status. The message
            is the child's standard error decoded as UTF-8.
    """
    payload = ('%s\n' % to_stdin).encode('utf-8') if to_stdin else None
    with subprocess.Popen(command,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as pipe:
        # Let communicate() feed stdin while draining stdout/stderr; writing
        # to stdin by hand first can block once the OS pipe buffers fill.
        output, error = pipe.communicate(input=payload)
    if pipe.returncode != 0:
        raise RuntimeError(error.decode("utf-8"))
    return output.strip().decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def run_bg_task(cmd):
    """Run *cmd* via ``shell_exec`` and log a debug line before and after.

    Used as the ``target`` of the ``Process`` started by the ``/add``
    handler. The log messages say "thread", but the caller runs this in a
    separate process. Any exception from ``shell_exec`` propagates, and the
    second log line is then skipped.
    """
    pid = os.getpid()
    logging.debug('PID=%s Run "background" thread...', pid)
    shell_exec(cmd)
    logging.debug('PID=%s Background thread finished', pid)
|
||||||
|
|
||||||
|
|
||||||
|
@get('/add')
def add_to_archive() -> str:
    """Start archiving a URL in the background and return a JSON status.

    Query parameters:
        url: Resource to archive (required).
        depth: Passed to the CLI as ``--depth=<depth>`` when present.
        tag: Passed to the CLI as ``--tag=<tag>`` when present.

    Returns:
        A JSON document ``{"msg": ...}``. When ``url`` is missing the
        status is set to 400 and nothing is started. Otherwise the command
        is launched in a separate process and ``OK`` is returned without
        waiting for it, so ``OK`` does not mean the page was archived.
    """
    # The body is JSON; declare it instead of leaving Bottle's default
    # HTML content type on the response.
    response.content_type = 'application/json'

    url = request.query.url or None
    if url is None:
        response.status = 400
        return json.dumps({'msg': 'Error: No URL query parameter provided'})

    depth = request.query.depth or None
    tag = request.query.tag or None

    # ARCHIVEBOX_BIN may hold several words (e.g. a "docker compose ... run"
    # prefix), so split it into separate argv items.
    cmd = ARCHIVEBOX_BIN.split()
    cmd.append("add")
    if depth:
        cmd.append('--depth=' + str(depth))
    if tag:
        cmd.append('--tag=' + tag)
    # NOTE(review): the URL is wrapped in literal single quotes although the
    # command runs without a shell. Presumably this protects "&" and "?"
    # when the target re-parses its arguments through a shell (e.g. a
    # container entrypoint) -- confirm. A URL containing a single quote
    # would break out of that quoting; url/depth/tag are not validated.
    cmd.append("'" + url + "'")
    logging.debug('PID=%s Command to run: %s', os.getpid(), cmd)
    taskrun = Process(target=run_bg_task, args=(cmd,))
    taskrun.start()
    return json.dumps({'msg': 'OK'})
|
||||||
|
|
||||||
|
|
||||||
|
# Start the server only when this file is executed as a script. Without the
# guard, any import of this module starts another server -- including the
# re-import done by multiprocessing child processes under the "spawn"
# start method.
if __name__ == '__main__':
    run(server=CURSED_SERVER, host=CURSED_HOST, port=CURSED_PORT)
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
-i https://pypi.org/simple
|
||||||
|
bottle==0.12.25
|
||||||
|
gunicorn==20.1.0
|
||||||
|
setuptools==67.7.2 ; python_version >= '3.7'
|
Loading…
Reference in New Issue
Block a user