init
This commit is contained in:
commit
21e3ae3b1c
13
Pipfile
Normal file
13
Pipfile
Normal file
@ -0,0 +1,13 @@
|
||||
[[source]]
|
||||
url = "https://pypi.org/simple"
|
||||
verify_ssl = true
|
||||
name = "pypi"
|
||||
|
||||
[packages]
|
||||
bottle = "*"
|
||||
gunicorn = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
[requires]
|
||||
python_version = "3.10"
|
45
Pipfile.lock
generated
Normal file
45
Pipfile.lock
generated
Normal file
@ -0,0 +1,45 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "6cf82d4043d4c4bf722b99766ec56d49b04447a5e89726986ac957c46efc2196"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
"python_version": "3.10"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
"url": "https://pypi.org/simple",
|
||||
"verify_ssl": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"default": {
|
||||
"bottle": {
|
||||
"hashes": [
|
||||
"sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea",
|
||||
"sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==0.12.25"
|
||||
},
|
||||
"gunicorn": {
|
||||
"hashes": [
|
||||
"sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e",
|
||||
"sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==20.1.0"
|
||||
},
|
||||
"setuptools": {
|
||||
"hashes": [
|
||||
"sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b",
|
||||
"sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"
|
||||
],
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==67.7.2"
|
||||
}
|
||||
},
|
||||
"develop": {}
|
||||
}
|
40
README.md
Normal file
40
README.md
Normal file
@ -0,0 +1,40 @@
|
||||
# Cursed API for ArchiveBox
|
||||
|
||||
ArchiveBox [doesn't have a web API](https://github.com/ArchiveBox/ArchiveBox/issues/496) yet. This is a shitty single-endpoint API to automate page archiving. It uses `subprocess` to run the archivebox CLI. The CLI runs in a separate background process, so the HTTP request returns immediately instead of blocking until archiving finishes.
|
||||
|
||||
# Install and run
|
||||
|
||||
Install dependencies:
|
||||
|
||||
```
|
||||
pip install bottle gunicorn
|
||||
```
|
||||
|
||||
Start the API on the server where the ArchiveBox container is running. Set the actual path to your docker-compose.yml.
|
||||
|
||||
```
|
||||
ARCHIVEBOX_BIN="docker compose -f /opt/archivebox/docker-compose.yml run archivebox" python cursed_archivebox_api.py
|
||||
```
|
||||
|
||||
# Environment
|
||||
|
||||
| Variable | Default |
|
||||
| ----------------- | --------------------- |
|
||||
| `ARCHIVEBOX_BIN` | `/usr/bin/archivebox` (default for non-Docker installations) |
|
||||
| `CURSED_PORT` | `9998` |
|
||||
| `CURSED_HOST` | `0.0.0.0` |
|
||||
| `CURSED_SERVER` | `gunicorn`. See [server backends](https://bottlepy.org/docs/dev/deployment.html#switching-the-server-backend) |
|
||||
|
||||
# GET /add
|
||||
|
||||
Query parameters:
|
||||
|
||||
* `url`. Resource URL
|
||||
* `depth`. Archive depth. Default: 0 (current page)
|
||||
* `tag`. List of comma separated tags e.g. `my_tag`, `my_tag,another_one`.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
curl -i 'http://localhost:9998/add?url=https://example.com&depth=0&tag=api,example'
|
||||
```
|
67
cursed_archivebox_api.py
Normal file
67
cursed_archivebox_api.py
Normal file
@ -0,0 +1,67 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from multiprocessing import Process
|
||||
|
||||
from bottle import run, get, request, response
|
||||
|
||||
|
||||
# Runtime configuration, read once from the environment at import time.
# An unset or empty variable falls back to the default on the right.

# Command used to invoke ArchiveBox; it is split on whitespace when the
# command line is built, so it may be a multi-word command.
ARCHIVEBOX_BIN = os.getenv('ARCHIVEBOX_BIN') or '/usr/bin/archivebox'
# Environment values are always strings; convert so the port is an int
# whether it comes from the environment or from the default.
CURSED_PORT = int(os.getenv('CURSED_PORT') or 9998)
CURSED_HOST = os.getenv('CURSED_HOST') or '0.0.0.0'
CURSED_SERVER = os.getenv('CURSED_SERVER') or 'gunicorn'


logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s:%(levelname)s:%(name)s: %(message)s"
)
|
||||
|
||||
|
||||
def shell_exec(command: list, to_stdin: str = None) -> str:
    """Run *command* without a shell and return its standard output.

    Args:
        command: Program and arguments as an argv list.
        to_stdin: Optional text sent to the process on stdin. A trailing
            newline is appended before it is UTF-8 encoded.

    Returns:
        The process's stdout, stripped of surrounding whitespace and decoded
        as UTF-8 (undecodable bytes are replaced).

    Raises:
        RuntimeError: If the process exits with a non-zero status. The
            message is the process's stderr.
    """
    payload = None
    if to_stdin:
        payload = ('%s\n' % to_stdin).encode('utf-8')

    pipe = subprocess.Popen(command,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # Let communicate() feed stdin while it drains stdout/stderr; writing to
    # pipe.stdin by hand first can block once the OS pipe buffers fill up.
    output, error = pipe.communicate(input=payload)

    if pipe.returncode != 0:
        raise RuntimeError(error.decode('utf-8', errors='replace'))
    return output.strip().decode('utf-8', errors='replace')
|
||||
|
||||
|
||||
def run_bg_task(cmd):
    """Run *cmd* via ``shell_exec`` and log the outcome.

    Used as the ``target`` of the ``Process`` started by the ``/add``
    handler, so nothing is returned to a caller; success and failure are
    only reported through the log.

    Args:
        cmd: Command to execute, as an argv list.
    """
    logging.debug('PID=%s Run "background" thread...', os.getpid())
    try:
        output = shell_exec(cmd)
    except Exception:
        # This is the top of the child process: nobody is left to handle the
        # error, so record it (with traceback) through the configured logger.
        logging.exception('PID=%s Background command failed: %s',
                          os.getpid(), cmd)
        return
    logging.debug('PID=%s Command output: %s', os.getpid(), output)
    logging.debug('PID=%s Background thread finished', os.getpid())
|
||||
|
||||
|
||||
@get('/add')
def add_to_archive() -> str:
    """Handle ``GET /add``: start ``archivebox add`` for a URL in the background.

    Query parameters:
        url: Resource to archive (required).
        depth: Optional archive depth; must be a non-negative integer.
        tag: Optional comma-separated tag list, forwarded as ``--tag=``.

    Returns:
        A JSON document ``{"msg": ...}``. The status is set to 400 when a
        parameter is missing or malformed. Otherwise the command is handed
        to a child process and ``OK`` is returned without waiting for it.
    """
    # The body is always a JSON string, so declare it explicitly; bottle
    # only sets the JSON content type by itself for dict return values.
    response.content_type = 'application/json'

    url = (request.query.url or '').strip()
    depth = (request.query.depth or '').strip()
    tag = request.query.tag or None

    # Validate everything before any part of the command is built.
    if not url:
        response.status = 400
        return json.dumps({'msg': 'Error: No URL query parameter provided'})
    if "'" in url:
        # The URL is wrapped in single quotes below; an embedded quote would
        # end that quoting early and corrupt the argument.
        response.status = 400
        return json.dumps({'msg': 'Error: URL must not contain a single quote'})
    if depth and not (depth.isascii() and depth.isdigit()):
        response.status = 400
        return json.dumps({'msg': 'Error: depth must be a non-negative integer'})

    cmd = ARCHIVEBOX_BIN.split()
    cmd.append("add")
    if depth:
        cmd.append('--depth=' + depth)
    if tag:
        cmd.append('--tag=' + tag)
    # NOTE(review): the literal single quotes are kept on purpose. No shell
    # runs on this side (argv list), so they reach the target command as-is;
    # presumably a shell behind ARCHIVEBOX_BIN (e.g. a container entrypoint)
    # relies on them -- confirm before removing.
    cmd.append("'" + url + "'")

    logging.debug('PID=%s Command to run: %s', os.getpid(), cmd)
    # Run the CLI in a child process so this request can return right away.
    taskrun = Process(target=run_bg_task, args=(cmd,))
    taskrun.start()
    return json.dumps({'msg': 'OK'})
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the server only when executed as a script. Without this guard,
    # importing the module starts a server too -- which is what happens in
    # every child process when multiprocessing uses the "spawn" start method,
    # because the child re-imports the main module.
    run(server=CURSED_SERVER, host=CURSED_HOST, port=CURSED_PORT)
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
-i https://pypi.org/simple
|
||||
bottle==0.12.25
|
||||
gunicorn==20.1.0
|
||||
setuptools==67.7.2 ; python_version >= '3.7'
|
Loading…
x
Reference in New Issue
Block a user