"""Serve conseil-etat.fr search results as an Atom feed.

A single aiohttp endpoint (``GET /?text=...``) forwards the query to the
upstream search service, then converts the JSON reply into an Atom document.
"""

# Reconstructed from patch 87c0e7d8dd4819aff14d6ec10d2d038996f0b818
# ("Initial commit", Jeltz, Tue, 4 Jan 2022 00:13:27 +0100), which creates
# server.py.

import datetime
import hashlib
import urllib.parse

import aiohttp.web
import lxml.builder
import lxml.etree


SEARCH_URL = "https://www.conseil-etat.fr/xsearch"
DOWNLOAD_URL = "https://www.conseil-etat.fr/plugin?plugin=Service.downloadFilePagePlugin&Index=Ariane_Web"

# XML namespace every Atom element must live in (RFC 4287, section 1.2).
ATOM_NS = "http://www.w3.org/2005/Atom"
# Feed title used when the caller of make_atom() does not supply one.
FEED_TITLE = "conseil-etat.fr search results"


def _atom_tag(name):
    """Return *name* as a namespace-qualified (Clark notation) Atom tag."""
    return "{{{}}}{}".format(ATOM_NS, name)


def url_add_params(url, params):
    """Return *url* with *params* merged into its query string.

    Existing query parameters are kept; a key present in both is overridden
    by the value from *params*. Because the existing query is folded into a
    dict, a key repeated in *url* keeps only its last value.
    """
    parts = urllib.parse.urlsplit(url)
    qs = {**dict(urllib.parse.parse_qsl(parts.query)), **params}
    parts = parts._replace(query=urllib.parse.urlencode(qs))
    return urllib.parse.urlunsplit(parts)


def make_atom_entry(document):
    """Build an Atom ``<entry>`` element from one search-result document.

    Reads the ``Id``, ``Version``, ``SourceStr3``, ``SourceStr5`` and
    ``HtmlSummary`` keys of *document*; a missing key raises ``KeyError``.
    ``Version`` must be parseable by ``datetime.fromisoformat``.
    """
    maker = lxml.builder.ElementMaker(namespace=ATOM_NS, nsmap={None: ATOM_NS})
    # UTF-8 yields the same digest as ASCII for ASCII ids, but does not
    # raise on an id that contains other characters.
    hash_id = hashlib.sha256(document["Id"].encode("utf-8"))
    # NOTE(review): Atom timestamps need a UTC offset (RFC 3339). Whether
    # "Version" carries one is not visible here -- confirm against the API.
    date = datetime.datetime.fromisoformat(document["Version"])
    author = document["SourceStr3"]
    return maker.entry(
        maker.title("{}: {}".format(author, document["SourceStr5"])),
        maker.id("urn:hash::sha256:{}".format(hash_id.hexdigest())),
        maker.link(href=url_add_params(DOWNLOAD_URL, {"Id": document["Id"]})),
        # The Atom element is <updated>, not <update>.
        maker.updated(date.isoformat()),
        # Atom person constructs carry the name in a nested <name> element.
        maker.author(maker.name(author)),
        # The payload is HTML markup, so declare it; lxml escapes the text.
        maker.content(document["HtmlSummary"], type="html"),
    )


def latest_date(documents):
    """Return the most recent ``Version`` timestamp among *documents*.

    Returns ``datetime.datetime.min`` when *documents* is empty.
    """
    return max(
        (datetime.datetime.fromisoformat(d["Version"]) for d in documents),
        default=datetime.datetime.min,
    )


def make_atom(content, title=FEED_TITLE, feed_id=SEARCH_URL):
    """Build the Atom ``<feed>`` element for a decoded search response.

    Args:
        content: Decoded JSON reply; its ``"Documents"`` list supplies the
            entries.
        title: Text of the feed's required ``<title>`` element.
        feed_id: IRI used for the feed's required ``<id>`` element.

    Returns:
        The root ``feed`` element, in the Atom namespace.
    """
    documents = content["Documents"]
    feed = lxml.etree.Element(_atom_tag("feed"), nsmap={None: ATOM_NS})
    header = (
        ("id", feed_id),
        ("title", title),
        ("updated", latest_date(documents).isoformat()),
    )
    for name, value in header:
        lxml.etree.SubElement(feed, _atom_tag(name)).text = value
    feed.extend(make_atom_entry(d) for d in documents)
    return feed


async def view(request):
    """Handle ``GET /``: run the search for ``?text=`` and return Atom XML.

    Raises:
        aiohttp.web.HTTPBadRequest: the ``text`` query parameter is missing.
        aiohttp.web.HTTPBadGateway: the upstream request failed, returned an
            error status, or did not answer with JSON.
    """
    try:
        text = request.rel_url.query["text"]
    except KeyError:
        raise aiohttp.web.HTTPBadRequest(
            text="missing required query parameter: text"
        ) from None

    data = {
        "advanced": "1",
        "type": "json",
        "SourceStr4": [
            "AW_DCE",
            "AW_AJCE",
            "AW_CRP",
            "AW_DTC",
            "AW_AJTC",
            "AW_DCA",
            "AW_AJCA",
        ],
        "text.add": text,
        "synonyms": "true",
        "scmode": "smart",
        "SkipCount": "50",
        "SkipMode": "0",
        "sort": "SourceDateTime1.desc,SourceStr5.desc",
    }

    session = request.app["client_session"]
    try:
        async with session.post(SEARCH_URL, data=data) as resp:
            # Without this an upstream error page would be parsed as results.
            resp.raise_for_status()
            content = await resp.json()
    except aiohttp.ClientError as err:
        raise aiohttp.web.HTTPBadGateway(
            text="upstream search request failed"
        ) from err

    feed = make_atom(
        content,
        title="{}: {}".format(FEED_TITLE, text),
        feed_id=str(request.url),
    )
    return aiohttp.web.Response(
        body=lxml.etree.tostring(
            feed, pretty_print=True, xml_declaration=True, encoding="utf-8"
        ),
        content_type="application/atom+xml",
        charset="utf-8",
    )


async def client_session_ctx(app):
    """Cleanup context: open one shared client session, close it on shutdown."""
    app["client_session"] = aiohttp.ClientSession()
    yield
    await app["client_session"].close()


def main():
    """Create the application, register the route and run the server."""
    app = aiohttp.web.Application()
    app.cleanup_ctx.append(client_session_ctx)
    app.add_routes((aiohttp.web.get("/", view),))

    aiohttp.web.run_app(app)


if __name__ == "__main__":
    main()