"""Serve conseil-etat.fr search results as an Atom feed over HTTP."""

import datetime
import hashlib
import urllib.parse

import aiohttp.web
import lxml.builder
import lxml.etree

SEARCH_URL = "https://www.conseil-etat.fr/xsearch"
DOWNLOAD_URL = (
    "https://www.conseil-etat.fr/plugin"
    "?plugin=Service.downloadFilePagePlugin&Index=Ariane_Web"
)

# XML namespace every Atom element must live in (RFC 4287, section 1.2).
ATOM_NS = "http://www.w3.org/2005/Atom"
# Declare Atom as the default namespace so elements serialize unprefixed.
_ATOM_NSMAP = {None: ATOM_NS}


def _atom_tag(local_name):
    """Return *local_name* qualified with the Atom namespace (Clark notation)."""
    return f"{{{ATOM_NS}}}{local_name}"


def url_add_params(url, params):
    """Return *url* with *params* merged into its query string.

    Existing query parameters are kept; a key present in *params* overrides
    the value already in the URL. Because the existing query is folded into
    a dict, a key repeated in *url* keeps only its last value.

    Args:
        url: The URL to extend.
        params: Mapping of query parameter names to values.

    Returns:
        The rebuilt URL as a string.
    """
    parts = urllib.parse.urlsplit(url)
    merged = {**dict(urllib.parse.parse_qsl(parts.query)), **params}
    return urllib.parse.urlunsplit(
        parts._replace(query=urllib.parse.urlencode(merged))
    )


def make_atom_entry(document):
    """Build the Atom ``<entry>`` element for one search result.

    Args:
        document: Mapping with the keys ``Id``, ``Version`` (an ISO 8601
            date string), ``SourceStr3``, ``SourceStr5`` and ``HtmlSummary``.

    Returns:
        An lxml element in the Atom namespace.

    Raises:
        KeyError: If one of the keys listed above is missing.
        ValueError: If ``Version`` is not a valid ISO 8601 date.
    """
    maker = lxml.builder.ElementMaker(namespace=ATOM_NS, nsmap=_ATOM_NSMAP)
    doc_id = document["Id"]
    author = document["SourceStr3"]
    subject = document["SourceStr5"]
    # UTF-8 yields the same bytes as ASCII for ASCII ids (so existing entry
    # ids are unchanged) but does not fail on non-ASCII ones.
    digest = hashlib.sha256(doc_id.encode("utf-8")).hexdigest()
    updated = datetime.datetime.fromisoformat(document["Version"])
    # NOTE(review): the key name suggests HtmlSummary is HTML; if so,
    # <content> probably wants type="html" - confirm before changing.
    return maker.entry(
        maker.title(f"{author}: {subject}"),
        maker.id(f"urn:hash::sha256:{digest}"),
        maker.link(href=url_add_params(DOWNLOAD_URL, {"Id": doc_id})),
        # Atom names this element "updated", not "update".
        maker.updated(updated.isoformat()),
        # Atom requires the author's name inside a <name> child element.
        maker.author(maker.name(author)),
        maker.content(document["HtmlSummary"]),
    )


def latest_date(documents):
    """Return the most recent ``Version`` date among *documents*.

    Args:
        documents: Iterable of mappings with an ISO 8601 ``Version`` string.

    Returns:
        The latest date as a ``datetime.datetime``, or
        ``datetime.datetime.min`` when *documents* is empty.
    """
    return max(
        (datetime.datetime.fromisoformat(d["Version"]) for d in documents),
        default=datetime.datetime.min,
    )


def make_atom(content):
    """Build the Atom ``<feed>`` element for a search response.

    Args:
        content: Decoded search response; its ``Documents`` key holds the
            result mappings accepted by ``make_atom_entry``.

    Returns:
        An lxml ``<feed>`` element in the Atom namespace, holding one
        ``<updated>`` element followed by one ``<entry>`` per document.
    """
    documents = content["Documents"]
    feed = lxml.etree.Element(_atom_tag("feed"), nsmap=_ATOM_NSMAP)
    updated = lxml.etree.SubElement(feed, _atom_tag("updated"))
    updated.text = latest_date(documents).isoformat()
    feed.extend(make_atom_entry(d) for d in documents)
    return feed


async def view(request):
    """Handle a feed request: run the search and return it as Atom.

    The search terms are read from the ``text`` query parameter and posted
    to ``SEARCH_URL`` through the client session stored on the application.

    Raises:
        aiohttp.web.HTTPBadRequest: If the ``text`` query parameter is
            missing.
    """
    try:
        text = request.rel_url.query["text"]
    except KeyError:
        # A missing parameter is a client error, not a server failure.
        raise aiohttp.web.HTTPBadRequest(
            text="Missing required query parameter: text"
        ) from None
    data = {
        "advanced": "1",
        "type": "json",
        "SourceStr4": [
            "AW_DCE",
            "AW_AJCE",
            "AW_CRP",
            "AW_DTC",
            "AW_AJTC",
            "AW_DCA",
            "AW_AJCA",
        ],
        "text.add": text,
        "synonyms": "true",
        "scmode": "smart",
        "SkipCount": "50",
        "SkipMode": "0",
        "sort": "SourceDateTime1.desc,SourceStr5.desc",
    }
    async with request.app["client_session"].post(SEARCH_URL, data=data) as resp:
        content = await resp.json()
    feed = make_atom(content)
    return aiohttp.web.Response(
        body=lxml.etree.tostring(feed, pretty_print=True),
        content_type="application/atom+xml",
    )
async def client_session_ctx(app):
    """Open the shared HTTP client session and close it again afterwards.

    Written as an async generator: the part before ``yield`` creates an
    ``aiohttp.ClientSession`` and stores it under ``app["client_session"]``;
    the part after ``yield`` closes the session stored under that key.
    """
    session = aiohttp.ClientSession()
    app["client_session"] = session
    yield
    await app["client_session"].close()


def main():
    """Create the web application, wire it up and run it.

    Registers ``client_session_ctx`` as a cleanup context and routes
    ``GET /`` to ``view``.
    """
    application = aiohttp.web.Application()
    application.cleanup_ctx.append(client_session_ctx)
    application.add_routes([aiohttp.web.get("/", view)])
    aiohttp.web.run_app(application)


if __name__ == "__main__":
    main()