#!/usr/bin/env nix-shell
#!nix-shell -i python3 -p "python3.withPackages (ps: with ps; [ pygments markdown2 ])"
# License: 0BSD
import os, shutil, time, traceback, markdown2, json, html, base64, hashlib
from datetime import datetime, timezone
from email.utils import formatdate
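# Static site generator for pavluk.org: renders the src/ tree into the
# repository root, producing the blog index, RSS feed, comment sections,
# Open Graph metadata and sitemap.xml, and preserves created/edited
# timestamps of unchanged files via src/_sitemap.json.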
def make_important(vars):
return vars.replace(';', ' !important;')
def add_tabs(vars):
return '\n'.join(' ' + x for x in vars.split('\n'))
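# Normalize a URL for comparisons: strip any leading './', a trailing
# 'index.html'/'index.htm', and trailing slashes.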
def canonicize_url(url):
while url.startswith('./'):
url = url[2:]
for x in ['index.html', 'index.htm']:
if url == x:
return ''
if url.endswith('/' + x):
return url[:-len(x)-1]
return url.rstrip('/')
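# State carried between runs: _sitemap.json stores per-file hashes and
# timestamps, _comments.html is the comment-form template, and _comments.json
# maps source paths to their lists of comments.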
old_sitemap = json.loads(open(os.path.join('src', '_sitemap.json'), 'rt', encoding='utf-8').read())
new_sitemap = {}
page_mapping = {}
comments = open(os.path.join('src', '_comments.html'), 'rt', encoding='utf-8').read()
cm = json.loads(open(os.path.join('src', '_comments.json'), 'rt', encoding='utf-8').read())
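# Render the directory `src` into `dst`. `template`, `menu` and `titles` are
# inherited by subdirectories unless a local _template.html or _menu.txt
# overrides them; names starting with '_' or '.' are private inputs and are
# never copied. With actually_write=False this is a dry run that only fills
# new_sitemap and page_mapping.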
def process(src, dst, template=None, menu=[], titles={}, actually_write=True):
dir_prefix = ''
if len(dst) > 2:
dir_prefix = dst[2:] + '/'
try:
template = open(os.path.join(src, '_template.html'), 'rt', encoding='utf-8').read()
except (KeyboardInterrupt, SystemExit):
raise
except:
        if template is None:
template = '{{body}}'
try:
vars = open(os.path.join(src, '_vars.css'), 'rt', encoding='utf-8').read().strip().split('\n\n')
except (KeyboardInterrupt, SystemExit):
raise
except:
vars = []
fake_files = {}
rewrite_names = {}
metadata = {}
if src == 'src':
full_pages = set(['about.html'])
else:
full_pages = set()
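    # fake_files holds generated pages (index, feed) with no on-disk source;
    # rewrite_names maps post filenames to dated YYYY/MM/DD paths; pages in
    # full_pages additionally get a table of contents and a comment section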
try:
index = []
        # RSS 2.0 markup reconstructed from the surviving text; exact tags are assumed
        feed = '<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>pavluk.org Blog</title>\n<link>https://pavluk.org/blog</link>\n<description>My random scribblings</description>\n<lastBuildDate>'
        feed += formatdate()
        feed += '</lastBuildDate>\n<copyright>Licensed under CC0</copyright>\n<language>en-us</language>\n<managingEditor>webmaster@pavluk.org</managingEditor>\n<webMaster>webmaster@pavluk.org</webMaster>\n<category>Blogs</category>\n<docs>https://validator.w3.org/feed/docs/rss2.html</docs>\n<image><url>https://pavluk.org/pfp.jpg</url></image>\n'
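        # each _index.txt line is '::'-separated: dated source filename, category,
        # title, description, and a fifth field carried into `metadata`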
for entry in open(os.path.join(src, '_index.txt'), 'rt', encoding='utf-8').read().strip().split('\n'):
entry = entry.split('::')
titles[canonicize_url(dir_prefix + entry[0].replace('.md', '.html'))] = entry[2]
metadata[canonicize_url(dir_prefix + entry[0].replace('.md', '.html'))] = (entry[1], entry[3], entry[4])
date = entry[0].split('_', 1)[0]
dest = dir_prefix + date[:4] + '/' + date[5:7] + '/' + date[8:10] + '/' + entry[0].split('_', 1)[1].replace('.md', '.html')
rewrite_names[entry[0].replace('.md', '.html')] = dest
full_pages.add(entry[0].replace('.md', '.html'))
try:
dt = datetime.strptime(date, '%Y-%m-%dT%H%M%S%z')
except:
try:
dt = datetime.strptime(date, '%Y-%m-%dT%H%M%S')
except:
dt = datetime.strptime(date, '%Y-%m-%d')
            # link and <item> markup reconstructed; exact tags are assumed
            index.append('<a href="/' + dest + '">' + entry[2] + '</a> (' + date[:10] + ')')
            feed += '<item><title>' + entry[2] + '</title><link>https://pavluk.org/' + dest + '</link>'
            feed += '<pubDate>' + formatdate(time.mktime(dt.timetuple())) + '</pubDate><guid>https://pavluk.org/' + dest + '</guid><description>' + entry[3] + '</description><author>webmaster@pavluk.org</author><category>' + entry[1] + '</category></item>\n'
        feed += '</channel>\n</rss>\n'
        if index:
            # markup reconstructed (assumed): a feed link followed by the post list
            index = (
                '<p><a href="feed.xml">RSS feed</a></p>\n' +
                '<ul>\n' +
                ''.join(f'<li>{x}</li>\n' for x in index) +
                '</ul>\n'
            )
        fake_files['index.html'] = index
        fake_files['feed.xml'] = feed
except (KeyboardInterrupt, SystemExit):
raise
except:
pass
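    # directories without an _index.txt just get no generated index or feed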
old_menu = menu
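    # _menu.txt lines are `path:Title`; a local menu replaces the inherited one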
try:
menu = []
for entry in open(os.path.join(src, '_menu.txt'), 'rt', encoding='utf-8').read().strip().split('\n'):
entry = entry.split(':')
if dir_prefix + entry[0]: # don't title the title page
titles[canonicize_url(dir_prefix + entry[0])] = entry[1]
menu.append((dir_prefix + entry[0], entry[1]))
except (KeyboardInterrupt, SystemExit):
raise
except:
if old_menu:
menu = old_menu
else:
menu = []
for x in [*os.listdir(src), *fake_files.keys()]:
SRC = None
DST = None
if not x.startswith('_') and not x.startswith('.'):
print(src, x)
if x in fake_files.keys() or os.path.isfile(os.path.join(src, x)):
if x in fake_files.keys():
contents = fake_files[x]
old_contents = contents
else:
# make source code public
SRC = os.path.join(src, x)
try:
shutil.chown(os.path.join(src, x), user='nginx', group='nginx')
except (KeyboardInterrupt, SystemExit):
raise
except:
pass
dest_name = os.path.join(dst, x)
if x.endswith('.html') or x.endswith('.md'):
if x not in fake_files.keys():
with open(os.path.join(src, x), 'rt', encoding='utf-8') as f:
contents = f.read()
if x.endswith('.md'):
extras = {'fenced-code-blocks': {'linespans': True},'footnotes':{},'markdown-in-html':{}, 'metadata':{}, 'strike': {}, 'tables': {}, 'header-ids': {}}
contents = markdown2.markdown('\n' + contents, extras=extras)
x = x[:-3] + '.html'
dest_name = os.path.join(dst, x)
if x in rewrite_names.keys():
dest_name = rewrite_names[x]
                        os.makedirs(os.path.dirname(dest_name), exist_ok=True)
                    # the original replacement targets were markup lost to extraction;
                    # assumed here to be empty-paragraph cleanup of the markdown2 output
                    contents = contents.replace('<p></p>\n', '').replace('<p></p>', '')
old_contents = contents
cur_menu = ''
if menu:
cur_menu = []
for entry in menu:
                        if canonicize_url(entry[0]) == canonicize_url(dir_prefix + x) or (dir_prefix and canonicize_url(dir_prefix) == canonicize_url(entry[0])):
                            # current page: rendered as plain bold text (markup assumed)
                            cur_menu.append('<b>' + entry[1] + '</b>')
                        else:
                            cur_menu.append('<a href="/' + entry[0] + '">' + entry[1] + '</a>')
                    cur_menu = (
                        '<nav>\n<ul>\n' +
                        ''.join(f'<li>{x}</li>\n' for x in cur_menu) +
                        '</ul>\n</nav>\n'
                    )
title = titles.get(canonicize_url(dir_prefix + x), 'pavluk.org')
                    # Open Graph tags reconstructed; the property names are assumed
                    facebook = '<meta property="og:title" content="' + html.escape(title) + '"/>\n'
                    facebook += '<meta property="og:url" content="https://pavluk.org/' + canonicize_url(dir_prefix + x) + '"/>\n'
                    facebook += '<meta property="og:site_name" content="pavluk.org"/>\n'
                    if x in full_pages:
                        # collect <h2> headings that carry an id (from the header-ids extra)
                        toc = []
                        for w0 in contents.split('<h2')[1:]:
                            w = w0.split('>')[0]
                            for w in w.split():
                                if w.startswith('id='):
                                    w = w[3:].strip('"')
                                    toc.append((w, w0.split('>')[1].split('<')[0]))
                                    break
                        tocs = ''
                        if toc:
                            # markup reconstructed (assumed): a linked table of contents prepended to the body
                            tocs += '<nav class="toc"><ul>' + ''.join('<li><a href="#' + a + '">' + b + '</a></li>' for a, b in toc) + '</ul></nav>\n' + contents
                        if tocs:
                            contents = tocs
                    facebook += '<meta property="og:image" content="https://pavluk.org/pfp.jpg"/>\n'
                    meta = metadata.get(canonicize_url(dir_prefix + x), None)
                    if meta is not None:
                        # article metadata; tag names are assumed, fields come from _index.txt
                        facebook += '<meta property="og:type" content="article"/>\n'
                        facebook += '<meta property="og:description" content="' + html.escape(meta[1]) + '"/>\n'
                        facebook += '<meta property="article:section" content="' + html.escape(meta[0]) + '"/>\n'
                        facebook += '<meta property="article:tag" content="' + html.escape(meta[2]) + '"/>\n'
                        created = new_sitemap.get(dest_name, {}).get('created', None)
                        edited = new_sitemap.get(dest_name, {}).get('edited', None)
                        if created is None: created = old_sitemap.get(dest_name, {}).get('created', None)
                        if edited is None: edited = old_sitemap.get(dest_name, {}).get('edited', None)
                        if created is not None:
                            facebook += '<meta property="article:published_time" content="' + created + '"/>\n'
                        if edited is not None:
                            facebook += '<meta property="article:modified_time" content="' + edited + '"/>\n'
                    else:
                        facebook += '<meta property="og:type" content="website"/>\n'
                    if 'index.html' in fake_files.keys() and x != 'index.html':
                        # back-link to the generated index (markup and target assumed)
                        contents = '<p><a href="/' + canonicize_url(dir_prefix) + '">&lt; Index</a></p>\n' + contents
                    cur_comments = ''
                    if x in full_pages:
                        # comment markup below is reconstructed; the tag names are assumed
                        cur_comments = comments + '<div class="comment-list">'
                        if SRC:
                            for cmt in cm.get(SRC[4:], [])[::-1]:
                                if cmt['name'] == 'chayleaf' and cmt['trip'] == 'e60312d6ccb4ec77706d2f5e4fcfc86b37c46c7f5a3fce5e21e91e1bbddc8917':
                                    # the site owner, verified against a pinned tripcode hash
                                    name = '<b>chayleaf</b>'
                                else:
                                    def from_hex(s):
                                        return bytes(int(s[i:i+2], 16) for i in range(0, len(s), 2))
                                    name = '<b>' + html.escape(cmt['name']) + '</b>'
                                    if cmt['trip']:
                                        # assumed rendering: a shortened base64 form of the tripcode digest
                                        name += ' <span class="trip">!' + base64.b64encode(from_hex(cmt['trip'])).decode()[:12] + '</span>'
                                text = cmt['comment']
                                if not cmt.get('raw', False):
                                    text = html.escape(text).replace('\r', '').replace('\n', ' ').replace('\t', ' ')
                                dtime = cmt['time']
                                cur_comments += '<div class="comment">'
                                cur_comments += '<p>' + name + ' wrote at <time datetime="' + dtime + '">' + dtime.replace('T', ' ') + '</time>:</p>'
                                cur_comments += '<div class="comment-text">' + text + '</div>'
                                cur_comments += '</div>'
                        cur_comments += '</div>'
                    if cur_comments:
                        cur_comments = '<section class="comments">' + cur_comments + '</section>'
contents = template.replace('{{body}}', contents).replace('{{title}}', title).replace('{{menu}}', cur_menu).replace('{{comments}}', cur_comments).replace('{{meta}}', facebook)
elif x.endswith('.css'):
if x not in fake_files.keys():
with open(os.path.join(src, x), 'rt', encoding='utf-8') as f:
contents = f.read()
if x in rewrite_names.keys():
dest_name = rewrite_names[x]
                        os.makedirs(os.path.dirname(dest_name), exist_ok=True)
if len(vars) == 2:
var_contents = ':root {\n'
var_contents += add_tabs(vars[0])
var_contents += '\n}\n'
var_contents += '@media (prefers-color-scheme: dark) {\n :root {\n'
var_contents += make_important(add_tabs(add_tabs(vars[1])))
var_contents += '\n }\n}\n'
for a, b in zip(['light-mode', 'dark-mode'], vars):
var_contents += '.' + a + ' {\n'
var_contents += make_important(add_tabs(b))
var_contents += '\n}\n'
contents = contents.split('*/\n', 1) # license header
contents = contents[0] + '*/\n' + var_contents + '\n' + contents[1]
elif vars:
raise ValueError('invalid var count')
else:
if x in rewrite_names.keys():
dest_name = rewrite_names[x]
                        os.makedirs(os.path.dirname(dest_name), exist_ok=True)
h = hashlib.sha256()
h2 = hashlib.sha256()
if x.endswith('.html') or x.endswith('.css') or x.endswith('.md') or x in fake_files.keys():
h.update(contents.encode('utf-8'))
h2.update(old_contents.encode('utf-8'))
# keep the modified date
if actually_write and old_sitemap.get(dest_name, {}).get('hash', '') != h.hexdigest():
with open(dest_name, 'wt', encoding='utf-8') as f:
f.write(contents)
else:
                    with open(os.path.join(src, x), 'rb') as f:
                        # read once: a second f.read() on the same handle returns b''
                        data = f.read()
                        h.update(data)
                        h2.update(data)
if actually_write and old_sitemap.get(dest_name, {}).get('hash', '') != h.hexdigest():
shutil.copy2(os.path.join(src, x), dest_name)
                if dest_name not in new_sitemap:
                    # 'time' is the last write, 'edited' the last content change,
                    # 'created' the first appearance; stale stamps are restored
                    # from old_sitemap between the two passes below
                    now = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
                    new_sitemap[dest_name] = {'created': now, 'hash': h.hexdigest(), 'edited': now, 'time': now, 'content_hash': h2.hexdigest()}
# make served file public
DST = dest_name
if actually_write:
try:
shutil.chown(dest_name, user='nginx', group='nginx')
except (KeyboardInterrupt, SystemExit):
raise
except:
pass
else:
if actually_write:
os.makedirs(os.path.join(dst, x), exist_ok=True)
process(os.path.join(src, x), os.path.join(dst, x), template, menu, titles, actually_write)
if SRC is not None and DST is not None:
if DST.startswith('./'): DST = DST[2:]
page_mapping[DST] = SRC[4:]
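# Pass 1: dry run to enumerate every destination file and compute its hashes.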
process('src', '.', None, [], {}, False)
with open('src/_mapping.json', 'wt', encoding='utf-8') as f:
f.write(json.dumps(page_mapping, sort_keys=True))
print('old', old_sitemap)
print('new', new_sitemap)
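# Reconcile with the previous run: remove files that disappeared, and keep the
# old 'time'/'edited'/'created' stamps wherever the corresponding hash matches.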
for k, v in old_sitemap.items():
if k not in new_sitemap.keys():
# TODO: remove redundant directories?
# don't need it for now
# safeguard against removing the sources
assert(not (k.startswith('src/') or '/src' in k))
os.remove(k)
else:
if v['hash'] == new_sitemap[k]['hash']:
new_sitemap[k]['time'] = v['time']
if v.get('content_hash', None) == new_sitemap[k]['content_hash']:
new_sitemap[k]['edited'] = v['edited']
if v.get('created', None) is not None:
new_sitemap[k]['created'] = v['created']
print('new2', new_sitemap)
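# Pass 2: write everything for real, now that the timestamps are reconciled.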
process('src', '.')
with open('src/_sitemap.json', 'wt', encoding='utf-8') as f:
f.write(json.dumps(new_sitemap, sort_keys=True))
# sitemap.xml markup reconstructed following the standard sitemap protocol
smxml = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
for k, v in sorted(new_sitemap.items(), key=lambda kv: kv[1]['time']):
    if k.endswith('.html'):
        k = canonicize_url(k)
        smxml += '<url><loc>https://pavluk.org/' + k + '</loc>'
        smxml += '<lastmod>' + v['time'] + '</lastmod>'
        smxml += '</url>\n'
smxml += '</urlset>'
with open('sitemap.xml', 'wt', encoding='utf-8') as f:
f.write(smxml)