ikiwiki to nikola: the script
People asked for it:
#! /usr/bin/python3
"""Convert an ikiwiki Markdown post into a nikola post (.md + .meta pair)."""

import argparse
from datetime import datetime
from glob import glob
from os import stat
from os.path import basename, splitext
import re
import sys
import time

footnote_re = re.compile(r'\[(?P<foot_number>\d+)\]')
taglink_re = re.compile(r'\[\[!taglink (?P<tag_name>[^\]]*)\]\]')
image_re = re.compile(r'\[\[!img (?P<path>.*)\]\]')
format_start_re = re.compile(r'^\[\[!format (?P<language>.*) """$')
format_end_re = re.compile(r'^"""\]\]$')


def rewrite_footnotes_line(line, text_block, footnote_block, taglink_block, foot_number):
    """Rewrite every `[N]` footnote marker found in one line.

    In the text block a marker becomes the reference `[^N]`; in the footnote
    block it becomes the definition `[^N]:`.  A non-empty footnote-block line
    without any marker is treated as the continuation of the previous
    footnote: it is stripped and indented so it lines up under the
    definition's text.

    Returns a `(new_line, foot_number)` tuple, where `foot_number` is the
    last footnote number seen (the incoming one if this line had none).

    Raises ValueError if a marker shows up outside the text and footnote
    blocks.
    """
    new_line = line
    changed = False

    # a rewritten marker ('[^N]') no longer matches footnote_re, so this ends
    while footnote := footnote_re.search(new_line):
        # widen the span by one on each side to swallow the []s
        start = footnote.start('foot_number') - 1
        end = footnote.end('foot_number') + 1

        prefix = new_line[:start]
        postfix = new_line[end:]
        foot_number = footnote.group('foot_number')

        if text_block:
            new_line = f"{prefix}[^{foot_number}]{postfix}"
        elif footnote_block:
            new_line = f"{prefix}[^{foot_number}]:{postfix}"
        else:
            raise ValueError('found a footnote in the taglink_block!')

        changed = True

    if not changed and footnote_block and line:
        # '[^]: ' <-- 5 extra chars
        new_line = f"{' ' * (len(foot_number) + 5)}{line.strip()}"

    return new_line, foot_number


def rewrite_footnotes(src):
    """Convert the footnotes of a whole post and return the new text.

    Lines starting with '---' split the post into blocks.  With a single
    rule the post is text + taglinks; with two or more it is
    text + footnotes + taglinks.  The rule that opens the footnote block is
    dropped, the one that opens the taglink block is kept.
    """
    lines = src.splitlines()
    hr_count = sum(1 for line in lines if line.startswith('---'))

    new_lines = []
    text_block = True
    footnote_block = False
    taglink_block = False
    hr_seen = 0
    foot_number = ''

    for line in lines:
        if len(line) > 4 and line[:4] == '    ':
            # four-space indent: it's a code block, leave alone
            new_lines.append(line)
            continue

        if line.startswith('---'):
            hr_seen += 1

            # if there is only one hr, then we have text + taglink blocks
            # if there are two or more, it's text + footnote + taglink blocks
            if text_block and hr_count >= 2 and hr_seen == hr_count - 1:
                text_block = False
                footnote_block = True
                # don't keep it
                continue
            elif hr_seen == hr_count:
                text_block = False
                footnote_block = False
                taglink_block = True
                # we'll need it later
                new_lines.append(line)
                continue

        try:
            new_line, foot_number = rewrite_footnotes_line(
                line, text_block, footnote_block, taglink_block, foot_number)
        except Exception as e:
            print(f"got `{e}´ for `{line}´.")
            raise

        new_lines.append(new_line)

    return '\n'.join(new_lines) + '\n'


def rewrite_taglinks(src):
    """Strip taglink lines and rules made only of dashes from the text.

    Returns `(new_text, tags)`, where `tags` lists every tag name found in
    `[[!taglink ...]]` directives, in order of appearance.
    """
    new_lines = []
    new_tags = []

    for line in src.splitlines():
        if line and line == '-' * len(line):
            # a rule made only of dashes: don't keep it
            continue

        tags = taglink_re.findall(line)
        if tags:
            # the whole line is dropped, only the tag names survive
            new_tags.extend(tags)
        else:
            new_lines.append(line)

    return '\n'.join(new_lines) + '\n', new_tags


def rewrite_images(src):
    """Replace the `[[!img PATH]]` directive on each line with a Markdown image.

    Only the first directive of a line is rewritten; text before and after
    it is preserved.
    """
    new_lines = []

    for line in src.splitlines():
        image = image_re.search(line)
        if image is None:
            new_lines.append(line)
            continue

        # get the text before and after the whole directive
        prefix = line[:image.start(0)]
        postfix = line[image.end(0):]
        path = image.group('path')

        # the root to which this 'absolute' path points is the website's root
        new_lines.append(f"{prefix}![](/{path}){postfix}")

    return '\n'.join(new_lines) + '\n'


lang_map = dict(
    py='python',
    sh='bash',
)


def rewrite_format(src):
    """Turn `[[!format LANG \"\"\"` ... `\"\"\"]]` directives into fenced code blocks."""
    new_lines = []

    for line in src.splitlines():
        start = format_start_re.match(line)
        if start is not None:
            lang = start.group('language')
            # if there's no mapping return the same lang
            new_lines.append(f"```{lang_map.get(lang, lang)}")
        elif format_end_re.match(line):
            new_lines.append('```')
        else:
            new_lines.append(line)

    return '\n'.join(new_lines) + '\n'


def titlify(src):
    """'some-post-name' -> 'Some post name'"""
    words = src.split('-')
    words[0] = words[0].title()

    return ' '.join(words)


def test_offesetify():
    src = -3600
    dst = '+0100'
    assert offsetify(src) == dst


def offsetify(src):
    """Format an offset in seconds *west* of UTC as a `+HHMM`/`-HHMM` string.

    -3600 -> '+0100', 18000 -> '-0500'
    """
    # NOTE: time.timezone returns seconds west of UTC, which is opposite of what usual offsets go
    sign = '-' if src > 0 else '+'

    # split the magnitude, not the signed value: divmod() floors, so a signed
    # input gives wrong hours/minutes for anything that isn't a whole
    # negative hour
    hours, seconds = divmod(abs(src), 3600)
    # "offsets are always in minutes" sounds like one item in 'things developers believe about timezones'
    minutes = seconds // 60

    return f"{sign}{hours:02d}{minutes:02d}"


def datify(src):
    '''1701288755.377908 -> 2023-11-29 21:12:35 +0100'''
    # NOTE: this assumes the machine's local timezone.
    # thanks SirDonNick#python@libera.chat
    # astimezone() without arguments attaches the local zone as it applies at
    # that instant, so DST dates get the DST offset (time.timezone is always
    # the non-DST one).
    dt = datetime.fromtimestamp(src).astimezone()

    return dt.strftime('%Y-%m-%d %H:%M:%S %z')


# zoneinfo for some reason doesn't know about CEST, so I'll just hack a mapping here
tzname_to_utc_offset = dict(
    CEST='+0200',
    CET='+0100',
)

month_name_to_number = dict(
    jan= 1, ene= 1,
    feb= 2,
    mar= 3,
    apr= 4, abr= 4,
    may= 5,
    jun= 6,
    jul= 7,
    aug= 8, ago= 8,
    sep= 9,
    oct=10,
    nov=11,
    dec=12, dic=12,
)


def dedatify(src):
    """Turn a split ikiwiki 'Posted ...' line into a nikola date string.

    Raises KeyError for a month or timezone name missing from the mappings
    above.
    """
    #       0         1      2     3      4       5           6     7
    # src=['Posted', 'Sun', '26', 'Aug', '2012', '11:27:16', 'PM', 'CEST']
    month = month_name_to_number[src[3].lower()]
    utc_offset = tzname_to_utc_offset[src[7]]
    h, m, s = [int(x) for x in src[5].split(':')]

    # 12-hour clock: 12 AM is 00 and 12 PM is 12, so fold 12 to 0 first
    h %= 12
    if src[6].upper() == 'PM':
        h += 12

    return f"{src[4]}-{month:02d}-{int(src[2]):02d} {h:02d}:{m:02d}:{s:02d} {utc_offset}"


def build_meta(filepath, tags, date=None):
    """Build the contents of the .meta file for a post.

    `date` is the split 'Posted ...' line (see dedatify()); when it is None
    the file's mtime is used instead.

    Returns `(filename, meta)`, where `filename` is the base name of
    `filepath` without its extension.
    """
    filename = splitext(basename(filepath))[0]
    if date is None:
        date_string = datify(stat(filepath).st_mtime)
    else:
        date_string = dedatify(date)

    meta = f""".. title: {titlify(filename)}
.. slug: {filename}
.. date: {date_string}
.. tags: {', '.join(tags)}
.. type: text
"""

    return filename, meta


def import_post(opts):
    """Convert `opts.filepath` and write posts/NAME.md and posts/NAME.meta."""
    with open(opts.filepath) as src_file:
        src = src_file.read()

    mid, tags = rewrite_taglinks(rewrite_footnotes(src))
    dst = rewrite_format(rewrite_images(mid))
    # build_meta() itself falls back to the file's mtime when date is None
    filename, meta = build_meta(opts.filepath, tags, date=opts.date)

    with open(f"posts/{filename}.md", 'w') as post_file:
        post_file.write(dst)
    with open(f"posts/{filename}.meta", 'w') as meta_file:
        meta_file.write(meta)


def parse_args():
    """Parse the command line: the file to convert and an optional date."""
    parser = argparse.ArgumentParser()

    parser.add_argument('filepath', metavar='FILE')
    parser.add_argument('-d', '--date', nargs=8,
                        help='Just pass something like "Posted Wed 12 Sep 2012 08:19:23 PM CEST".')

    return parser.parse_args()


if __name__ == '__main__':
    opts = parse_args()

    import_post(opts)
I removed all the tests, but they all looked like this:
def test_dedatify():
    """A split ikiwiki 'Posted ...' line must come back as a nikola date string."""
    posted = 'Posted Wed 12 Sep 2012 08:19:23 PM CEST'.split()

    assert dedatify(posted) == '2012-09-12 20:19:23 +0200'
Enjoy.