#!/usr/bin/env python
"""Generate a JSONL file from a LibreTranslate ``suggestions.db``.

Provenance: this script was introduced as ``suggestions-to-jsonl.py`` by
commit d6ef04ba3e528acc219a61b815b45b1d3076de1f, "Extract JSONL from
suggestions.db script" (Piero Toffanin, 2022-05-22).

Usage::

    suggestions-to-jsonl.py [--db PATH] [--clear]

Every row of the ``suggestions`` table whose ``source`` is not ``auto`` is
written as one JSON object per line (keys ``q``, ``s``, ``source``,
``target``) to ``<unix-timestamp>.jsonl`` in the current directory.
"""
import argparse
import contextlib
import json
import sqlite3
import time


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Program to generate JSONL files from a LibreTranslate's suggestions.db"
    )
    # No ``nargs=1`` here: with it argparse stores a one-element list when the
    # option is given, while the default stays a plain string, so any explicit
    # ``--db PATH`` would make sqlite3.connect() fail with a TypeError.
    parser.add_argument(
        "--db",
        type=str,
        help="Path to suggestions.db file",
        default="suggestions.db",
    )
    parser.add_argument(
        "--clear",
        action="store_true",
        help="Clear suggestions.db after generation",
        default=False,
    )
    return parser


def export_suggestions(db_path, output_file, clear=False):
    """Write the suggestions stored in *db_path* to *output_file* as JSONL.

    Args:
        db_path: Path to the SQLite database holding a ``suggestions`` table
            with ``q``, ``s``, ``source`` and ``target`` columns.
        output_file: Path of the JSONL file to create (overwritten if present).
        clear: When true, delete every row of ``suggestions`` (including the
            ``auto`` rows that are not exported) after the file is written.

    Returns:
        The number of rows written to *output_file*.

    Raises:
        sqlite3.Error: If the database cannot be queried, e.g. the
            ``suggestions`` table does not exist.
        OSError: If *output_file* cannot be opened for writing.
    """
    written = 0
    # closing() guarantees the connection is released even if the query or
    # the file write fails; a bare sqlite3 connection is never closed for us.
    with contextlib.closing(sqlite3.connect(db_path)) as con:
        cur = con.cursor()

        with open(output_file, "w", encoding="utf-8") as f:
            # 'auto' is bound as a parameter: a double-quoted "auto" is an
            # identifier in SQL and is only treated as a string by SQLite as
            # a legacy fallback.
            rows = cur.execute(
                "SELECT q, s, source, target FROM suggestions "
                "WHERE source != ? ORDER BY source",
                ("auto",),
            )
            for q, s, source, target in rows:
                obj = {
                    "q": q,
                    "s": s,
                    "source": source,
                    "target": target,
                }
                # ensure_ascii=False keeps non-ASCII text readable in the
                # UTF-8 output instead of \uXXXX escapes.
                json.dump(obj, f, ensure_ascii=False)
                f.write("\n")
                written += 1

        print("Wrote %s" % output_file)

        if clear:
            cur.execute("DELETE FROM suggestions")
            con.commit()
            print("Cleared " + db_path)

    return written


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The name of the JSONL file that was written.
    """
    args = build_parser().parse_args(argv)

    # The output name is the current Unix time in whole seconds.
    output_file = str(int(time.time())) + ".jsonl"

    export_suggestions(args.db, output_file, clear=args.clear)
    return output_file


if __name__ == "__main__":
    main()