mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2024-11-21 23:41:01 +00:00
Extract JSONL from suggestions.db script
This commit is contained in:
parent
0136d8808c
commit
d6ef04ba3e
1 changed file with 46 additions and 0 deletions
46
suggestions-to-jsonl.py
Executable file
46
suggestions-to-jsonl.py
Executable file
|
@ -0,0 +1,46 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import sqlite3
|
||||||
|
import json
|
||||||
|
|
||||||
|
def main(argv=None):
    """Export rows of a LibreTranslate suggestions.db to a JSONL file.

    Every row of the ``suggestions`` table whose ``source`` is not
    ``auto`` is written, ordered by ``source``, as one JSON object per
    line with the keys ``q``, ``s``, ``source`` and ``target``. The output
    file is created in the current working directory and is named after
    the current Unix timestamp (``<timestamp>.jsonl``).

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        The name of the JSONL file that was written.

    Raises:
        sqlite3.Error: If the database cannot be queried (for example the
            ``suggestions`` table does not exist).
        OSError: If the output file cannot be created.
    """
    parser = argparse.ArgumentParser(description="Program to generate JSONL files from a LibreTranslate's suggestions.db")
    # No ``nargs=1`` here: with it argparse stores a one-element list when
    # the flag is given, while the default stays a plain string, so
    # ``sqlite3.connect(args.db)`` only worked when --db was omitted.
    parser.add_argument(
        "--db",
        type=str,
        help="Path to suggestions.db file",
        default='suggestions.db',
    )
    parser.add_argument(
        "--clear",
        action='store_true',
        help="Clear suggestions.db after generation",
        default=False,
    )
    args = parser.parse_args(argv)

    output_file = str(int(time.time())) + ".jsonl"

    con = sqlite3.connect(args.db, check_same_thread=False)
    try:
        cur = con.cursor()

        # 'auto' is written as a proper single-quoted SQL string literal;
        # a double-quoted "auto" is an identifier that SQLite only treats
        # as a string through a legacy fallback.
        query = (
            "SELECT q, s, source, target FROM suggestions "
            "WHERE source != 'auto' ORDER BY source"
        )

        with open(output_file, 'w', encoding="utf-8") as f:
            for q, s, source, target in cur.execute(query):
                obj = {
                    'q': q,
                    's': s,
                    'source': source,
                    'target': target,
                }
                # ensure_ascii=False keeps non-ASCII text readable in the
                # UTF-8 output instead of \u-escaping it.
                json.dump(obj, f, ensure_ascii=False)
                f.write('\n')

        print("Wrote %s" % output_file)

        if args.clear:
            # Removes every row, including the 'auto' source rows that
            # were not exported above.
            cur.execute("DELETE FROM suggestions")
            con.commit()
            print("Cleared " + args.db)
    finally:
        # Release the database handle even if the export fails midway.
        con.close()

    return output_file


if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue