Brush up output format, add --color switch to force colors
This commit is contained in:
parent
0b2249b1c4
commit
3e0ee1e090
69
leo.py
69
leo.py
|
@ -3,13 +3,23 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import readline
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
args = []
|
||||||
print("leo.py must be run with at least 1 parameter (word to translate)")
|
force_color = False
|
||||||
|
for arg in sys.argv:
|
||||||
|
if arg == '--color':
|
||||||
|
force_color = True
|
||||||
|
else:
|
||||||
|
args.append(arg)
|
||||||
|
if len(args) < 2:
|
||||||
|
print(f"Usage: leo [--color] WORD [WORD...]")
|
||||||
|
print(f"Note: WORDs will be concatenated using spaces")
|
||||||
|
print(f"Note: --color forces color output")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
url = 'https://dict.leo.org/englisch-deutsch/' + " ".join(sys.argv[1:])
|
url = 'https://dict.leo.org/englisch-deutsch/' + " ".join(sys.argv[1:])
|
||||||
|
@ -21,7 +31,7 @@ if __name__ == "__main__":
|
||||||
s = re.sub(r"\.([\w\.]+) ", r". (\1) ", s)
|
s = re.sub(r"\.([\w\.]+) ", r". (\1) ", s)
|
||||||
s = s.replace(" - ", ": ")
|
s = s.replace(" - ", ": ")
|
||||||
|
|
||||||
s = re.sub(r"[\s\xa0]+", " ", s)
|
s = re.sub(r"[\s\xa0]+", " ", s) # nbsp == "\xa0"
|
||||||
|
|
||||||
s = re.sub("\u21d4 (\\w+)", r"\1", s)
|
s = re.sub("\u21d4 (\\w+)", r"\1", s)
|
||||||
|
|
||||||
|
@ -32,6 +42,10 @@ if __name__ == "__main__":
|
||||||
s = re.sub(r"\[ ", "[", s)
|
s = re.sub(r"\[ ", "[", s)
|
||||||
s = re.sub(r" \]", "]", s)
|
s = re.sub(r" \]", "]", s)
|
||||||
s = s.replace(")[",") [")
|
s = s.replace(")[",") [")
|
||||||
|
if re.search(r"\SAE ", s) and re.search(r"\SBE ", s):
|
||||||
|
s = re.sub(r"(?<=\S)AE ", " (AE) ", s)
|
||||||
|
s = re.sub(r"(?<=\S)BE ", " (BE) ", s)
|
||||||
|
s = re.sub(" (Pron|Adj|Adv).($| )", r" (\1.)\2", s)
|
||||||
|
|
||||||
s = s.strip()
|
s = s.strip()
|
||||||
return s
|
return s
|
||||||
|
@ -40,17 +54,33 @@ if __name__ == "__main__":
|
||||||
max_widths = [0] * len(table[0])
|
max_widths = [0] * len(table[0])
|
||||||
string_table = ""
|
string_table = ""
|
||||||
for row in table:
|
for row in table:
|
||||||
for i, element in enumerate(row):
|
for i, element in enumerate(row): # j = 0..1 (2 columns)
|
||||||
if (width := len(element)) > max_widths[i]:
|
if (width := max(map(len, element.split("<NL>")))) > max_widths[i]:
|
||||||
max_widths[i] = width
|
max_widths[i] = width
|
||||||
for i, row in enumerate(table):
|
for i, row in enumerate(table):
|
||||||
for j, element in enumerate(row):
|
lines = []
|
||||||
table[i][j] = element.ljust(max_widths[j])
|
for j, element in enumerate(row): # j = 0..1 (2 columns)
|
||||||
string_table += delim.join(table[i]) + "\n"
|
parts = element.split("<NL>")
|
||||||
|
lines.append([])
|
||||||
|
for part in parts:
|
||||||
|
lines[-1].append(part.ljust(max_widths[j]))
|
||||||
|
max_lines = 0
|
||||||
|
for line in lines:
|
||||||
|
max_lines = max(max_lines, len(line))
|
||||||
|
for i, line in enumerate(lines):
|
||||||
|
if len(line) < max_lines:
|
||||||
|
lines[i] += [''] * (max_lines - len(line))
|
||||||
|
first = True
|
||||||
|
for line in zip(*lines):
|
||||||
|
if first:
|
||||||
|
string_table += delim.join(line) + "\n"
|
||||||
|
else:
|
||||||
|
string_table += " ".join(line) + "\n"
|
||||||
|
first = False
|
||||||
return string_table
|
return string_table
|
||||||
|
|
||||||
os.system("")
|
os.system("")
|
||||||
for tbody in soup.select("table.tblf1.tblf-fullwidth.tblf-alternate"):
|
for tbody in soup.select("table.tblf1.tblf-fullwidth.tblf-alternate")[::-1]:
|
||||||
h2_tag = tbody.find("h2")
|
h2_tag = tbody.find("h2")
|
||||||
if not h2_tag:
|
if not h2_tag:
|
||||||
continue
|
continue
|
||||||
|
@ -58,12 +88,27 @@ if __name__ == "__main__":
|
||||||
table = tbody.find("tbody")
|
table = tbody.find("tbody")
|
||||||
if not table:
|
if not table:
|
||||||
continue
|
continue
|
||||||
print("\x1b[33m"+"#"*10, heading, "#"*10+"\x1b[0m")
|
if os.isatty(1) or force_color:
|
||||||
|
print(end="\x1b[33m")
|
||||||
|
print("#"*10, heading, "#"*10)
|
||||||
|
if os.isatty(1) or force_color:
|
||||||
|
print(end="\x1b[0m")
|
||||||
leo_entry = []
|
leo_entry = []
|
||||||
for line in table:
|
for line in table:
|
||||||
try:
|
try:
|
||||||
en = line.select("td[lang=en]")[0].text if isinstance(line, Tag) else ''
|
if isinstance(line, Tag):
|
||||||
de = line.select("td[lang=de]")[0].text if isinstance(line, Tag) else ''
|
en_tag = line.select("td[lang=en]")[0]
|
||||||
|
de_tag = line.select("td[lang=de]")[0]
|
||||||
|
en_br = en_tag.select_one("br")
|
||||||
|
de_br = de_tag.select_one("br")
|
||||||
|
if en_br is not None:
|
||||||
|
en_br.replace_with("<NL>")
|
||||||
|
if de_br is not None:
|
||||||
|
de_br.replace_with("<NL>")
|
||||||
|
en = en_tag.text
|
||||||
|
de = de_tag.text
|
||||||
|
else:
|
||||||
|
en = de = ''
|
||||||
if en and de:
|
if en and de:
|
||||||
leo_entry.append([format_dict_line(en), format_dict_line(de)])
|
leo_entry.append([format_dict_line(en), format_dict_line(de)])
|
||||||
except:
|
except:
|
||||||
|
|
Loading…
Reference in New Issue