Brush up output format, add --color switch to force colors

This commit is contained in:
tosu 2024-06-17 01:17:22 +02:00
parent 0b2249b1c4
commit 3e0ee1e090
1 changed files with 102 additions and 57 deletions

159
leo.py
View File

@ -3,76 +3,121 @@
import os
import re
import sys
import readline
import requests
from bs4 import BeautifulSoup, Tag
if __name__ == "__main__":
if len(sys.argv) < 2:
print("leo.py must be run with at least 1 parameter (word to translate)")
sys.exit(1)
args = []
force_color = False
for arg in sys.argv:
if arg == '--color':
force_color = True
else:
args.append(arg)
if len(args) < 2:
print(f"Usage: leo [--color] WORD [WORD...]")
print(f"Note: WORDs will be concatenated using spaces")
print(f"Note: --color forces color output")
sys.exit(1)
url = 'https://dict.leo.org/englisch-deutsch/' + " ".join(sys.argv[1:])
r = requests.get(url)
url = 'https://dict.leo.org/englisch-deutsch/' + " ".join(sys.argv[1:])
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")
soup = BeautifulSoup(r.text, "html.parser")
def format_dict_line(s):
s = re.sub(r"\.([\w\.]+) ", r". (\1) ", s)
s = s.replace(" - ", ": ")
def format_dict_line(s):
s = re.sub(r"\.([\w\.]+) ", r". (\1) ", s)
s = s.replace(" - ", ": ")
s = re.sub(r"[\s\xa0]+", " ", s)
s = re.sub(r"[\s\xa0]+", " ", s) # nbsp == "\xa0"
s = re.sub("\u21d4 (\\w+)", r"\1", s)
s = re.sub("\u21d4 (\\w+)", r"\1", s)
s = s.replace("|", "[", 1)
s = s.replace("|", "]", 1)
s = s.replace("|", "[", 1)
s = s.replace("|", "]", 1)
s = re.sub(r"\[ ", "[", s)
s = re.sub(r" \]", "]", s)
s = s.replace(")[",") [")
s = s.replace("|", "[", 1)
s = s.replace("|", "]", 1)
s = s.replace("|", "[", 1)
s = s.replace("|", "]", 1)
s = re.sub(r"\[ ", "[", s)
s = re.sub(r" \]", "]", s)
s = s.replace(")[",") [")
if re.search(r"\SAE ", s) and re.search(r"\SBE ", s):
s = re.sub(r"(?<=\S)AE ", " (AE) ", s)
s = re.sub(r"(?<=\S)BE ", " (BE) ", s)
s = re.sub(" (Pron|Adj|Adv).($| )", r" (\1.)\2", s)
s = s.strip()
return s
s = s.strip()
return s
def align(table, delim="|"):
max_widths = [0] * len(table[0])
string_table = ""
for row in table:
for i, element in enumerate(row):
if (width := len(element)) > max_widths[i]:
max_widths[i] = width
for i, row in enumerate(table):
for j, element in enumerate(row):
table[i][j] = element.ljust(max_widths[j])
string_table += delim.join(table[i]) + "\n"
return string_table
def align(table, delim="|"):
max_widths = [0] * len(table[0])
string_table = ""
for row in table:
for i, element in enumerate(row): # j = 0..1 (2 columns)
if (width := max(map(len, element.split("<NL>")))) > max_widths[i]:
max_widths[i] = width
for i, row in enumerate(table):
lines = []
for j, element in enumerate(row): # j = 0..1 (2 columns)
parts = element.split("<NL>")
lines.append([])
for part in parts:
lines[-1].append(part.ljust(max_widths[j]))
max_lines = 0
for line in lines:
max_lines = max(max_lines, len(line))
for i, line in enumerate(lines):
if len(line) < max_lines:
lines[i] += [''] * (max_lines - len(line))
first = True
for line in zip(*lines):
if first:
string_table += delim.join(line) + "\n"
else:
string_table += " ".join(line) + "\n"
first = False
return string_table
os.system("")
for tbody in soup.select("table.tblf1.tblf-fullwidth.tblf-alternate"):
h2_tag = tbody.find("h2")
if not h2_tag:
continue
heading = h2_tag.text
table = tbody.find("tbody")
if not table:
continue
print("\x1b[33m"+"#"*10, heading, "#"*10+"\x1b[0m")
leo_entry = []
for line in table:
try:
en = line.select("td[lang=en]")[0].text if isinstance(line, Tag) else ''
de = line.select("td[lang=de]")[0].text if isinstance(line, Tag) else ''
if en and de:
leo_entry.append([format_dict_line(en), format_dict_line(de)])
except:
continue
try:
print(align(leo_entry, " | "))
except:
continue
os.system("")
for tbody in soup.select("table.tblf1.tblf-fullwidth.tblf-alternate")[::-1]:
h2_tag = tbody.find("h2")
if not h2_tag:
continue
heading = h2_tag.text
table = tbody.find("tbody")
if not table:
continue
if os.isatty(1) or force_color:
print(end="\x1b[33m")
print("#"*10, heading, "#"*10)
if os.isatty(1) or force_color:
print(end="\x1b[0m")
leo_entry = []
for line in table:
try:
if isinstance(line, Tag):
en_tag = line.select("td[lang=en]")[0]
de_tag = line.select("td[lang=de]")[0]
en_br = en_tag.select_one("br")
de_br = de_tag.select_one("br")
if en_br is not None:
en_br.replace_with("<NL>")
if de_br is not None:
de_br.replace_with("<NL>")
en = en_tag.text
de = de_tag.text
else:
en = de = ''
if en and de:
leo_entry.append([format_dict_line(en), format_dict_line(de)])
except:
continue
try:
print(align(leo_entry, " | "))
except:
continue
# print("\x1b[3A")
sys.exit(0)
# print("\x1b[3A")
sys.exit(0)