Skip to content

Instantly share code, notes, and snippets.

@ponte-vecchio
Created April 25, 2021 10:45
Show Gist options
  • Select an option

  • Save ponte-vecchio/8cc311cef5c926ad60f3dcc1e7d2e8f9 to your computer and use it in GitHub Desktop.

Select an option

Save ponte-vecchio/8cc311cef5c926ad60f3dcc1e7d2e8f9 to your computer and use it in GitHub Desktop.
def _two_syllable_words(head, vowels, medials, codas, limit):
    """Return the words of exactly ``limit`` letters shaped head + C + V (+ coda).

    ``head`` is whatever precedes the medial consonant (a bare vowel, or an
    onset consonant plus vowel).  The second vowel is drawn from the same
    ``vowels`` class as the first so both syllables agree.
    """
    words = []
    for medial in medials:
        # Every vowel is at least one letter long, so this medial can no
        # longer fit inside the limit: skip the inner loop entirely.
        if len(head) + len(medial) >= limit:
            continue
        for vowel in vowels:
            stem = head + medial + vowel
            gap = limit - len(stem)
            if gap == 0:
                words.append(stem)
            elif gap > 0:
                # Attach each coda to a fresh copy of the stem.  The original
                # used ``chunk += c_final`` here, which piled the finals on
                # top of each other ("aha" -> "ahat" -> "ahats" ...) and so
                # lost every variant except the first.
                words.extend(stem + coda for coda in codas if len(coda) == gap)
    return words


def sana_gen(limit):
    """Generate Finnish-looking pseudo-words that are exactly ``limit`` letters long.

    Words are assembled from fixed consonant and vowel inventories using the
    shapes CVC, CVCV, CVCVC, VCV and VCVC.  Within one word all vowels come
    from the same class (the "back" list or the "front" list; ``e`` and ``i``
    belong to both).

    Args:
        limit: Required word length, counted in characters.

    Returns:
        A list of unique words in generation order.

    Side effects:
        Prints the number of candidates before de-duplication, then the
        number of unique words.
    """
    # NOTE(review): the original also listed loan consonants (b, c, f, g, q,
    # w, x, z, š, ž) and the diphthongs ee, ei, eu, ey, ie, ii, iu, iy but
    # never used them in any pattern; they stay excluded here.
    singles = ['h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's', 't', 'v']
    clusters = [
        'hd', 'hh', 'hj', 'hk', 'hl', 'hm', 'hn', 'hp', 'ht', 'hv', 'jk',
        'pp', 'tt', 'kk', 'mm', 'nn', 'ng', 'nj', 'nk', 'ns', 'nt', 'll',
        'lt', 'lk', 'lj', 'ls', 'lv', 'lm', 'ln', 'rv', 'rr', 'rk',
    ]
    finals = ['t', 's', 'n', 'r', 'l']
    medials = singles + clusters

    shared_vowels = ['e', 'i']
    back_vowels = (
        ['a', 'o', 'u'] + shared_vowels
        + ['aa', 'ai', 'au', 'oi', 'oo', 'ou', 'ui', 'uo', 'uu']
    )
    front_vowels = (
        ['ä', 'ö', 'y'] + shared_vowels
        + ['ää', 'äi', 'äy', 'öi', 'öö', 'öy', 'yi', 'yö', 'yy']
    )
    vowel_classes = (back_vowels, front_vowels)

    candidates = []

    # C + V + C  and  C + V + C + V + (C)
    for onset in singles:
        for vowels in vowel_classes:
            for vowel in vowels:
                head = onset + vowel
                candidates.extend(
                    head + coda for coda in finals if len(head + coda) == limit
                )
                candidates.extend(
                    _two_syllable_words(head, vowels, medials, finals, limit)
                )

    # V + C + V + (C)
    for vowels in vowel_classes:
        for vowel in vowels:
            candidates.extend(
                _two_syllable_words(vowel, vowels, medials, finals, limit)
            )

    print(len(candidates))
    # Words built only from e/i are produced by both vowel classes;
    # dict.fromkeys drops the repeats while keeping first-seen order.
    unique_words = list(dict.fromkeys(candidates))
    print(len(unique_words))
    return unique_words
# Build the four-letter word list once, when this module is run or imported;
# the call also prints the candidate count and the unique-word count.
list_of_finnish_words = sana_gen(4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment