import pathlib
import shlex

from absl import app
from absl import flags

from scripts import utils

# Download URLs of the fastText Common Crawl word vectors, one per language.
# The list mirrors the links published at
# https://github.com/facebookresearch/fastText/blob/master/docs/crawl-vectors.md
# and can be re-extracted with: egrep -o 'https?://[^ ]+vec.gz' links.txt
#
# Every URL has the same shape and differs only in the language code, so the
# codes are listed once (in the original order) and expanded below.
_VECTORS_URL = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl"
_FASTTEXT_CODES = (
    "af", "sq", "als", "am", "ar", "an", "hy", "as", "ast", "az",
    "ba", "eu", "bar", "be", "bn", "bh", "bpy", "bs", "br", "bg",
    "my", "ca", "ceb", "bcl", "ce", "zh", "cv", "co", "hr", "cs",
    "da", "dv", "nl", "pa", "arz", "eml", "en", "myv", "eo", "et",
    "hif", "fi", "fr", "gl", "ka", "de", "gom", "el", "gu", "ht",
    "he", "mrj", "hi", "hu", "is", "io", "ilo", "id", "ia", "ga",
    "it", "ja", "jv", "kn", "pam", "kk", "km", "ky", "ko", "ku",
    "ckb", "la", "lv", "li", "lt", "lmo", "nds", "lb", "mk", "mai",
    "mg", "ms", "ml", "mt", "gv", "mr", "mzn", "mhr", "min", "xmf",
    "mwl", "mn", "nah", "nap", "ne", "new", "frr", "nso", "no", "nn",
    "oc", "or", "os", "pfl", "ps", "fa", "pms", "pl", "pt", "qu",
    "ro", "rm", "ru", "sah", "sa", "sc", "sco", "gd", "sr", "sh",
    "scn", "sd", "si", "sk", "sl", "so", "azb", "es", "su", "sw",
    "sv", "tl", "tg", "ta", "tt", "te", "th", "bo", "tr", "tk",
    "uk", "hsb", "ur", "ug", "uz", "vec", "vi", "vo", "wa", "war",
    "cy", "vls", "fy", "pnb", "yi", "yo", "diq", "zea",
)
LINKS = [f"{_VECTORS_URL}/cc.{code}.300.vec.gz" for code in _FASTTEXT_CODES]

# (language code, language name) pairs. The name is used as the per-language
# sub-directory under the output directory, and only codes present here are
# downloaded.
# NOTE(review): the names look like Universal Dependencies language names
# (underscored, e.g. "Old_Church_Slavonic") — confirm against the consumer.
_CODE_LANG_PAIRS = (
    ("af", "Afrikaans"),
    ("aii", "Assyrian"),
    ("ajp", "South_Levantine_Arabic"),
    ("akk", "Akkadian"),
    ("am", "Amharic"),
    ("apu", "Apurina"),
    ("aqz", "Akuntsu"),
    ("ar", "Arabic"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("bho", "Bhojpuri"),
    ("bm", "Bambara"),
    ("br", "Breton"),
    ("bxr", "Buryat"),
    ("ca", "Catalan"),
    ("ckt", "Chukchi"),
    ("cop", "Coptic"),
    ("cs", "Czech"),
    ("cu", "Old_Church_Slavonic"),
    ("cy", "Welsh"),
    ("da", "Danish"),
    ("de", "German"),
    ("el", "Greek"),
    ("en", "English"),
    ("es", "Spanish"),
    ("et", "Estonian"),
    ("eu", "Basque"),
    ("fa", "Persian"),
    ("fi", "Finnish"),
    ("fo", "Faroese"),
    ("fr", "French"),
    ("fro", "Old_French"),
    ("ga", "Irish"),
    ("gd", "Scottish_Gaelic"),
    ("gl", "Galician"),
    ("got", "Gothic"),
    ("grc", "Ancient_Greek"),
    ("gsw", "Swiss_German"),
    ("gun", "Mbya_Guarani"),
    ("gv", "Manx"),
    ("he", "Hebrew"),
    ("hi", "Hindi"),
    ("hr", "Croatian"),
    ("hsb", "Upper_Sorbian"),
    ("hu", "Hungarian"),
    ("hy", "Armenian"),
    ("id", "Indonesian"),
    ("is", "Icelandic"),
    ("it", "Italian"),
    ("ja", "Japanese"),
    ("kfm", "Khunsari"),
    ("kk", "Kazakh"),
    ("kmr", "Kurmanji"),
    ("ko", "Korean"),
    ("koi", "Komi_Permyak"),
    ("kpv", "Komi_Zyrian"),
    ("krl", "Karelian"),
    ("la", "Latin"),
    ("lt", "Lithuanian"),
    ("lv", "Latvian"),
    ("lzh", "Classical_Chinese"),
    ("mdf", "Moksha"),
    ("mr", "Marathi"),
    ("mt", "Maltese"),
    ("myu", "Munduruku"),
    ("myv", "Erzya"),
    ("nl", "Dutch"),
    ("no", "Norwegian"),
    ("nyq", "Nayini"),
    ("olo", "Livvi"),
    ("orv", "Old_Russian"),
    ("otk", "Old_Turkish"),
    ("pcm", "Naija"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("qhe", "Hindi_English"),
    ("qtd", "Turkish_German"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sa", "Sanskrit"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("sme", "North_Sami"),
    ("sms", "Skolt_Sami"),
    ("soj", "Soi"),
    ("sq", "Albanian"),
    ("sr", "Serbian"),
    ("sv", "Swedish"),
    ("swl", "Swedish_Sign_Language"),
    ("ta", "Tamil"),
    ("te", "Telugu"),
    ("th", "Thai"),
    ("tl", "Tagalog"),
    ("tpn", "Tupinamba"),
    ("tr", "Turkish"),
    ("ug", "Uyghur"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("wbp", "Warlpiri"),
    ("wo", "Wolof"),
    ("yo", "Yoruba"),
    ("yue", "Cantonese"),
    ("zh", "Chinese"),
)
CODE_2_LANG = dict(_CODE_LANG_PAIRS)

# Command-line flag naming the root directory that receives the downloads;
# one sub-directory per language is created beneath it.
flags.DEFINE_string(
    name="output_dir",
    default="",
    help="Path to store embeddings.",
)
FLAGS = flags.FLAGS


def run(_):
    """Download the fastText vectors for every language in CODE_2_LANG.

    For each URL in LINKS the language code is read from the file name.
    Links whose code has no entry in CODE_2_LANG are reported and skipped.
    Each archive is stored as ``<output_dir>/<language name>/vectors.vec.gz``;
    a language whose archive file already exists is not downloaded again.

    Args:
        _: Argument supplied by ``app.run`` when it invokes this function;
            unused.
    """
    output_dir = pathlib.Path(FLAGS.output_dir)
    for link in LINKS:
        # File names look like ``cc.<code>.300.vec.gz``, so the code is the
        # fourth dot-separated field counted from the end of the URL.
        lang_code = link.split(".")[-4]

        language = CODE_2_LANG.get(lang_code)
        if language is None:
            print(f"Unknown code {lang_code}.")
            continue

        lang_dir = output_dir / language
        lang_dir.mkdir(exist_ok=True, parents=True)
        target = lang_dir / "vectors.vec.gz"
        # NOTE(review): only existence is checked. wget creates the -O target
        # before the transfer completes, so an interrupted download may leave
        # a truncated file that later runs will skip — verify / clean up
        # manually after a failed run.
        if target.exists():
            print(f"Vectors for {language} already exists, skipping.")
            continue

        # Quote both operands so an output directory containing spaces or
        # shell metacharacters cannot split or alter the command.
        # shlex.quote returns already-safe strings unchanged, so the command
        # is identical to the unquoted form for ordinary paths.
        utils.execute_command(
            f"wget -O {shlex.quote(str(target))} {shlex.quote(link)}"
        )


def main():
    """Script entry point: hand control to absl, which then calls ``run``."""
    app.run(run)


# Only start the download when executed as a script, not when imported.
if __name__ == "__main__":
    main()