mirror-ghostty/benchmark-sources.txt

15 lines
988 B
Plaintext

# Commands that produce the benchmark input files in the current directory.
# Background discussion: https://chatgpt.com/share/68c2da5f-65c8-800f-b3e4-55cdff7150cb
#
# Every curl call uses -f so that an HTTP error (404, 5xx, ...) makes curl
# exit non-zero instead of quietly saving the server's error page as if it
# were corpus data.
#
# NOTE: do not run these under `set -o pipefail`. `head -c` stops reading once
# it has its byte budget, so when a stream is longer than that the upstream
# stages are expected to terminate early with a broken pipe.

# First 200,000,000 bytes of `ghostty-gen +utf8` output (requires a local
# build that provides zig-out/bin/ghostty-gen).
zig-out/bin/ghostty-gen +utf8 | head -c 200000000 > data.txt

# Project Gutenberg plain-text e-books, saved under their remote file names.
curl -fO "https://www.gutenberg.org/cache/epub/30/pg30.txt"
curl -fO "https://www.gutenberg.org/cache/epub/24264/pg24264.txt" # Dream of the Red Chamber (紅樓夢) by Xueqin Cao

# Wikipedia monolingual corpora.
# From https://linguatools.org/tools/corpora/wikipedia-monolingual-corpora/
# Each download follows redirects (-L), is decompressed on the fly, and is
# truncated to the first 1,000,000,000 decompressed bytes.
curl -fL "https://www.dropbox.com/scl/fi/86gjpfzopssavk2nzo69u/arwiki-20180920-corpus.xml.bz2?dl=1&e=1&file_subpath=%2Fdata&rlkey=dmjlaw1xegg8vsje4xrn040v8" | bzcat | head -c 1000000000 > arwiki-20180920-corpus.xml # arabic
curl -fL "https://www.dropbox.com/scl/fi/la1nvupgk2honb3n6m9zc/enwiki-20181001-corpus.xml.bz2?rlkey=8vg4vokbaijh1lg5lw3ytc864&e=1&dl=1" | bzcat | head -c 1000000000 > enwiki-20181001-corpus.xml # english
curl -fL "https://www.dropbox.com/scl/fi/vru4zxv5qff1klod9xiht/jawiki-20181001-corpus.xml.bz2?rlkey=utuuooiwyupws3x5517u8n8jl&e=1&dl=1" | bzcat | head -c 1000000000 > jawiki-20181001-corpus.xml # japanese