Remove the tinyshakespeare dataset until I can bring it back later in a nicer form; right now we just have a ton of copy-paste code here.

This commit is contained in:
Andrej Karpathy
2023-08-13 02:18:30 +00:00
parent f5fc0c245f
commit 00a61dc7f9
2 changed files with 1 additions and 144 deletions
+1 -4
View File
@@ -29,7 +29,6 @@ from torch.distributed import destroy_process_group, init_process_group
from torch.nn.parallel import DistributedDataParallel as DDP
from tinystories import Task
from tinyshakespeare import ShakespeareTask
# -----------------------------------------------------------------------------
# I/O
@@ -49,7 +48,6 @@ batch_size = 128 # if gradient_accumulation_steps > 1, this is the micro-batch
max_seq_len = 256
vocab_source = "custom" # llama2|custom; use Llama 2 vocab from Meta, or custom trained
vocab_size = 512
dataset = "tinystories" # tinystories|tinyshakespeare
# model
dim = 288
n_layers = 6
@@ -129,9 +127,8 @@ ctx = (
)
# task-specific setup
task = {'tinystories': Task, 'tinyshakespeare': ShakespeareTask}[dataset]
iter_batches = partial(
task.iter_batches,
Task.iter_batches,
batch_size=batch_size,
max_seq_len=max_seq_len,
vocab_size=vocab_size,