remove the tinyshakespeare dataset until i can bring it back later in a nicer form, otherwise right now we just have a ton of copy paste code here

2023-08-13 02:18:30 +00:00
parent f5fc0c245f
commit 00a61dc7f9
2 changed files with 1 additions and 144 deletions
@@ -29,7 +29,6 @@ from torch.distributed import destroy_process_group, init_process_group
 from torch.nn.parallel import DistributedDataParallel as DDP

 from tinystories import Task
-from tinyshakespeare import ShakespeareTask

 # -----------------------------------------------------------------------------
 # I/O
@@ -49,7 +48,6 @@ batch_size = 128  # if gradient_accumulation_steps > 1, this is the micro-batch
 max_seq_len = 256
 vocab_source = "custom" # llama2|custom; use Lllama 2 vocab from Meta, or custom trained
 vocab_size = 512
-dataset = "tinystories"  # tinystories|tinyshakespeare
 # model
 dim = 288
 n_layers = 6
@@ -129,9 +127,8 @@ ctx = (
 )

 # task-specific setup
-task = {'tinystories': Task, 'tinyshakespeare': ShakespeareTask}[dataset]
 iter_batches = partial(
-    task.iter_batches,
+    Task.iter_batches,
    batch_size=batch_size,
    max_seq_len=max_seq_len,
    vocab_size=vocab_size,