feat: add code for finetuning moondream
moondream/hyperparams.py (Normal file, 31 lines)
@@ -0,0 +1,31 @@
TEST_SIZE = 0.2

# Number of times to repeat the training dataset. Increasing this may cause the model to overfit or
# lose generalization due to catastrophic forgetting. Decreasing it may cause the model to underfit.
EPOCHS = 1

# Number of samples to process in each batch. Set this to the highest value that doesn't cause an
# out-of-memory error. Decrease it if you're running out of memory.
BATCH_SIZE = 8

# Number of batches to process before updating the model. You can use this to simulate a higher batch
# size than your GPU can handle. Set this to 1 to disable gradient accumulation.
GRAD_ACCUM_STEPS = 2

# Learning rate for the Adam optimizer. Needs to be tuned on a case-by-case basis. As a general rule
# of thumb, increase it by 1.4 times each time you double the effective batch size.
#
# Source: https://www.cs.princeton.edu/~smalladi/blog/2024/01/22/SDEs-ScalingRules/
#
# Note that we linearly warm the learning rate up from 0.1 * LR to LR over the first 10% of the
# training run, and then decay it back to 0.1 * LR over the last 90% of the training run using a
# cosine schedule.
LR = 1e-5

# Whether to use Weights and Biases for logging training metrics.
USE_WANDB = False

ANSWER_EOS = "<|endoftext|>"

# Number of tokens used to represent each image.
IMG_TOKENS = 729
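GRAD_ACCUM_STEPS works by summing gradients over several small batches before each optimizer update, so the effective batch size is BATCH_SIZE * GRAD_ACCUM_STEPS (16 with the defaults above). A minimal sketch of how a fine-tuning loop might apply it; `model`, `optimizer`, `train_loader`, and `compute_loss` are placeholders assumed to be defined elsewhere and are not part of this commit:

def train_one_epoch(model, optimizer, train_loader, lr_scheduler=None):
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(train_loader):
        # Placeholder forward pass; assumes it returns a scalar loss tensor.
        loss = compute_loss(model, batch)
        # Scale the loss so the accumulated gradient equals the average over the
        # effective batch (BATCH_SIZE * GRAD_ACCUM_STEPS samples).
        (loss / GRAD_ACCUM_STEPS).backward()
        if (step + 1) % GRAD_ACCUM_STEPS == 0:
            optimizer.step()
            if lr_scheduler is not None:
                lr_scheduler.step()
            optimizer.zero_grad()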
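The 1.4x rule of thumb in the LR comment is roughly sqrt(2) ≈ 1.41, consistent with the square-root scaling rule for Adam discussed in the linked post. One way the warmup-then-cosine schedule described above could be written, with `step` and `total_steps` counting optimizer updates; this helper is illustrative, not part of the commit:

import math

def lr_schedule(step: int, total_steps: int) -> float:
    """Linear warmup from 0.1 * LR to LR over the first 10% of training,
    then cosine decay back to 0.1 * LR over the remaining 90%."""
    x = step / total_steps
    if x < 0.1:
        return 0.1 * LR + 0.9 * LR * (x / 0.1)
    progress = (x - 0.1) / 0.9
    return 0.1 * LR + 0.9 * LR * 0.5 * (1 + math.cos(math.pi * progress))

The training loop could then set each optimizer param group's lr to lr_schedule(step, total_steps) before calling optimizer.step().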