1461 lines
37 KiB
JSON
1461 lines
37 KiB
JSON
{
|
|
"best_metric": 0.81632209,
|
|
"best_model_checkpoint": "/mnt/workspace/output/qwen2-7b-instruct/v0-20240906-160301/checkpoint-1300",
|
|
"epoch": 0.9782710939704637,
|
|
"eval_steps": 100,
|
|
"global_step": 1300,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"acc": 0.56549656,
|
|
"epoch": 0.0007525162261311259,
|
|
"grad_norm": 2.546875,
|
|
"learning_rate": 1.25e-06,
|
|
"loss": 1.70594239,
|
|
"memory(GiB)": 16.58,
|
|
"step": 1,
|
|
"train_speed(iter/s)": 0.138978
|
|
},
|
|
{
|
|
"acc": 0.62718646,
|
|
"epoch": 0.007525162261311259,
|
|
"grad_norm": 1.6484375,
|
|
"learning_rate": 1.25e-05,
|
|
"loss": 1.38872963,
|
|
"memory(GiB)": 20.36,
|
|
"step": 10,
|
|
"train_speed(iter/s)": 0.177611
|
|
},
|
|
{
|
|
"acc": 0.65103383,
|
|
"epoch": 0.015050324522622519,
|
|
"grad_norm": 1.7578125,
|
|
"learning_rate": 2.5e-05,
|
|
"loss": 1.2758357,
|
|
"memory(GiB)": 20.36,
|
|
"step": 20,
|
|
"train_speed(iter/s)": 0.179256
|
|
},
|
|
{
|
|
"acc": 0.6864974,
|
|
"epoch": 0.022575486783933778,
|
|
"grad_norm": 0.703125,
|
|
"learning_rate": 3.7500000000000003e-05,
|
|
"loss": 1.08145266,
|
|
"memory(GiB)": 21.71,
|
|
"step": 30,
|
|
"train_speed(iter/s)": 0.180531
|
|
},
|
|
{
|
|
"acc": 0.70903621,
|
|
"epoch": 0.030100649045245037,
|
|
"grad_norm": 1.015625,
|
|
"learning_rate": 5e-05,
|
|
"loss": 0.98728647,
|
|
"memory(GiB)": 21.71,
|
|
"step": 40,
|
|
"train_speed(iter/s)": 0.17987
|
|
},
|
|
{
|
|
"acc": 0.72273793,
|
|
"epoch": 0.037625811306556296,
|
|
"grad_norm": 0.7265625,
|
|
"learning_rate": 4.9992563706868603e-05,
|
|
"loss": 0.91920843,
|
|
"memory(GiB)": 16.47,
|
|
"step": 50,
|
|
"train_speed(iter/s)": 0.180818
|
|
},
|
|
{
|
|
"acc": 0.71242127,
|
|
"epoch": 0.045150973567867556,
|
|
"grad_norm": 0.80859375,
|
|
"learning_rate": 4.997025925135086e-05,
|
|
"loss": 0.95391417,
|
|
"memory(GiB)": 16.47,
|
|
"step": 60,
|
|
"train_speed(iter/s)": 0.181081
|
|
},
|
|
{
|
|
"acc": 0.72270803,
|
|
"epoch": 0.052676135829178815,
|
|
"grad_norm": 0.78515625,
|
|
"learning_rate": 4.99330999024443e-05,
|
|
"loss": 0.92681618,
|
|
"memory(GiB)": 16.47,
|
|
"step": 70,
|
|
"train_speed(iter/s)": 0.181559
|
|
},
|
|
{
|
|
"acc": 0.71700387,
|
|
"epoch": 0.060201298090490074,
|
|
"grad_norm": 0.890625,
|
|
"learning_rate": 4.988110776637383e-05,
|
|
"loss": 0.92395344,
|
|
"memory(GiB)": 16.47,
|
|
"step": 80,
|
|
"train_speed(iter/s)": 0.182177
|
|
},
|
|
{
|
|
"acc": 0.72583823,
|
|
"epoch": 0.06772646035180134,
|
|
"grad_norm": 0.8125,
|
|
"learning_rate": 4.981431377344059e-05,
|
|
"loss": 0.9007925,
|
|
"memory(GiB)": 17.57,
|
|
"step": 90,
|
|
"train_speed(iter/s)": 0.182174
|
|
},
|
|
{
|
|
"acc": 0.72602968,
|
|
"epoch": 0.07525162261311259,
|
|
"grad_norm": 0.734375,
|
|
"learning_rate": 4.973275765962145e-05,
|
|
"loss": 0.91265221,
|
|
"memory(GiB)": 17.57,
|
|
"step": 100,
|
|
"train_speed(iter/s)": 0.182475
|
|
},
|
|
{
|
|
"epoch": 0.07525162261311259,
|
|
"eval_acc": 0.726610340283699,
|
|
"eval_loss": 0.9165924787521362,
|
|
"eval_runtime": 17.5418,
|
|
"eval_samples_per_second": 12.199,
|
|
"eval_steps_per_second": 12.199,
|
|
"step": 100
|
|
},
|
|
{
|
|
"acc": 0.73096576,
|
|
"epoch": 0.08277678487442386,
|
|
"grad_norm": 0.79296875,
|
|
"learning_rate": 4.963648794292992e-05,
|
|
"loss": 0.88530436,
|
|
"memory(GiB)": 17.57,
|
|
"step": 110,
|
|
"train_speed(iter/s)": 0.177125
|
|
},
|
|
{
|
|
"acc": 0.73312225,
|
|
"epoch": 0.09030194713573511,
|
|
"grad_norm": 0.7890625,
|
|
"learning_rate": 4.952556189455266e-05,
|
|
"loss": 0.87520084,
|
|
"memory(GiB)": 18.79,
|
|
"step": 120,
|
|
"train_speed(iter/s)": 0.177734
|
|
},
|
|
{
|
|
"acc": 0.7300118,
|
|
"epoch": 0.09782710939704638,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 4.940004550477858e-05,
|
|
"loss": 0.89652071,
|
|
"memory(GiB)": 18.79,
|
|
"step": 130,
|
|
"train_speed(iter/s)": 0.178221
|
|
},
|
|
{
|
|
"acc": 0.72675986,
|
|
"epoch": 0.10535227165835763,
|
|
"grad_norm": 0.8359375,
|
|
"learning_rate": 4.9260013443741074e-05,
|
|
"loss": 0.88306637,
|
|
"memory(GiB)": 18.79,
|
|
"step": 140,
|
|
"train_speed(iter/s)": 0.178327
|
|
},
|
|
{
|
|
"acc": 0.7353128,
|
|
"epoch": 0.1128774339196689,
|
|
"grad_norm": 0.8203125,
|
|
"learning_rate": 4.9105549016996414e-05,
|
|
"loss": 0.87760782,
|
|
"memory(GiB)": 18.79,
|
|
"step": 150,
|
|
"train_speed(iter/s)": 0.178348
|
|
},
|
|
{
|
|
"acc": 0.73115945,
|
|
"epoch": 0.12040259618098015,
|
|
"grad_norm": 0.96484375,
|
|
"learning_rate": 4.893674411596507e-05,
|
|
"loss": 0.88493299,
|
|
"memory(GiB)": 18.79,
|
|
"step": 160,
|
|
"train_speed(iter/s)": 0.179132
|
|
},
|
|
{
|
|
"acc": 0.73725371,
|
|
"epoch": 0.1279277584422914,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 4.875369916326512e-05,
|
|
"loss": 0.8738554,
|
|
"memory(GiB)": 18.79,
|
|
"step": 170,
|
|
"train_speed(iter/s)": 0.180388
|
|
},
|
|
{
|
|
"acc": 0.73314247,
|
|
"epoch": 0.13545292070360268,
|
|
"grad_norm": 0.8984375,
|
|
"learning_rate": 4.855652305297052e-05,
|
|
"loss": 0.87920074,
|
|
"memory(GiB)": 18.79,
|
|
"step": 180,
|
|
"train_speed(iter/s)": 0.181303
|
|
},
|
|
{
|
|
"acc": 0.73699288,
|
|
"epoch": 0.14297808296491393,
|
|
"grad_norm": 0.8828125,
|
|
"learning_rate": 4.834533308582965e-05,
|
|
"loss": 0.86675138,
|
|
"memory(GiB)": 18.79,
|
|
"step": 190,
|
|
"train_speed(iter/s)": 0.181264
|
|
},
|
|
{
|
|
"acc": 0.7354795,
|
|
"epoch": 0.15050324522622519,
|
|
"grad_norm": 0.796875,
|
|
"learning_rate": 4.8120254899482665e-05,
|
|
"loss": 0.85725517,
|
|
"memory(GiB)": 18.79,
|
|
"step": 200,
|
|
"train_speed(iter/s)": 0.182005
|
|
},
|
|
{
|
|
"epoch": 0.15050324522622519,
|
|
"eval_acc": 0.7330827067669173,
|
|
"eval_loss": 0.8884855508804321,
|
|
"eval_runtime": 17.6396,
|
|
"eval_samples_per_second": 12.132,
|
|
"eval_steps_per_second": 12.132,
|
|
"step": 200
|
|
},
|
|
{
|
|
"acc": 0.7260026,
|
|
"epoch": 0.15802840748753644,
|
|
"grad_norm": 1.0078125,
|
|
"learning_rate": 4.788142239371927e-05,
|
|
"loss": 0.88581524,
|
|
"memory(GiB)": 18.79,
|
|
"step": 210,
|
|
"train_speed(iter/s)": 0.179819
|
|
},
|
|
{
|
|
"acc": 0.73441463,
|
|
"epoch": 0.16555356974884772,
|
|
"grad_norm": 1.2109375,
|
|
"learning_rate": 4.762897765082124e-05,
|
|
"loss": 0.87213602,
|
|
"memory(GiB)": 18.79,
|
|
"step": 220,
|
|
"train_speed(iter/s)": 0.180055
|
|
},
|
|
{
|
|
"acc": 0.73429847,
|
|
"epoch": 0.17307873201015897,
|
|
"grad_norm": 0.859375,
|
|
"learning_rate": 4.7363070851037175e-05,
|
|
"loss": 0.85944548,
|
|
"memory(GiB)": 18.79,
|
|
"step": 230,
|
|
"train_speed(iter/s)": 0.179967
|
|
},
|
|
{
|
|
"acc": 0.73485651,
|
|
"epoch": 0.18060389427147022,
|
|
"grad_norm": 1.0703125,
|
|
"learning_rate": 4.708386018323979e-05,
|
|
"loss": 0.87450695,
|
|
"memory(GiB)": 18.79,
|
|
"step": 240,
|
|
"train_speed(iter/s)": 0.180087
|
|
},
|
|
{
|
|
"acc": 0.73376856,
|
|
"epoch": 0.18812905653278147,
|
|
"grad_norm": 1.1015625,
|
|
"learning_rate": 4.6791511750818784e-05,
|
|
"loss": 0.87443905,
|
|
"memory(GiB)": 18.79,
|
|
"step": 250,
|
|
"train_speed(iter/s)": 0.18021
|
|
},
|
|
{
|
|
"acc": 0.72789798,
|
|
"epoch": 0.19565421879409275,
|
|
"grad_norm": 1.03125,
|
|
"learning_rate": 4.6486199472865344e-05,
|
|
"loss": 0.89129868,
|
|
"memory(GiB)": 18.79,
|
|
"step": 260,
|
|
"train_speed(iter/s)": 0.180445
|
|
},
|
|
{
|
|
"acc": 0.73053126,
|
|
"epoch": 0.203179381055404,
|
|
"grad_norm": 1.28125,
|
|
"learning_rate": 4.6168104980707107e-05,
|
|
"loss": 0.85515051,
|
|
"memory(GiB)": 18.79,
|
|
"step": 270,
|
|
"train_speed(iter/s)": 0.180552
|
|
},
|
|
{
|
|
"acc": 0.74167385,
|
|
"epoch": 0.21070454331671526,
|
|
"grad_norm": 1.3046875,
|
|
"learning_rate": 4.583741750985505e-05,
|
|
"loss": 0.84416103,
|
|
"memory(GiB)": 18.79,
|
|
"step": 280,
|
|
"train_speed(iter/s)": 0.180675
|
|
},
|
|
{
|
|
"acc": 0.74637232,
|
|
"epoch": 0.21822970557802654,
|
|
"grad_norm": 1.03125,
|
|
"learning_rate": 4.5494333787426635e-05,
|
|
"loss": 0.82163248,
|
|
"memory(GiB)": 18.79,
|
|
"step": 290,
|
|
"train_speed(iter/s)": 0.180719
|
|
},
|
|
{
|
|
"acc": 0.73941607,
|
|
"epoch": 0.2257548678393378,
|
|
"grad_norm": 0.92578125,
|
|
"learning_rate": 4.5139057915112135e-05,
|
|
"loss": 0.87674456,
|
|
"memory(GiB)": 18.79,
|
|
"step": 300,
|
|
"train_speed(iter/s)": 0.180666
|
|
},
|
|
{
|
|
"epoch": 0.2257548678393378,
|
|
"eval_acc": 0.7357569180683667,
|
|
"eval_loss": 0.8706684112548828,
|
|
"eval_runtime": 17.5139,
|
|
"eval_samples_per_second": 12.219,
|
|
"eval_steps_per_second": 12.219,
|
|
"step": 300
|
|
},
|
|
{
|
|
"acc": 0.73469014,
|
|
"epoch": 0.23328003010064904,
|
|
"grad_norm": 1.0625,
|
|
"learning_rate": 4.477180124775388e-05,
|
|
"loss": 0.85682964,
|
|
"memory(GiB)": 18.79,
|
|
"step": 310,
|
|
"train_speed(iter/s)": 0.17887
|
|
},
|
|
{
|
|
"acc": 0.72672372,
|
|
"epoch": 0.2408051923619603,
|
|
"grad_norm": 1.0625,
|
|
"learning_rate": 4.43927822676105e-05,
|
|
"loss": 0.89327288,
|
|
"memory(GiB)": 18.79,
|
|
"step": 320,
|
|
"train_speed(iter/s)": 0.179041
|
|
},
|
|
{
|
|
"acc": 0.73567257,
|
|
"epoch": 0.24833035462327158,
|
|
"grad_norm": 1.3203125,
|
|
"learning_rate": 4.400222645438109e-05,
|
|
"loss": 0.85311451,
|
|
"memory(GiB)": 18.79,
|
|
"step": 330,
|
|
"train_speed(iter/s)": 0.179189
|
|
},
|
|
{
|
|
"acc": 0.7419868,
|
|
"epoch": 0.2558555168845828,
|
|
"grad_norm": 1.15625,
|
|
"learning_rate": 4.3600366151066575e-05,
|
|
"loss": 0.82233105,
|
|
"memory(GiB)": 18.79,
|
|
"step": 340,
|
|
"train_speed(iter/s)": 0.179204
|
|
},
|
|
{
|
|
"acc": 0.74832144,
|
|
"epoch": 0.2633806791458941,
|
|
"grad_norm": 1.0,
|
|
"learning_rate": 4.3187440425747994e-05,
|
|
"loss": 0.81996908,
|
|
"memory(GiB)": 18.79,
|
|
"step": 350,
|
|
"train_speed(iter/s)": 0.179326
|
|
},
|
|
{
|
|
"acc": 0.73445973,
|
|
"epoch": 0.27090584140720536,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 4.2763694929364166e-05,
|
|
"loss": 0.85199509,
|
|
"memory(GiB)": 18.79,
|
|
"step": 360,
|
|
"train_speed(iter/s)": 0.179521
|
|
},
|
|
{
|
|
"acc": 0.74308429,
|
|
"epoch": 0.2784310036685166,
|
|
"grad_norm": 1.1953125,
|
|
"learning_rate": 4.232938174957302e-05,
|
|
"loss": 0.83573999,
|
|
"memory(GiB)": 18.79,
|
|
"step": 370,
|
|
"train_speed(iter/s)": 0.179651
|
|
},
|
|
{
|
|
"acc": 0.7366713,
|
|
"epoch": 0.28595616592982787,
|
|
"grad_norm": 1.46875,
|
|
"learning_rate": 4.1884759260783816e-05,
|
|
"loss": 0.86369457,
|
|
"memory(GiB)": 18.79,
|
|
"step": 380,
|
|
"train_speed(iter/s)": 0.179604
|
|
},
|
|
{
|
|
"acc": 0.74553895,
|
|
"epoch": 0.29348132819113915,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 4.143009197044932e-05,
|
|
"loss": 0.82978315,
|
|
"memory(GiB)": 19.6,
|
|
"step": 390,
|
|
"train_speed(iter/s)": 0.179568
|
|
},
|
|
{
|
|
"acc": 0.73931322,
|
|
"epoch": 0.30100649045245037,
|
|
"grad_norm": 1.265625,
|
|
"learning_rate": 4.0965650361709363e-05,
|
|
"loss": 0.85100927,
|
|
"memory(GiB)": 19.6,
|
|
"step": 400,
|
|
"train_speed(iter/s)": 0.17966
|
|
},
|
|
{
|
|
"epoch": 0.30100649045245037,
|
|
"eval_acc": 0.7413378807844353,
|
|
"eval_loss": 0.8540410399436951,
|
|
"eval_runtime": 17.4936,
|
|
"eval_samples_per_second": 12.233,
|
|
"eval_steps_per_second": 12.233,
|
|
"step": 400
|
|
},
|
|
{
|
|
"acc": 0.73913217,
|
|
"epoch": 0.30853165271376165,
|
|
"grad_norm": 1.0625,
|
|
"learning_rate": 4.0491710732479566e-05,
|
|
"loss": 0.83017845,
|
|
"memory(GiB)": 19.6,
|
|
"step": 410,
|
|
"train_speed(iter/s)": 0.178277
|
|
},
|
|
{
|
|
"acc": 0.73962994,
|
|
"epoch": 0.3160568149750729,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 4.000855503108069e-05,
|
|
"loss": 0.84346523,
|
|
"memory(GiB)": 19.6,
|
|
"step": 420,
|
|
"train_speed(iter/s)": 0.178475
|
|
},
|
|
{
|
|
"acc": 0.74960279,
|
|
"epoch": 0.32358197723638416,
|
|
"grad_norm": 1.3671875,
|
|
"learning_rate": 3.951647068850662e-05,
|
|
"loss": 0.79658604,
|
|
"memory(GiB)": 19.6,
|
|
"step": 430,
|
|
"train_speed(iter/s)": 0.17856
|
|
},
|
|
{
|
|
"acc": 0.74074645,
|
|
"epoch": 0.33110713949769544,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 3.901575044743072e-05,
|
|
"loss": 0.86585417,
|
|
"memory(GiB)": 19.6,
|
|
"step": 440,
|
|
"train_speed(iter/s)": 0.178589
|
|
},
|
|
{
|
|
"acc": 0.74740105,
|
|
"epoch": 0.33863230175900666,
|
|
"grad_norm": 1.0625,
|
|
"learning_rate": 3.8506692188052116e-05,
|
|
"loss": 0.8223978,
|
|
"memory(GiB)": 19.6,
|
|
"step": 450,
|
|
"train_speed(iter/s)": 0.178586
|
|
},
|
|
{
|
|
"acc": 0.75465593,
|
|
"epoch": 0.34615746402031794,
|
|
"grad_norm": 1.015625,
|
|
"learning_rate": 3.798959875088584e-05,
|
|
"loss": 0.80566111,
|
|
"memory(GiB)": 19.6,
|
|
"step": 460,
|
|
"train_speed(iter/s)": 0.178662
|
|
},
|
|
{
|
|
"acc": 0.74560366,
|
|
"epoch": 0.3536826262816292,
|
|
"grad_norm": 1.140625,
|
|
"learning_rate": 3.7464777756601905e-05,
|
|
"loss": 0.83098888,
|
|
"memory(GiB)": 19.6,
|
|
"step": 470,
|
|
"train_speed(iter/s)": 0.178743
|
|
},
|
|
{
|
|
"acc": 0.7393681,
|
|
"epoch": 0.36120778854294044,
|
|
"grad_norm": 1.671875,
|
|
"learning_rate": 3.693254142302071e-05,
|
|
"loss": 0.82851734,
|
|
"memory(GiB)": 19.6,
|
|
"step": 480,
|
|
"train_speed(iter/s)": 0.178887
|
|
},
|
|
{
|
|
"acc": 0.74919853,
|
|
"epoch": 0.3687329508042517,
|
|
"grad_norm": 1.15625,
|
|
"learning_rate": 3.639320637937357e-05,
|
|
"loss": 0.83529673,
|
|
"memory(GiB)": 19.6,
|
|
"step": 490,
|
|
"train_speed(iter/s)": 0.178857
|
|
},
|
|
{
|
|
"acc": 0.74647813,
|
|
"epoch": 0.37625811306556295,
|
|
"grad_norm": 1.1171875,
|
|
"learning_rate": 3.5847093477938956e-05,
|
|
"loss": 0.83272486,
|
|
"memory(GiB)": 19.6,
|
|
"step": 500,
|
|
"train_speed(iter/s)": 0.17888
|
|
},
|
|
{
|
|
"epoch": 0.37625811306556295,
|
|
"eval_acc": 0.7429656615766219,
|
|
"eval_loss": 0.8442411422729492,
|
|
"eval_runtime": 17.5096,
|
|
"eval_samples_per_second": 12.222,
|
|
"eval_steps_per_second": 12.222,
|
|
"step": 500
|
|
},
|
|
{
|
|
"acc": 0.73344955,
|
|
"epoch": 0.38378327532687423,
|
|
"grad_norm": 1.1875,
|
|
"learning_rate": 3.529452760316629e-05,
|
|
"loss": 0.85569019,
|
|
"memory(GiB)": 19.6,
|
|
"step": 510,
|
|
"train_speed(iter/s)": 0.177842
|
|
},
|
|
{
|
|
"acc": 0.73404479,
|
|
"epoch": 0.3913084375881855,
|
|
"grad_norm": 1.15625,
|
|
"learning_rate": 3.473583747840112e-05,
|
|
"loss": 0.87756214,
|
|
"memory(GiB)": 19.6,
|
|
"step": 520,
|
|
"train_speed(iter/s)": 0.178009
|
|
},
|
|
{
|
|
"acc": 0.74784298,
|
|
"epoch": 0.39883359984949673,
|
|
"grad_norm": 1.1953125,
|
|
"learning_rate": 3.4171355470326414e-05,
|
|
"loss": 0.81443148,
|
|
"memory(GiB)": 19.6,
|
|
"step": 530,
|
|
"train_speed(iter/s)": 0.177998
|
|
},
|
|
{
|
|
"acc": 0.73395772,
|
|
"epoch": 0.406358762110808,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 3.360141739123655e-05,
|
|
"loss": 0.87308407,
|
|
"memory(GiB)": 19.6,
|
|
"step": 540,
|
|
"train_speed(iter/s)": 0.178089
|
|
},
|
|
{
|
|
"acc": 0.74080787,
|
|
"epoch": 0.4138839243721193,
|
|
"grad_norm": 1.3046875,
|
|
"learning_rate": 3.302636229926135e-05,
|
|
"loss": 0.83921108,
|
|
"memory(GiB)": 19.6,
|
|
"step": 550,
|
|
"train_speed(iter/s)": 0.178188
|
|
},
|
|
{
|
|
"acc": 0.74777498,
|
|
"epoch": 0.4214090866334305,
|
|
"grad_norm": 1.125,
|
|
"learning_rate": 3.244653229665925e-05,
|
|
"loss": 0.80149298,
|
|
"memory(GiB)": 19.6,
|
|
"step": 560,
|
|
"train_speed(iter/s)": 0.178273
|
|
},
|
|
{
|
|
"acc": 0.74247198,
|
|
"epoch": 0.4289342488947418,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 3.1862272326299526e-05,
|
|
"loss": 0.84387398,
|
|
"memory(GiB)": 19.6,
|
|
"step": 570,
|
|
"train_speed(iter/s)": 0.178376
|
|
},
|
|
{
|
|
"acc": 0.73954334,
|
|
"epoch": 0.4364594111560531,
|
|
"grad_norm": 1.40625,
|
|
"learning_rate": 3.1273929966454535e-05,
|
|
"loss": 0.83296976,
|
|
"memory(GiB)": 19.6,
|
|
"step": 580,
|
|
"train_speed(iter/s)": 0.178516
|
|
},
|
|
{
|
|
"acc": 0.74422665,
|
|
"epoch": 0.4439845734173643,
|
|
"grad_norm": 1.265625,
|
|
"learning_rate": 3.0681855224024235e-05,
|
|
"loss": 0.8339119,
|
|
"memory(GiB)": 19.6,
|
|
"step": 590,
|
|
"train_speed(iter/s)": 0.178637
|
|
},
|
|
{
|
|
"acc": 0.73981266,
|
|
"epoch": 0.4515097356786756,
|
|
"grad_norm": 1.15625,
|
|
"learning_rate": 3.008640032631585e-05,
|
|
"loss": 0.83661747,
|
|
"memory(GiB)": 19.6,
|
|
"step": 600,
|
|
"train_speed(iter/s)": 0.178682
|
|
},
|
|
{
|
|
"epoch": 0.4515097356786756,
|
|
"eval_acc": 0.7431594450042632,
|
|
"eval_loss": 0.8382883667945862,
|
|
"eval_runtime": 17.5013,
|
|
"eval_samples_per_second": 12.228,
|
|
"eval_steps_per_second": 12.228,
|
|
"step": 600
|
|
},
|
|
{
|
|
"acc": 0.74818993,
|
|
"epoch": 0.4590348979399868,
|
|
"grad_norm": 1.3359375,
|
|
"learning_rate": 2.9487919511502653e-05,
|
|
"loss": 0.81010456,
|
|
"memory(GiB)": 19.6,
|
|
"step": 610,
|
|
"train_speed(iter/s)": 0.177788
|
|
},
|
|
{
|
|
"acc": 0.75267715,
|
|
"epoch": 0.4665600602012981,
|
|
"grad_norm": 1.3125,
|
|
"learning_rate": 2.888676881788645e-05,
|
|
"loss": 0.80105429,
|
|
"memory(GiB)": 19.6,
|
|
"step": 620,
|
|
"train_speed(iter/s)": 0.17777
|
|
},
|
|
{
|
|
"acc": 0.7422555,
|
|
"epoch": 0.47408522246260937,
|
|
"grad_norm": 1.1875,
|
|
"learning_rate": 2.8283305872089145e-05,
|
|
"loss": 0.8282918,
|
|
"memory(GiB)": 19.6,
|
|
"step": 630,
|
|
"train_speed(iter/s)": 0.177843
|
|
},
|
|
{
|
|
"acc": 0.74146862,
|
|
"epoch": 0.4816103847239206,
|
|
"grad_norm": 1.3046875,
|
|
"learning_rate": 2.767788967629944e-05,
|
|
"loss": 0.83825617,
|
|
"memory(GiB)": 19.6,
|
|
"step": 640,
|
|
"train_speed(iter/s)": 0.177979
|
|
},
|
|
{
|
|
"acc": 0.74322186,
|
|
"epoch": 0.4891355469852319,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 2.707088039470122e-05,
|
|
"loss": 0.82794752,
|
|
"memory(GiB)": 19.6,
|
|
"step": 650,
|
|
"train_speed(iter/s)": 0.178091
|
|
},
|
|
{
|
|
"acc": 0.74506578,
|
|
"epoch": 0.49666070924654315,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 2.64626391392106e-05,
|
|
"loss": 0.82765636,
|
|
"memory(GiB)": 19.6,
|
|
"step": 660,
|
|
"train_speed(iter/s)": 0.178174
|
|
},
|
|
{
|
|
"acc": 0.74580374,
|
|
"epoch": 0.5041858715078544,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 2.5853527754649198e-05,
|
|
"loss": 0.82673864,
|
|
"memory(GiB)": 19.6,
|
|
"step": 670,
|
|
"train_speed(iter/s)": 0.178186
|
|
},
|
|
{
|
|
"acc": 0.74080172,
|
|
"epoch": 0.5117110337691656,
|
|
"grad_norm": 1.28125,
|
|
"learning_rate": 2.5243908603481453e-05,
|
|
"loss": 0.84204159,
|
|
"memory(GiB)": 19.6,
|
|
"step": 680,
|
|
"train_speed(iter/s)": 0.178249
|
|
},
|
|
{
|
|
"acc": 0.75322657,
|
|
"epoch": 0.5192361960304769,
|
|
"grad_norm": 1.1015625,
|
|
"learning_rate": 2.4634144350243894e-05,
|
|
"loss": 0.81114817,
|
|
"memory(GiB)": 19.6,
|
|
"step": 690,
|
|
"train_speed(iter/s)": 0.178267
|
|
},
|
|
{
|
|
"acc": 0.75419984,
|
|
"epoch": 0.5267613582917882,
|
|
"grad_norm": 1.1015625,
|
|
"learning_rate": 2.402459774579475e-05,
|
|
"loss": 0.79072003,
|
|
"memory(GiB)": 19.6,
|
|
"step": 700,
|
|
"train_speed(iter/s)": 0.178363
|
|
},
|
|
{
|
|
"epoch": 0.5267613582917882,
|
|
"eval_acc": 0.7448647391675064,
|
|
"eval_loss": 0.8292610049247742,
|
|
"eval_runtime": 17.5299,
|
|
"eval_samples_per_second": 12.208,
|
|
"eval_steps_per_second": 12.208,
|
|
"step": 700
|
|
},
|
|
{
|
|
"acc": 0.74434228,
|
|
"epoch": 0.5342865205530994,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 2.3415631411512283e-05,
|
|
"loss": 0.82065563,
|
|
"memory(GiB)": 19.6,
|
|
"step": 710,
|
|
"train_speed(iter/s)": 0.177614
|
|
},
|
|
{
|
|
"acc": 0.75825992,
|
|
"epoch": 0.5418116828144107,
|
|
"grad_norm": 1.234375,
|
|
"learning_rate": 2.2807607623569986e-05,
|
|
"loss": 0.78528976,
|
|
"memory(GiB)": 19.6,
|
|
"step": 720,
|
|
"train_speed(iter/s)": 0.17764
|
|
},
|
|
{
|
|
"acc": 0.73724399,
|
|
"epoch": 0.5493368450757219,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 2.2200888097417307e-05,
|
|
"loss": 0.83492756,
|
|
"memory(GiB)": 19.6,
|
|
"step": 730,
|
|
"train_speed(iter/s)": 0.177754
|
|
},
|
|
{
|
|
"acc": 0.74586935,
|
|
"epoch": 0.5568620073370332,
|
|
"grad_norm": 1.2734375,
|
|
"learning_rate": 2.159583377259384e-05,
|
|
"loss": 0.8267067,
|
|
"memory(GiB)": 19.6,
|
|
"step": 740,
|
|
"train_speed(iter/s)": 0.17778
|
|
},
|
|
{
|
|
"acc": 0.74503641,
|
|
"epoch": 0.5643871695983445,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 2.0992804598005174e-05,
|
|
"loss": 0.80896463,
|
|
"memory(GiB)": 19.6,
|
|
"step": 750,
|
|
"train_speed(iter/s)": 0.17788
|
|
},
|
|
{
|
|
"acc": 0.74723582,
|
|
"epoch": 0.5719123318596557,
|
|
"grad_norm": 1.3359375,
|
|
"learning_rate": 2.0392159317788028e-05,
|
|
"loss": 0.81317263,
|
|
"memory(GiB)": 19.6,
|
|
"step": 760,
|
|
"train_speed(iter/s)": 0.177963
|
|
},
|
|
{
|
|
"acc": 0.74401774,
|
|
"epoch": 0.579437494120967,
|
|
"grad_norm": 1.046875,
|
|
"learning_rate": 1.9794255257892125e-05,
|
|
"loss": 0.83063583,
|
|
"memory(GiB)": 19.6,
|
|
"step": 770,
|
|
"train_speed(iter/s)": 0.178029
|
|
},
|
|
{
|
|
"acc": 0.7539969,
|
|
"epoch": 0.5869626563822783,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 1.9199448113505802e-05,
|
|
"loss": 0.79329357,
|
|
"memory(GiB)": 19.6,
|
|
"step": 780,
|
|
"train_speed(iter/s)": 0.178108
|
|
},
|
|
{
|
|
"acc": 0.74454188,
|
|
"epoch": 0.5944878186435895,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 1.860809173745162e-05,
|
|
"loss": 0.83453617,
|
|
"memory(GiB)": 19.6,
|
|
"step": 790,
|
|
"train_speed(iter/s)": 0.178158
|
|
},
|
|
{
|
|
"acc": 0.74658017,
|
|
"epoch": 0.6020129809049007,
|
|
"grad_norm": 1.3359375,
|
|
"learning_rate": 1.802053792967819e-05,
|
|
"loss": 0.82045937,
|
|
"memory(GiB)": 19.6,
|
|
"step": 800,
|
|
"train_speed(iter/s)": 0.178154
|
|
},
|
|
{
|
|
"epoch": 0.6020129809049007,
|
|
"eval_acc": 0.7476164638400125,
|
|
"eval_loss": 0.8245773315429688,
|
|
"eval_runtime": 17.4656,
|
|
"eval_samples_per_second": 12.253,
|
|
"eval_steps_per_second": 12.253,
|
|
"step": 800
|
|
},
|
|
{
|
|
"acc": 0.74530048,
|
|
"epoch": 0.609538143166212,
|
|
"grad_norm": 1.609375,
|
|
"learning_rate": 1.743713622797311e-05,
|
|
"loss": 0.81913815,
|
|
"memory(GiB)": 19.6,
|
|
"step": 810,
|
|
"train_speed(iter/s)": 0.177468
|
|
},
|
|
{
|
|
"acc": 0.75947304,
|
|
"epoch": 0.6170633054275233,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 1.6858233700021754e-05,
|
|
"loss": 0.78629255,
|
|
"memory(GiB)": 19.6,
|
|
"step": 820,
|
|
"train_speed(iter/s)": 0.17748
|
|
},
|
|
{
|
|
"acc": 0.75599556,
|
|
"epoch": 0.6245884676888346,
|
|
"grad_norm": 1.34375,
|
|
"learning_rate": 1.628417473693552e-05,
|
|
"loss": 0.77964149,
|
|
"memory(GiB)": 19.6,
|
|
"step": 830,
|
|
"train_speed(iter/s)": 0.177495
|
|
},
|
|
{
|
|
"acc": 0.7474288,
|
|
"epoch": 0.6321136299501458,
|
|
"grad_norm": 1.359375,
|
|
"learning_rate": 1.571530084837234e-05,
|
|
"loss": 0.80840693,
|
|
"memory(GiB)": 19.6,
|
|
"step": 840,
|
|
"train_speed(iter/s)": 0.17758
|
|
},
|
|
{
|
|
"acc": 0.74778714,
|
|
"epoch": 0.639638792211457,
|
|
"grad_norm": 1.578125,
|
|
"learning_rate": 1.5151950459371417e-05,
|
|
"loss": 0.7930984,
|
|
"memory(GiB)": 19.6,
|
|
"step": 850,
|
|
"train_speed(iter/s)": 0.177866
|
|
},
|
|
{
|
|
"acc": 0.75338955,
|
|
"epoch": 0.6471639544727683,
|
|
"grad_norm": 1.328125,
|
|
"learning_rate": 1.4594458709023034e-05,
|
|
"loss": 0.78671389,
|
|
"memory(GiB)": 19.6,
|
|
"step": 860,
|
|
"train_speed(iter/s)": 0.178036
|
|
},
|
|
{
|
|
"acc": 0.76030412,
|
|
"epoch": 0.6546891167340796,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.4043157251093097e-05,
|
|
"loss": 0.75907588,
|
|
"memory(GiB)": 19.6,
|
|
"step": 870,
|
|
"train_speed(iter/s)": 0.178081
|
|
},
|
|
{
|
|
"acc": 0.74864521,
|
|
"epoch": 0.6622142789953909,
|
|
"grad_norm": 1.1875,
|
|
"learning_rate": 1.3498374056721197e-05,
|
|
"loss": 0.82172766,
|
|
"memory(GiB)": 21.02,
|
|
"step": 880,
|
|
"train_speed(iter/s)": 0.178291
|
|
},
|
|
{
|
|
"acc": 0.75451798,
|
|
"epoch": 0.6697394412567022,
|
|
"grad_norm": 1.171875,
|
|
"learning_rate": 1.2960433219309453e-05,
|
|
"loss": 0.79079266,
|
|
"memory(GiB)": 21.02,
|
|
"step": 890,
|
|
"train_speed(iter/s)": 0.178342
|
|
},
|
|
{
|
|
"acc": 0.75273514,
|
|
"epoch": 0.6772646035180133,
|
|
"grad_norm": 1.4609375,
|
|
"learning_rate": 1.2429654761718206e-05,
|
|
"loss": 0.79605904,
|
|
"memory(GiB)": 21.02,
|
|
"step": 900,
|
|
"train_speed(iter/s)": 0.178448
|
|
},
|
|
{
|
|
"epoch": 0.6772646035180133,
|
|
"eval_acc": 0.7478102472676537,
|
|
"eval_loss": 0.8203556537628174,
|
|
"eval_runtime": 17.6833,
|
|
"eval_samples_per_second": 12.102,
|
|
"eval_steps_per_second": 12.102,
|
|
"step": 900
|
|
},
|
|
{
|
|
"acc": 0.75493579,
|
|
"epoch": 0.6847897657793246,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.1906354445883342e-05,
|
|
"loss": 0.79625425,
|
|
"memory(GiB)": 21.02,
|
|
"step": 910,
|
|
"train_speed(iter/s)": 0.177828
|
|
},
|
|
{
|
|
"acc": 0.74433489,
|
|
"epoch": 0.6923149280406359,
|
|
"grad_norm": 1.3828125,
|
|
"learning_rate": 1.1390843584968398e-05,
|
|
"loss": 0.82548571,
|
|
"memory(GiB)": 21.02,
|
|
"step": 920,
|
|
"train_speed(iter/s)": 0.177868
|
|
},
|
|
{
|
|
"acc": 0.74827509,
|
|
"epoch": 0.6998400903019472,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.08834288581633e-05,
|
|
"loss": 0.8204648,
|
|
"memory(GiB)": 21.02,
|
|
"step": 930,
|
|
"train_speed(iter/s)": 0.177889
|
|
},
|
|
{
|
|
"acc": 0.75613265,
|
|
"epoch": 0.7073652525632584,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.0384412128239885e-05,
|
|
"loss": 0.77681332,
|
|
"memory(GiB)": 21.02,
|
|
"step": 940,
|
|
"train_speed(iter/s)": 0.177919
|
|
},
|
|
{
|
|
"acc": 0.75295358,
|
|
"epoch": 0.7148904148245696,
|
|
"grad_norm": 1.4609375,
|
|
"learning_rate": 9.894090261972639e-06,
|
|
"loss": 0.80203428,
|
|
"memory(GiB)": 21.02,
|
|
"step": 950,
|
|
"train_speed(iter/s)": 0.177945
|
|
},
|
|
{
|
|
"acc": 0.75752335,
|
|
"epoch": 0.7224155770858809,
|
|
"grad_norm": 1.4609375,
|
|
"learning_rate": 9.412754953531663e-06,
|
|
"loss": 0.77538629,
|
|
"memory(GiB)": 21.02,
|
|
"step": 960,
|
|
"train_speed(iter/s)": 0.177984
|
|
},
|
|
{
|
|
"acc": 0.75414553,
|
|
"epoch": 0.7299407393471922,
|
|
"grad_norm": 1.4765625,
|
|
"learning_rate": 8.940692550952806e-06,
|
|
"loss": 0.78667145,
|
|
"memory(GiB)": 21.02,
|
|
"step": 970,
|
|
"train_speed(iter/s)": 0.17801
|
|
},
|
|
{
|
|
"acc": 0.74900241,
|
|
"epoch": 0.7374659016085034,
|
|
"grad_norm": 1.3671875,
|
|
"learning_rate": 8.478183885788216e-06,
|
|
"loss": 0.80684509,
|
|
"memory(GiB)": 21.02,
|
|
"step": 980,
|
|
"train_speed(iter/s)": 0.178043
|
|
},
|
|
{
|
|
"acc": 0.75565805,
|
|
"epoch": 0.7449910638698147,
|
|
"grad_norm": 1.203125,
|
|
"learning_rate": 8.025504106038692e-06,
|
|
"loss": 0.80487137,
|
|
"memory(GiB)": 21.02,
|
|
"step": 990,
|
|
"train_speed(iter/s)": 0.17808
|
|
},
|
|
{
|
|
"acc": 0.74333644,
|
|
"epoch": 0.7525162261311259,
|
|
"grad_norm": 1.703125,
|
|
"learning_rate": 7.582922512467183e-06,
|
|
"loss": 0.81951437,
|
|
"memory(GiB)": 21.54,
|
|
"step": 1000,
|
|
"train_speed(iter/s)": 0.178069
|
|
},
|
|
{
|
|
"epoch": 0.7525162261311259,
|
|
"eval_acc": 0.7478490039531819,
|
|
"eval_loss": 0.8180410861968994,
|
|
"eval_runtime": 17.4781,
|
|
"eval_samples_per_second": 12.244,
|
|
"eval_steps_per_second": 12.244,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"acc": 0.75819354,
|
|
"epoch": 0.7600413883924372,
|
|
"grad_norm": 1.4765625,
|
|
"learning_rate": 7.150702398390841e-06,
|
|
"loss": 0.78239226,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1010,
|
|
"train_speed(iter/s)": 0.17756
|
|
},
|
|
{
|
|
"acc": 0.75477595,
|
|
"epoch": 0.7675665506537485,
|
|
"grad_norm": 1.6015625,
|
|
"learning_rate": 6.729100893046897e-06,
|
|
"loss": 0.77195005,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1020,
|
|
"train_speed(iter/s)": 0.177596
|
|
},
|
|
{
|
|
"acc": 0.75055728,
|
|
"epoch": 0.7750917129150597,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 6.318368808625641e-06,
|
|
"loss": 0.79199243,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1030,
|
|
"train_speed(iter/s)": 0.177596
|
|
},
|
|
{
|
|
"acc": 0.74901199,
|
|
"epoch": 0.782616875176371,
|
|
"grad_norm": 1.1875,
|
|
"learning_rate": 5.918750491061323e-06,
|
|
"loss": 0.8067667,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1040,
|
|
"train_speed(iter/s)": 0.177656
|
|
},
|
|
{
|
|
"acc": 0.75432835,
|
|
"epoch": 0.7901420374376823,
|
|
"grad_norm": 1.34375,
|
|
"learning_rate": 5.530483674669948e-06,
|
|
"loss": 0.80282774,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1050,
|
|
"train_speed(iter/s)": 0.177668
|
|
},
|
|
{
|
|
"acc": 0.75713019,
|
|
"epoch": 0.7976671996989935,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 5.153799340720309e-06,
|
|
"loss": 0.78160086,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1060,
|
|
"train_speed(iter/s)": 0.177697
|
|
},
|
|
{
|
|
"acc": 0.74733901,
|
|
"epoch": 0.8051923619603047,
|
|
"grad_norm": 1.5,
|
|
"learning_rate": 4.788921580022421e-06,
|
|
"loss": 0.80715675,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1070,
|
|
"train_speed(iter/s)": 0.177759
|
|
},
|
|
{
|
|
"acc": 0.75728092,
|
|
"epoch": 0.812717524221616,
|
|
"grad_norm": 1.2734375,
|
|
"learning_rate": 4.436067459615145e-06,
|
|
"loss": 0.78215985,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1080,
|
|
"train_speed(iter/s)": 0.177787
|
|
},
|
|
{
|
|
"acc": 0.75329976,
|
|
"epoch": 0.8202426864829273,
|
|
"grad_norm": 1.2578125,
|
|
"learning_rate": 4.095446893632235e-06,
|
|
"loss": 0.77923803,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1090,
|
|
"train_speed(iter/s)": 0.177802
|
|
},
|
|
{
|
|
"acc": 0.74634914,
|
|
"epoch": 0.8277678487442386,
|
|
"grad_norm": 1.453125,
|
|
"learning_rate": 3.7672625184237034e-06,
|
|
"loss": 0.81888247,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1100,
|
|
"train_speed(iter/s)": 0.177796
|
|
},
|
|
{
|
|
"epoch": 0.8277678487442386,
|
|
"eval_acc": 0.7483140841795209,
|
|
"eval_loss": 0.8171122670173645,
|
|
"eval_runtime": 17.5192,
|
|
"eval_samples_per_second": 12.215,
|
|
"eval_steps_per_second": 12.215,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"acc": 0.7570456,
|
|
"epoch": 0.8352930110055498,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 3.4517095720067783e-06,
|
|
"loss": 0.78159804,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1110,
|
|
"train_speed(iter/s)": 0.177278
|
|
},
|
|
{
|
|
"acc": 0.75265865,
|
|
"epoch": 0.842818173266861,
|
|
"grad_norm": 1.46875,
|
|
"learning_rate": 3.148975777918095e-06,
|
|
"loss": 0.78856063,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1120,
|
|
"train_speed(iter/s)": 0.177277
|
|
},
|
|
{
|
|
"acc": 0.75768456,
|
|
"epoch": 0.8503433355281723,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 2.8592412335363472e-06,
|
|
"loss": 0.77000494,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1130,
|
|
"train_speed(iter/s)": 0.177348
|
|
},
|
|
{
|
|
"acc": 0.75568204,
|
|
"epoch": 0.8578684977894836,
|
|
"grad_norm": 1.3828125,
|
|
"learning_rate": 2.5826783029417157e-06,
|
|
"loss": 0.78663826,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1140,
|
|
"train_speed(iter/s)": 0.177395
|
|
},
|
|
{
|
|
"acc": 0.74540915,
|
|
"epoch": 0.8653936600507949,
|
|
"grad_norm": 1.484375,
|
|
"learning_rate": 2.3194515143758976e-06,
|
|
"loss": 0.82176847,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1150,
|
|
"train_speed(iter/s)": 0.177432
|
|
},
|
|
{
|
|
"acc": 0.74521751,
|
|
"epoch": 0.8729188223121062,
|
|
"grad_norm": 1.2734375,
|
|
"learning_rate": 2.0697174623636794e-06,
|
|
"loss": 0.80748806,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1160,
|
|
"train_speed(iter/s)": 0.177452
|
|
},
|
|
{
|
|
"acc": 0.74583597,
|
|
"epoch": 0.8804439845734173,
|
|
"grad_norm": 1.328125,
|
|
"learning_rate": 1.8336247145543079e-06,
|
|
"loss": 0.80123825,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1170,
|
|
"train_speed(iter/s)": 0.177474
|
|
},
|
|
{
|
|
"acc": 0.75462961,
|
|
"epoch": 0.8879691468347286,
|
|
"grad_norm": 1.3828125,
|
|
"learning_rate": 1.6113137233380954e-06,
|
|
"loss": 0.78282847,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1180,
|
|
"train_speed(iter/s)": 0.177502
|
|
},
|
|
{
|
|
"acc": 0.76421356,
|
|
"epoch": 0.8954943090960399,
|
|
"grad_norm": 1.40625,
|
|
"learning_rate": 1.4029167422908107e-06,
|
|
"loss": 0.77380075,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1190,
|
|
"train_speed(iter/s)": 0.177542
|
|
},
|
|
{
|
|
"acc": 0.7530828,
|
|
"epoch": 0.9030194713573512,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.2085577474955533e-06,
|
|
"loss": 0.78172884,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1200,
|
|
"train_speed(iter/s)": 0.177574
|
|
},
|
|
{
|
|
"epoch": 0.9030194713573512,
|
|
"eval_acc": 0.7491279745756143,
|
|
"eval_loss": 0.8163484930992126,
|
|
"eval_runtime": 17.5046,
|
|
"eval_samples_per_second": 12.225,
|
|
"eval_steps_per_second": 12.225,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"acc": 0.75201364,
|
|
"epoch": 0.9105446336186624,
|
|
"grad_norm": 1.4140625,
|
|
"learning_rate": 1.0283523637889592e-06,
|
|
"loss": 0.80502033,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1210,
|
|
"train_speed(iter/s)": 0.17712
|
|
},
|
|
{
|
|
"acc": 0.74727998,
|
|
"epoch": 0.9180697958799736,
|
|
"grad_norm": 1.296875,
|
|
"learning_rate": 8.624077959756032e-07,
|
|
"loss": 0.79580932,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1220,
|
|
"train_speed(iter/s)": 0.17714
|
|
},
|
|
{
|
|
"acc": 0.75626488,
|
|
"epoch": 0.9255949581412849,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 7.108227650514637e-07,
|
|
"loss": 0.80513897,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1230,
|
|
"train_speed(iter/s)": 0.177153
|
|
},
|
|
{
|
|
"acc": 0.75931497,
|
|
"epoch": 0.9331201204025962,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 5.736874494744887e-07,
|
|
"loss": 0.77948103,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1240,
|
|
"train_speed(iter/s)": 0.177196
|
|
},
|
|
{
|
|
"acc": 0.74202876,
|
|
"epoch": 0.9406452826639075,
|
|
"grad_norm": 1.2734375,
|
|
"learning_rate": 4.5108343151710196e-07,
|
|
"loss": 0.82898502,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1250,
|
|
"train_speed(iter/s)": 0.177234
|
|
},
|
|
{
|
|
"acc": 0.74493279,
|
|
"epoch": 0.9481704449252187,
|
|
"grad_norm": 1.3984375,
|
|
"learning_rate": 3.430836487326311e-07,
|
|
"loss": 0.83584261,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1260,
|
|
"train_speed(iter/s)": 0.177249
|
|
},
|
|
{
|
|
"acc": 0.75014348,
|
|
"epoch": 0.9556956071865299,
|
|
"grad_norm": 1.4140625,
|
|
"learning_rate": 2.497523505645083e-07,
|
|
"loss": 0.80501699,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1270,
|
|
"train_speed(iter/s)": 0.177308
|
|
},
|
|
{
|
|
"acc": 0.75359378,
|
|
"epoch": 0.9632207694478412,
|
|
"grad_norm": 1.359375,
|
|
"learning_rate": 1.7114506012405607e-07,
|
|
"loss": 0.80206327,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1280,
|
|
"train_speed(iter/s)": 0.17736
|
|
},
|
|
{
|
|
"acc": 0.76561971,
|
|
"epoch": 0.9707459317091525,
|
|
"grad_norm": 1.2109375,
|
|
"learning_rate": 1.0730854115959532e-07,
|
|
"loss": 0.74447112,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1290,
|
|
"train_speed(iter/s)": 0.177376
|
|
},
|
|
{
|
|
"acc": 0.74775319,
|
|
"epoch": 0.9782710939704637,
|
|
"grad_norm": 1.5625,
|
|
"learning_rate": 5.8280770236518456e-08,
|
|
"loss": 0.81718521,
|
|
"memory(GiB)": 17.02,
|
|
"step": 1300,
|
|
"train_speed(iter/s)": 0.177414
|
|
},
|
|
{
|
|
"epoch": 0.9782710939704637,
|
|
"eval_acc": 0.7494767847453686,
|
|
"eval_loss": 0.8163220882415771,
|
|
"eval_runtime": 17.5248,
|
|
"eval_samples_per_second": 12.211,
|
|
"eval_steps_per_second": 12.211,
|
|
"step": 1300
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1328,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.720134513414052e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|