commit abf6ab0743
Author: wql
Date: 2024-08-20 14:30:32 +08:00

36 changed files with 188159 additions and 0 deletions

results/gpu_status.json (new file, 52 additions)

@@ -0,0 +1,52 @@
{"cur_time": "2024-08-20 05:51:36", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 307.167}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 422.759}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 277.214}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 338.271}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 336.763}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 242.195}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 263.752}]}
{"cur_time": "2024-08-20 05:51:41", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 346.189}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 429.544}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 327.636}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 291.271}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 341.014}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 293.648}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 318.306}]}
{"cur_time": "2024-08-20 05:51:46", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 379.941}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 307.755}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 376.886}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 290.816}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 258.29}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 273.436}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 425.757}]}
{"cur_time": "2024-08-20 05:51:51", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 308.645}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 407.742}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 338.193}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 317.69}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 349.583}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 351.25}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 408.942}]}
{"cur_time": "2024-08-20 05:51:56", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 423.708}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 330.528}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 335.454}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 335.813}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 302.27}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 276.465}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 425.131}]}
{"cur_time": "2024-08-20 05:52:01", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 347.216}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 336.919}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19329833984375, "powerusage_W": 369.65}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 375.804}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 249.434}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 375.997}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 336.457}]}
{"cur_time": "2024-08-20 05:52:06", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 363.441}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 401.156}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 357.961}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 265.645}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 286.166}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 331.34}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 356.879}]}
{"cur_time": "2024-08-20 05:52:11", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 264.631}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 302.68}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 454.116}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 289.179}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 261.585}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 326.945}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 372.993}]}
{"cur_time": "2024-08-20 05:52:16", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 337.774}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 345.174}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 335.228}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 286.173}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18548583984375, "powerusage_W": 287.199}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 283.311}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 422.488}]}
{"cur_time": "2024-08-20 05:52:21", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 462.293}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 373.353}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 321.738}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 282.028}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 333.428}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 406.635}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 305.511}]}
{"cur_time": "2024-08-20 05:52:26", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 384.12}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 335.057}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 445.323}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 320.908}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 355.983}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 361.616}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 301.339}]}
{"cur_time": "2024-08-20 05:52:31", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 366.053}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 269.164}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 377.02}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 343.33}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 316.924}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 324.999}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 403.598}]}
{"cur_time": "2024-08-20 05:52:36", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 336.677}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 344.461}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 409.826}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 280.51}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 381.218}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 352.569}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 324.622}]}
{"cur_time": "2024-08-20 05:52:41", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 343.702}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 326.647}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 407.118}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 374.119}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 355.588}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 267.23}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97259521484375, "powerusage_W": 273.832}]}
{"cur_time": "2024-08-20 05:52:46", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 298.007}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 352.17}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19525146484375, "powerusage_W": 329.054}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 319.866}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 323.981}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 388.235}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 264.466}]}
{"cur_time": "2024-08-20 05:52:51", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 353.069}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 342.228}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 385.345}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 318.174}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 361.891}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 318.237}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 321.576}]}
{"cur_time": "2024-08-20 05:52:56", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 379.997}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 360.658}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 310.646}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 348.472}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 390.949}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 332.858}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 380.053}]}
{"cur_time": "2024-08-20 05:53:01", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 301.181}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 299.387}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 378.149}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 317.838}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 250.537}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 376.788}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 311.201}]}
{"cur_time": "2024-08-20 05:53:06", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 327.049}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 353.297}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 312.501}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 373.248}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 367.924}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 382.531}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 300.593}]}
{"cur_time": "2024-08-20 05:53:11", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 391.24}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 397.56}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 302.363}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 375.895}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 302.32}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 346.986}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 307.599}]}
{"cur_time": "2024-08-20 05:53:16", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 385.476}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 365.405}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 338.363}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 304.719}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 343.358}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 371.967}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 294.259}]}
{"cur_time": "2024-08-20 05:53:21", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 296.326}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 282.231}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 287.786}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 353.057}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 362.785}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 416.269}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 441.139}]}
{"cur_time": "2024-08-20 05:53:26", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 341.974}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 368.386}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 313.315}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 343.119}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 285.099}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 358.607}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 338.021}]}
{"cur_time": "2024-08-20 05:53:31", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 373.733}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 390.075}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 380.943}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 338.585}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 345.427}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 311.943}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 339.123}]}
{"cur_time": "2024-08-20 05:53:36", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 271.227}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 402.251}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 295.655}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 271.54}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 330.76}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 311.469}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 329.33}]}
{"cur_time": "2024-08-20 05:53:41", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 333.173}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 364.767}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 320.433}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 334.655}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 292.792}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 392.604}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 364.216}]}
{"cur_time": "2024-08-20 05:53:46", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 324.805}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 353.805}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 386.361}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 350.295}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 336.418}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 337.784}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 285.147}]}
{"cur_time": "2024-08-20 05:53:51", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 348.019}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 381.006}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 292.084}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 378.336}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 306.725}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 313.626}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 301.81}]}
{"cur_time": "2024-08-20 05:53:56", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 302.408}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 312.465}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 387.608}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 355.132}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 308.182}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 384.889}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 368.923}]}
{"cur_time": "2024-08-20 05:54:01", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 273.023}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 363.172}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 302.859}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 345.856}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 319.584}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 351.944}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 259.696}]}
{"cur_time": "2024-08-20 05:54:06", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 324.606}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 351.956}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 364.526}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 328.148}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 339.942}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 400.083}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 266.148}]}
{"cur_time": "2024-08-20 05:54:11", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.14642333984375, "powerusage_W": 332.903}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 384.642}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.19720458984375, "powerusage_W": 297.204}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 315.529}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 319.422}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18743896484375, "powerusage_W": 407.878}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 26.97454833984375, "powerusage_W": 367.279}]}
{"cur_time": "2024-08-20 05:54:17", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.5882568359375, "powerusage_W": 96.803}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.5882568359375, "powerusage_W": 132.721}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.5882568359375, "powerusage_W": 142.463}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.8695068359375, "powerusage_W": 132.029}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 27.18939208984375, "powerusage_W": 132.193}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.8695068359375, "powerusage_W": 139.223}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 2.5882568359375, "powerusage_W": 130.807}]}
{"cur_time": "2024-08-20 05:54:22", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.073}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 90.727}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.418}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.977}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 87.475}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.583}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 87.108}]}
{"cur_time": "2024-08-20 05:54:27", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.58}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 90.199}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.173}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.671}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 87.091}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.351}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.854}]}
{"cur_time": "2024-08-20 05:54:32", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.221}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.774}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.025}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.39}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.804}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.268}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.775}]}
{"cur_time": "2024-08-20 05:54:37", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 93.85}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.503}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.85}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.257}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.531}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.092}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.579}]}
{"cur_time": "2024-08-20 05:54:42", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.662}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.253}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 94.765}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.178}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.353}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 88.994}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.514}]}
{"cur_time": "2024-08-20 05:54:47", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.461}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.096}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 94.762}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.215}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.216}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 88.896}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.302}]}
{"cur_time": "2024-08-20 05:54:52", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.454}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.061}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 94.933}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.302}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.108}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 88.924}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.403}]}
{"cur_time": "2024-08-20 05:54:57", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.476}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.042}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 94.94}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.43}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.101}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 88.949}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.494}]}
{"cur_time": "2024-08-20 05:55:02", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.606}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.173}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 95.116}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.563}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.198}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.054}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.605}]}
{"cur_time": "2024-08-20 05:55:07", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.707}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.223}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 95.233}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.641}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.24}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.084}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.726}]}
{"cur_time": "2024-08-20 05:55:12", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.749}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.282}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 95.408}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.832}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.356}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.124}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.797}]}
{"cur_time": "2024-08-20 05:55:17", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 93.837}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.383}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 95.543}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 85.961}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.468}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.18}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.924}]}
{"cur_time": "2024-08-20 05:55:22", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 94.009}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.535}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 95.621}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.066}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.539}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 89.316}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.5882568359375, "powerusage_W": 86.971}]}
{"cur_time": "2024-08-20 06:03:43", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.105}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.662}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.544}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.076}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.668}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.385}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.917}]}
{"cur_time": "2024-08-20 06:03:48", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.086}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.609}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.47}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.968}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.603}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.373}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.86}]}
{"cur_time": "2024-08-20 06:03:53", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.047}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.557}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.434}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.979}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.584}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.323}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.841}]}
{"cur_time": "2024-08-20 06:03:58", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.047}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.547}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.377}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.943}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.59}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.324}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.842}]}
{"cur_time": "2024-08-20 06:04:03", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.001}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.506}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.413}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.966}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.54}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.257}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.836}]}
{"cur_time": "2024-08-20 06:04:08", "all_gpu_status": [{"device": 0, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 94.021}, {"device": 1, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.486}, {"device": 2, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 95.345}, {"device": 3, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 85.986}, {"device": 4, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.52}, {"device": 5, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 89.311}, {"device": 6, "total_mem_GB": 95.5771484375, "used_mem_GB": 0.58502197265625, "powerusage_W": 86.823}]}

(new file: LLaMA-Factory training log, 132 additions)

@@ -0,0 +1,132 @@
08/20/2024 05:50:08 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:25478
08/20/2024 05:50:13 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:13 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:14 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:15 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:15 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:15 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:15 - INFO - llamafactory.hparams.parser - Process rank: 5, device: cuda:5, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:15 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:50:15 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:15 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:15 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:15 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:15 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:15 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:16 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
08/20/2024 05:50:16 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:22 - INFO - llamafactory.data.loader - Loading dataset identity.json...
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
training example:
input_ids:
[1, 518, 25580, 29962, 7251, 518, 29914, 25580, 29962, 15043, 29991, 306, 626, 8620, 978, 11656, 385, 319, 29902, 20255, 8906, 491, 8620, 8921, 27243, 1128, 508, 306, 6985, 366, 9826, 29973, 2]
inputs:
<s> [INST] hi [/INST] Hello! I am {{name}}, an AI assistant developed by {{author}}. How can I assist you today?</s>
label_ids:
[-100, -100, -100, -100, -100, -100, -100, -100, -100, 15043, 29991, 306, 626, 8620, 978, 11656, 385, 319, 29902, 20255, 8906, 491, 8620, 8921, 27243, 1128, 508, 306, 6985, 366, 9826, 29973, 2]
labels:
Hello! I am {{name}}, an AI assistant developed by {{author}}. How can I assist you today?</s>
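The `label_ids` row shows the standard supervised fine-tuning trick: every prompt position is set to `-100`, the index that PyTorch's cross-entropy loss ignores, so only the response tokens contribute to the loss. A small generic sketch of that masking (illustrative, not LLaMA-Factory's internal code):

```python
IGNORE_INDEX = -100  # positions with this label are skipped by torch cross-entropy


def mask_prompt(input_ids, prompt_len):
    """Copy input_ids into labels, masking the prompt so loss covers only the response."""
    labels = list(input_ids)
    labels[:prompt_len] = [IGNORE_INDEX] * prompt_len
    return labels


# In the example above, the first 9 tokens ("<s> [INST] hi [/INST]") are masked:
input_ids = [1, 518, 25580, 29962, 7251, 518, 29914, 25580, 29962, 15043, 29991]
print(mask_prompt(input_ids, prompt_len=9))
# [-100, -100, -100, -100, -100, -100, -100, -100, -100, 15043, 29991]
```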
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:50:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_en_demo.json...
08/20/2024 05:51:10 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:10 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:10 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:10 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:10 - INFO - llamafactory.model.model_utils.misc - Found linear modules: o_proj,k_proj,gate_proj,up_proj,v_proj,down_proj,q_proj
08/20/2024 05:51:10 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:11 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:11 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:11 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:11 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:11 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,k_proj,o_proj,up_proj,down_proj,v_proj,q_proj
08/20/2024 05:51:11 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,v_proj,down_proj,gate_proj,k_proj,o_proj,up_proj
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,v_proj,o_proj,k_proj,down_proj,up_proj,q_proj
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: up_proj,gate_proj,o_proj,q_proj,down_proj,k_proj,v_proj
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: o_proj,k_proj,q_proj,down_proj,up_proj,gate_proj,v_proj
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
08/20/2024 05:51:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
08/20/2024 05:51:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: o_proj,q_proj,gate_proj,up_proj,down_proj,v_proj,k_proj
08/20/2024 05:51:15 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:15 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:15 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:15 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
08/20/2024 05:51:15 - INFO - llamafactory.model.loader - trainable params: 19,988,480 || all params: 6,758,404,096 || trainable%: 0.2958
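The reported 19,988,480 trainable parameters are consistent with rank-8 LoRA on all seven linear projections of Llama-2-7B. Each adapted linear of shape `out × in` gains `r·(in + out)` parameters from its two low-rank factors. A quick check, with the hidden size (4096), intermediate size (11008), and layer count (32) assumed from the standard Llama-2-7B architecture:

```python
r, hidden, inter, layers = 8, 4096, 11008, 32


def lora_params(in_dim, out_dim, r=r):
    # A LoRA adapter adds B (out_dim x r) and A (r x in_dim) beside a frozen linear.
    return r * (in_dim + out_dim)


per_layer = (
    4 * lora_params(hidden, hidden)   # q_proj, k_proj, v_proj, o_proj
    + 2 * lora_params(hidden, inter)  # gate_proj, up_proj
    + 1 * lora_params(inter, hidden)  # down_proj
)
print(per_layer * layers)                  # 19988480, matching the log
print(6_738_415_616 + per_layer * layers)  # 6758404096 = frozen base + adapters
```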
{'loss': 1.2, 'grad_norm': 0.21601451933383942, 'learning_rate': 3e-05, 'epoch': 0.34, 'num_input_tokens_seen': 97680}
{'loss': 1.1929, 'grad_norm': 0.3155970871448517, 'learning_rate': 6e-05, 'epoch': 0.68, 'num_input_tokens_seen': 190736}
{'loss': 1.2289, 'grad_norm': 0.2989441156387329, 'learning_rate': 9e-05, 'epoch': 1.01, 'num_input_tokens_seen': 276880}
{'loss': 1.1455, 'grad_norm': 0.42915046215057373, 'learning_rate': 9.987820251299122e-05, 'epoch': 1.35, 'num_input_tokens_seen': 369776}
{'loss': 1.0487, 'grad_norm': 0.2830324172973633, 'learning_rate': 9.924038765061042e-05, 'epoch': 1.69, 'num_input_tokens_seen': 461200}
{'loss': 1.0015, 'grad_norm': 0.3625435531139374, 'learning_rate': 9.806308479691595e-05, 'epoch': 2.03, 'num_input_tokens_seen': 551424}
{'loss': 0.9574, 'grad_norm': 0.22050900757312775, 'learning_rate': 9.635919272833938e-05, 'epoch': 2.37, 'num_input_tokens_seen': 649520}
{'loss': 0.9658, 'grad_norm': 0.23838457465171814, 'learning_rate': 9.414737964294636e-05, 'epoch': 2.7, 'num_input_tokens_seen': 742832}
{'loss': 0.9311, 'grad_norm': 0.31824183464050293, 'learning_rate': 9.145187862775209e-05, 'epoch': 3.04, 'num_input_tokens_seen': 827888}
{'loss': 0.9186, 'grad_norm': 0.22274430096149445, 'learning_rate': 8.83022221559489e-05, 'epoch': 3.38, 'num_input_tokens_seen': 921920}
{'loss': 0.9014, 'grad_norm': 0.2516893446445465, 'learning_rate': 8.473291852294987e-05, 'epoch': 3.72, 'num_input_tokens_seen': 1010144}
{'loss': 0.8781, 'grad_norm': 0.18256805837154388, 'learning_rate': 8.07830737662829e-05, 'epoch': 4.06, 'num_input_tokens_seen': 1108752}
{'loss': 0.882, 'grad_norm': 0.21118617057800293, 'learning_rate': 7.649596321166024e-05, 'epoch': 4.39, 'num_input_tokens_seen': 1206192}
{'loss': 0.8807, 'grad_norm': 0.2069379687309265, 'learning_rate': 7.191855733945387e-05, 'epoch': 4.73, 'num_input_tokens_seen': 1294512}
{'loss': 0.8613, 'grad_norm': 0.21595001220703125, 'learning_rate': 6.710100716628344e-05, 'epoch': 5.07, 'num_input_tokens_seen': 1386112}
{'loss': 0.8721, 'grad_norm': 0.16913726925849915, 'learning_rate': 6.209609477998338e-05, 'epoch': 5.41, 'num_input_tokens_seen': 1478224}
{'loss': 0.8438, 'grad_norm': 0.23615054786205292, 'learning_rate': 5.695865504800327e-05, 'epoch': 5.75, 'num_input_tokens_seen': 1576032}
{'loss': 0.8324, 'grad_norm': 0.2467828243970871, 'learning_rate': 5.174497483512506e-05, 'epoch': 6.08, 'num_input_tokens_seen': 1670720}
{'loss': 0.8344, 'grad_norm': 0.21930496394634247, 'learning_rate': 4.6512176312793736e-05, 'epoch': 6.42, 'num_input_tokens_seen': 1761184}
{'loss': 0.7998, 'grad_norm': 0.26676416397094727, 'learning_rate': 4.131759111665349e-05, 'epoch': 6.76, 'num_input_tokens_seen': 1852800}
{'loss': 0.8086, 'grad_norm': 0.27332258224487305, 'learning_rate': 3.6218132209150045e-05, 'epoch': 7.1, 'num_input_tokens_seen': 1948992}
{'loss': 0.8231, 'grad_norm': 0.22892576456069946, 'learning_rate': 3.12696703292044e-05, 'epoch': 7.44, 'num_input_tokens_seen': 2043936}
{'loss': 0.7765, 'grad_norm': 0.27078768610954285, 'learning_rate': 2.6526421860705473e-05, 'epoch': 7.77, 'num_input_tokens_seen': 2135248}
{'loss': 0.8021, 'grad_norm': 0.25509998202323914, 'learning_rate': 2.2040354826462668e-05, 'epoch': 8.11, 'num_input_tokens_seen': 2236064}
{'loss': 0.7974, 'grad_norm': 0.2659051716327667, 'learning_rate': 1.7860619515673033e-05, 'epoch': 8.45, 'num_input_tokens_seen': 2324464}
{'loss': 0.7794, 'grad_norm': 0.27531927824020386, 'learning_rate': 1.4033009983067452e-05, 'epoch': 8.79, 'num_input_tokens_seen': 2416208}
{'loss': 0.7567, 'grad_norm': 0.3656441271305084, 'learning_rate': 1.0599462319663905e-05, 'epoch': 9.13, 'num_input_tokens_seen': 2506000}
{'loss': 0.8035, 'grad_norm': 0.2913309335708618, 'learning_rate': 7.597595192178702e-06, 'epoch': 9.46, 'num_input_tokens_seen': 2600192}
{'loss': 0.7661, 'grad_norm': 0.2591525912284851, 'learning_rate': 5.060297685041659e-06, 'epoch': 9.8, 'num_input_tokens_seen': 2690464}
{'loss': 0.7733, 'grad_norm': 0.28467172384262085, 'learning_rate': 3.0153689607045845e-06, 'epoch': 10.14, 'num_input_tokens_seen': 2783632}
{'loss': 0.7607, 'grad_norm': 0.2611902952194214, 'learning_rate': 1.4852136862001764e-06, 'epoch': 10.48, 'num_input_tokens_seen': 2881376}
{'loss': 0.7535, 'grad_norm': 0.31149354577064514, 'learning_rate': 4.865965629214819e-07, 'epoch': 10.82, 'num_input_tokens_seen': 2965632}
{'loss': 0.7747, 'grad_norm': 0.2638636827468872, 'learning_rate': 3.04586490452119e-08, 'epoch': 11.15, 'num_input_tokens_seen': 3056208}
{'train_runtime': 176.2337, 'train_samples_per_second': 63.552, 'train_steps_per_second': 0.567, 'train_loss': 0.8875525349378586, 'epoch': 11.27, 'num_input_tokens_seen': 3088432}
***** train metrics *****
epoch = 11.2676
num_input_tokens_seen = 3088432
total_flos = 114374223GF
train_loss = 0.8876
train_runtime = 0:02:56.23
train_samples_per_second = 63.552
train_steps_per_second = 0.567
Figure saved at: ./results/lora_sft/Llama2-7B_2/llama2_lora_sft_1_test_token/training_loss.png
08/20/2024 05:54:12 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
08/20/2024 05:54:12 - WARNING - llamafactory.extras.ploting - No metric eval_accuracy to plot.
***** eval metrics *****
epoch = 11.2676
eval_loss = 0.9711
eval_runtime = 0:00:00.52
eval_samples_per_second = 210.469
eval_steps_per_second = 15.307
num_input_tokens_seen = 3088432

(new file: generated model card, 65 additions)

@@ -0,0 +1,65 @@
---
base_model: /home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms
library_name: peft
license: other
tags:
- llama-factory
- lora
- generated_from_trainer
model-index:
- name: llama2_lora_sft_1_test_token
results: []
---
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->
# llama2_lora_sft_1_test_token
This model is a fine-tuned version of [/home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms](https://huggingface.co//home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms) on the identity and the alpaca_en_demo datasets.
It achieves the following results on the evaluation set:
- Loss: 0.9711
- Num Input Tokens Seen: 3088432
## Model description
More information needed
## Intended uses & limitations
More information needed
## Training and evaluation data
More information needed
## Training procedure
### Training hyperparameters
The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 2
- eval_batch_size: 2
- seed: 42
- distributed_type: multi-GPU
- num_devices: 7
- gradient_accumulation_steps: 8
- total_train_batch_size: 112
- total_eval_batch_size: 14
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- training_steps: 100
### Training results
### Framework versions
- PEFT 0.12.0
- Transformers 4.43.4
- Pytorch 2.4.0+cu121
- Datasets 2.20.0
- Tokenizers 0.19.1

View File

@ -0,0 +1,34 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "/home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_dropout": 0.0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"o_proj",
"k_proj",
"gate_proj",
"up_proj",
"v_proj",
"down_proj",
"q_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}
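A minimal sketch of loading an adapter saved with this config; the PEFT calls are standard, while the adapter directory is assumed to be the output path seen in the training log above.

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_path = "/home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms"
adapter_path = "./results/lora_sft/Llama2-7B_2/llama2_lora_sft_1_test_token"  # assumed output dir

base = AutoModelForCausalLM.from_pretrained(base_path)
model = PeftModel.from_pretrained(base, adapter_path)  # reads adapter_config.json + adapter weights
model = model.merge_and_unload()                       # optional: fold the LoRA deltas into the base weights
```

With `r: 8` and `lora_alpha: 16`, each of the seven target projections receives a rank-8 update scaled by `lora_alpha / r = 2`.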

View File

@ -0,0 +1,3 @@
{
"<pad>": 32000
}
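This file records a single extra token, `<pad>` at id 32000, appended to Llama-2's 32000-token base vocabulary. A sketch of how such a token is typically added with plain `transformers` calls (whether LLaMA-Factory does exactly this internally is an assumption):

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

base_path = "/home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms"
tok = AutoTokenizer.from_pretrained(base_path)
model = AutoModelForCausalLM.from_pretrained(base_path)

tok.add_tokens(["<pad>"])                # takes the next free id, 32000 (special=false, as dumped)
model.resize_token_embeddings(len(tok))  # grow the embedding matrix from 32000 to 32001 rows
```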

View File

@ -0,0 +1,13 @@
{
"epoch": 11.267605633802816,
"eval_loss": 0.9710721969604492,
"eval_runtime": 0.5226,
"eval_samples_per_second": 210.469,
"eval_steps_per_second": 15.307,
"num_input_tokens_seen": 3088432,
"total_flos": 1.2280838762790912e+17,
"train_loss": 0.8875525349378586,
"train_runtime": 176.2337,
"train_samples_per_second": 63.552,
"train_steps_per_second": 0.567
}

View File

@ -0,0 +1,202 @@
---
base_model: /home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms
library_name: peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
## How to Get Started with the Model
Use the code below to get started with the model.
[More Information Needed]
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
### Framework versions
- PEFT 0.12.0

View File

@ -0,0 +1,34 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": "/home/user/.cache/modelscope/hub/modelscope/Llama-2-7b-ms",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 16,
"lora_dropout": 0.0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 8,
"rank_pattern": {},
"revision": null,
"target_modules": [
"o_proj",
"k_proj",
"gate_proj",
"up_proj",
"v_proj",
"down_proj",
"q_proj"
],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}

View File

@ -0,0 +1,3 @@
{
"<pad>": 32000
}

View File

@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}

View File

@ -0,0 +1,52 @@
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<s>",
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<unk>",
"padding_side": "right",
"sp_model_kwargs": {},
"split_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}
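The `chat_template` above is a Jinja string implementing Llama-2's `[INST]` format; a minimal rendering sketch using the standard `apply_chat_template` API available in the pinned Transformers 4.43 (the directory and messages are illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./results/lora_sft/Llama2-7B_2/llama2_lora_sft_1_test_token")

messages = [
    {"role": "user", "content": "Hi!"},
    {"role": "assistant", "content": "Hello, how can I help?"},
]
print(tok.apply_chat_template(messages, tokenize=False))
# -> "<s>[INST] Hi! [/INST]Hello, how can I help?</s>"
```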

View File

@ -0,0 +1,297 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 11.267605633802816,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3380281690140845,
"grad_norm": 0.21601451933383942,
"learning_rate": 3e-05,
"loss": 1.2,
"num_input_tokens_seen": 97680,
"step": 3
},
{
"epoch": 0.676056338028169,
"grad_norm": 0.3155970871448517,
"learning_rate": 6e-05,
"loss": 1.1929,
"num_input_tokens_seen": 190736,
"step": 6
},
{
"epoch": 1.0140845070422535,
"grad_norm": 0.2989441156387329,
"learning_rate": 9e-05,
"loss": 1.2289,
"num_input_tokens_seen": 276880,
"step": 9
},
{
"epoch": 1.352112676056338,
"grad_norm": 0.42915046215057373,
"learning_rate": 9.987820251299122e-05,
"loss": 1.1455,
"num_input_tokens_seen": 369776,
"step": 12
},
{
"epoch": 1.6901408450704225,
"grad_norm": 0.2830324172973633,
"learning_rate": 9.924038765061042e-05,
"loss": 1.0487,
"num_input_tokens_seen": 461200,
"step": 15
},
{
"epoch": 2.028169014084507,
"grad_norm": 0.3625435531139374,
"learning_rate": 9.806308479691595e-05,
"loss": 1.0015,
"num_input_tokens_seen": 551424,
"step": 18
},
{
"epoch": 2.3661971830985915,
"grad_norm": 0.22050900757312775,
"learning_rate": 9.635919272833938e-05,
"loss": 0.9574,
"num_input_tokens_seen": 649520,
"step": 21
},
{
"epoch": 2.704225352112676,
"grad_norm": 0.23838457465171814,
"learning_rate": 9.414737964294636e-05,
"loss": 0.9658,
"num_input_tokens_seen": 742832,
"step": 24
},
{
"epoch": 3.0422535211267605,
"grad_norm": 0.31824183464050293,
"learning_rate": 9.145187862775209e-05,
"loss": 0.9311,
"num_input_tokens_seen": 827888,
"step": 27
},
{
"epoch": 3.380281690140845,
"grad_norm": 0.22274430096149445,
"learning_rate": 8.83022221559489e-05,
"loss": 0.9186,
"num_input_tokens_seen": 921920,
"step": 30
},
{
"epoch": 3.7183098591549295,
"grad_norm": 0.2516893446445465,
"learning_rate": 8.473291852294987e-05,
"loss": 0.9014,
"num_input_tokens_seen": 1010144,
"step": 33
},
{
"epoch": 4.056338028169014,
"grad_norm": 0.18256805837154388,
"learning_rate": 8.07830737662829e-05,
"loss": 0.8781,
"num_input_tokens_seen": 1108752,
"step": 36
},
{
"epoch": 4.394366197183099,
"grad_norm": 0.21118617057800293,
"learning_rate": 7.649596321166024e-05,
"loss": 0.882,
"num_input_tokens_seen": 1206192,
"step": 39
},
{
"epoch": 4.732394366197183,
"grad_norm": 0.2069379687309265,
"learning_rate": 7.191855733945387e-05,
"loss": 0.8807,
"num_input_tokens_seen": 1294512,
"step": 42
},
{
"epoch": 5.070422535211268,
"grad_norm": 0.21595001220703125,
"learning_rate": 6.710100716628344e-05,
"loss": 0.8613,
"num_input_tokens_seen": 1386112,
"step": 45
},
{
"epoch": 5.408450704225352,
"grad_norm": 0.16913726925849915,
"learning_rate": 6.209609477998338e-05,
"loss": 0.8721,
"num_input_tokens_seen": 1478224,
"step": 48
},
{
"epoch": 5.746478873239437,
"grad_norm": 0.23615054786205292,
"learning_rate": 5.695865504800327e-05,
"loss": 0.8438,
"num_input_tokens_seen": 1576032,
"step": 51
},
{
"epoch": 6.084507042253521,
"grad_norm": 0.2467828243970871,
"learning_rate": 5.174497483512506e-05,
"loss": 0.8324,
"num_input_tokens_seen": 1670720,
"step": 54
},
{
"epoch": 6.422535211267606,
"grad_norm": 0.21930496394634247,
"learning_rate": 4.6512176312793736e-05,
"loss": 0.8344,
"num_input_tokens_seen": 1761184,
"step": 57
},
{
"epoch": 6.76056338028169,
"grad_norm": 0.26676416397094727,
"learning_rate": 4.131759111665349e-05,
"loss": 0.7998,
"num_input_tokens_seen": 1852800,
"step": 60
},
{
"epoch": 7.098591549295775,
"grad_norm": 0.27332258224487305,
"learning_rate": 3.6218132209150045e-05,
"loss": 0.8086,
"num_input_tokens_seen": 1948992,
"step": 63
},
{
"epoch": 7.436619718309859,
"grad_norm": 0.22892576456069946,
"learning_rate": 3.12696703292044e-05,
"loss": 0.8231,
"num_input_tokens_seen": 2043936,
"step": 66
},
{
"epoch": 7.774647887323944,
"grad_norm": 0.27078768610954285,
"learning_rate": 2.6526421860705473e-05,
"loss": 0.7765,
"num_input_tokens_seen": 2135248,
"step": 69
},
{
"epoch": 8.112676056338028,
"grad_norm": 0.25509998202323914,
"learning_rate": 2.2040354826462668e-05,
"loss": 0.8021,
"num_input_tokens_seen": 2236064,
"step": 72
},
{
"epoch": 8.450704225352112,
"grad_norm": 0.2659051716327667,
"learning_rate": 1.7860619515673033e-05,
"loss": 0.7974,
"num_input_tokens_seen": 2324464,
"step": 75
},
{
"epoch": 8.788732394366198,
"grad_norm": 0.27531927824020386,
"learning_rate": 1.4033009983067452e-05,
"loss": 0.7794,
"num_input_tokens_seen": 2416208,
"step": 78
},
{
"epoch": 9.126760563380282,
"grad_norm": 0.3656441271305084,
"learning_rate": 1.0599462319663905e-05,
"loss": 0.7567,
"num_input_tokens_seen": 2506000,
"step": 81
},
{
"epoch": 9.464788732394366,
"grad_norm": 0.2913309335708618,
"learning_rate": 7.597595192178702e-06,
"loss": 0.8035,
"num_input_tokens_seen": 2600192,
"step": 84
},
{
"epoch": 9.80281690140845,
"grad_norm": 0.2591525912284851,
"learning_rate": 5.060297685041659e-06,
"loss": 0.7661,
"num_input_tokens_seen": 2690464,
"step": 87
},
{
"epoch": 10.140845070422536,
"grad_norm": 0.28467172384262085,
"learning_rate": 3.0153689607045845e-06,
"loss": 0.7733,
"num_input_tokens_seen": 2783632,
"step": 90
},
{
"epoch": 10.47887323943662,
"grad_norm": 0.2611902952194214,
"learning_rate": 1.4852136862001764e-06,
"loss": 0.7607,
"num_input_tokens_seen": 2881376,
"step": 93
},
{
"epoch": 10.816901408450704,
"grad_norm": 0.31149354577064514,
"learning_rate": 4.865965629214819e-07,
"loss": 0.7535,
"num_input_tokens_seen": 2965632,
"step": 96
},
{
"epoch": 11.154929577464788,
"grad_norm": 0.2638636827468872,
"learning_rate": 3.04586490452119e-08,
"loss": 0.7747,
"num_input_tokens_seen": 3056208,
"step": 99
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 3088432,
"num_train_epochs": 13,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2280838762790912e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
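The `log_history` above is what `llamafactory.extras.ploting` reads to produce the `training_loss.png` mentioned in the run output; a minimal re-plotting sketch from the file alone (the matplotlib usage is an assumption, not LLaMA-Factory's own code):

```python
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:   # assumed filename for the state shown above
    state = json.load(f)

points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
steps, losses = zip(*points)

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss.png")
```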

View File

@ -0,0 +1,8 @@
{
"epoch": 11.267605633802816,
"eval_loss": 0.9710721969604492,
"eval_runtime": 0.5226,
"eval_samples_per_second": 210.469,
"eval_steps_per_second": 15.307,
"num_input_tokens_seen": 3088432
}

View File

@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}

File diff suppressed because it is too large.

View File

@ -0,0 +1,52 @@
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<s>",
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<unk>",
"padding_side": "right",
"sp_model_kwargs": {},
"split_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}

View File

@ -0,0 +1,9 @@
{
"epoch": 11.267605633802816,
"num_input_tokens_seen": 3088432,
"total_flos": 1.2280838762790912e+17,
"train_loss": 0.8875525349378586,
"train_runtime": 176.2337,
"train_samples_per_second": 63.552,
"train_steps_per_second": 0.567
}

View File

@ -0,0 +1,34 @@
{"current_steps": 3, "total_steps": 100, "loss": 1.2, "learning_rate": 3e-05, "epoch": 0.3380281690140845, "percentage": 3.0, "elapsed_time": "0:00:05", "remaining_time": "0:03:03", "throughput": "17207.49", "total_tokens": 97680}
{"current_steps": 6, "total_steps": 100, "loss": 1.1929, "learning_rate": 6e-05, "epoch": 0.676056338028169, "percentage": 6.0, "elapsed_time": "0:00:10", "remaining_time": "0:02:51", "throughput": "17393.63", "total_tokens": 190736}
{"current_steps": 9, "total_steps": 100, "loss": 1.2289, "learning_rate": 9e-05, "epoch": 1.0140845070422535, "percentage": 9.0, "elapsed_time": "0:00:16", "remaining_time": "0:02:44", "throughput": "17051.31", "total_tokens": 276880}
{"current_steps": 12, "total_steps": 100, "loss": 1.1455, "learning_rate": 9.987820251299122e-05, "epoch": 1.352112676056338, "percentage": 12.0, "elapsed_time": "0:00:21", "remaining_time": "0:02:38", "throughput": "17085.10", "total_tokens": 369776}
{"current_steps": 15, "total_steps": 100, "loss": 1.0487, "learning_rate": 9.924038765061042e-05, "epoch": 1.6901408450704225, "percentage": 15.0, "elapsed_time": "0:00:27", "remaining_time": "0:02:33", "throughput": "17070.24", "total_tokens": 461200}
{"current_steps": 18, "total_steps": 100, "loss": 1.0015, "learning_rate": 9.806308479691595e-05, "epoch": 2.028169014084507, "percentage": 18.0, "elapsed_time": "0:00:32", "remaining_time": "0:02:26", "throughput": "17143.57", "total_tokens": 551424}
{"current_steps": 21, "total_steps": 100, "loss": 0.9574, "learning_rate": 9.635919272833938e-05, "epoch": 2.3661971830985915, "percentage": 21.0, "elapsed_time": "0:00:37", "remaining_time": "0:02:21", "throughput": "17218.05", "total_tokens": 649520}
{"current_steps": 24, "total_steps": 100, "loss": 0.9658, "learning_rate": 9.414737964294636e-05, "epoch": 2.704225352112676, "percentage": 24.0, "elapsed_time": "0:00:42", "remaining_time": "0:02:15", "throughput": "17359.47", "total_tokens": 742832}
{"current_steps": 27, "total_steps": 100, "loss": 0.9311, "learning_rate": 9.145187862775209e-05, "epoch": 3.0422535211267605, "percentage": 27.0, "elapsed_time": "0:00:47", "remaining_time": "0:02:09", "throughput": "17260.99", "total_tokens": 827888}
{"current_steps": 30, "total_steps": 100, "loss": 0.9186, "learning_rate": 8.83022221559489e-05, "epoch": 3.380281690140845, "percentage": 30.0, "elapsed_time": "0:00:53", "remaining_time": "0:02:04", "throughput": "17319.22", "total_tokens": 921920}
{"current_steps": 33, "total_steps": 100, "loss": 0.9014, "learning_rate": 8.473291852294987e-05, "epoch": 3.7183098591549295, "percentage": 33.0, "elapsed_time": "0:00:58", "remaining_time": "0:01:58", "throughput": "17285.32", "total_tokens": 1010144}
{"current_steps": 36, "total_steps": 100, "loss": 0.8781, "learning_rate": 8.07830737662829e-05, "epoch": 4.056338028169014, "percentage": 36.0, "elapsed_time": "0:01:03", "remaining_time": "0:01:53", "throughput": "17363.03", "total_tokens": 1108752}
{"current_steps": 39, "total_steps": 100, "loss": 0.882, "learning_rate": 7.649596321166024e-05, "epoch": 4.394366197183099, "percentage": 39.0, "elapsed_time": "0:01:09", "remaining_time": "0:01:48", "throughput": "17367.71", "total_tokens": 1206192}
{"current_steps": 42, "total_steps": 100, "loss": 0.8807, "learning_rate": 7.191855733945387e-05, "epoch": 4.732394366197183, "percentage": 42.0, "elapsed_time": "0:01:14", "remaining_time": "0:01:42", "throughput": "17419.11", "total_tokens": 1294512}
{"current_steps": 45, "total_steps": 100, "loss": 0.8613, "learning_rate": 6.710100716628344e-05, "epoch": 5.070422535211268, "percentage": 45.0, "elapsed_time": "0:01:19", "remaining_time": "0:01:37", "throughput": "17454.51", "total_tokens": 1386112}
{"current_steps": 48, "total_steps": 100, "loss": 0.8721, "learning_rate": 6.209609477998338e-05, "epoch": 5.408450704225352, "percentage": 48.0, "elapsed_time": "0:01:24", "remaining_time": "0:01:31", "throughput": "17514.67", "total_tokens": 1478224}
{"current_steps": 51, "total_steps": 100, "loss": 0.8438, "learning_rate": 5.695865504800327e-05, "epoch": 5.746478873239437, "percentage": 51.0, "elapsed_time": "0:01:29", "remaining_time": "0:01:26", "throughput": "17531.15", "total_tokens": 1576032}
{"current_steps": 54, "total_steps": 100, "loss": 0.8324, "learning_rate": 5.174497483512506e-05, "epoch": 6.084507042253521, "percentage": 54.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:21", "throughput": "17532.01", "total_tokens": 1670720}
{"current_steps": 57, "total_steps": 100, "loss": 0.8344, "learning_rate": 4.6512176312793736e-05, "epoch": 6.422535211267606, "percentage": 57.0, "elapsed_time": "0:01:40", "remaining_time": "0:01:15", "throughput": "17540.81", "total_tokens": 1761184}
{"current_steps": 60, "total_steps": 100, "loss": 0.7998, "learning_rate": 4.131759111665349e-05, "epoch": 6.76056338028169, "percentage": 60.0, "elapsed_time": "0:01:45", "remaining_time": "0:01:10", "throughput": "17548.28", "total_tokens": 1852800}
{"current_steps": 63, "total_steps": 100, "loss": 0.8086, "learning_rate": 3.6218132209150045e-05, "epoch": 7.098591549295775, "percentage": 63.0, "elapsed_time": "0:01:50", "remaining_time": "0:01:05", "throughput": "17572.71", "total_tokens": 1948992}
{"current_steps": 66, "total_steps": 100, "loss": 0.8231, "learning_rate": 3.12696703292044e-05, "epoch": 7.436619718309859, "percentage": 66.0, "elapsed_time": "0:01:56", "remaining_time": "0:00:59", "throughput": "17582.50", "total_tokens": 2043936}
{"current_steps": 69, "total_steps": 100, "loss": 0.7765, "learning_rate": 2.6526421860705473e-05, "epoch": 7.774647887323944, "percentage": 69.0, "elapsed_time": "0:02:01", "remaining_time": "0:00:54", "throughput": "17570.95", "total_tokens": 2135248}
{"current_steps": 72, "total_steps": 100, "loss": 0.8021, "learning_rate": 2.2040354826462668e-05, "epoch": 8.112676056338028, "percentage": 72.0, "elapsed_time": "0:02:06", "remaining_time": "0:00:49", "throughput": "17632.72", "total_tokens": 2236064}
{"current_steps": 75, "total_steps": 100, "loss": 0.7974, "learning_rate": 1.7860619515673033e-05, "epoch": 8.450704225352112, "percentage": 75.0, "elapsed_time": "0:02:12", "remaining_time": "0:00:44", "throughput": "17588.00", "total_tokens": 2324464}
{"current_steps": 78, "total_steps": 100, "loss": 0.7794, "learning_rate": 1.4033009983067452e-05, "epoch": 8.788732394366198, "percentage": 78.0, "elapsed_time": "0:02:17", "remaining_time": "0:00:38", "throughput": "17593.05", "total_tokens": 2416208}
{"current_steps": 81, "total_steps": 100, "loss": 0.7567, "learning_rate": 1.0599462319663905e-05, "epoch": 9.126760563380282, "percentage": 81.0, "elapsed_time": "0:02:22", "remaining_time": "0:00:33", "throughput": "17568.59", "total_tokens": 2506000}
{"current_steps": 84, "total_steps": 100, "loss": 0.8035, "learning_rate": 7.597595192178702e-06, "epoch": 9.464788732394366, "percentage": 84.0, "elapsed_time": "0:02:27", "remaining_time": "0:00:28", "throughput": "17587.59", "total_tokens": 2600192}
{"current_steps": 87, "total_steps": 100, "loss": 0.7661, "learning_rate": 5.060297685041659e-06, "epoch": 9.80281690140845, "percentage": 87.0, "elapsed_time": "0:02:33", "remaining_time": "0:00:22", "throughput": "17555.63", "total_tokens": 2690464}
{"current_steps": 90, "total_steps": 100, "loss": 0.7733, "learning_rate": 3.0153689607045845e-06, "epoch": 10.140845070422536, "percentage": 90.0, "elapsed_time": "0:02:38", "remaining_time": "0:00:17", "throughput": "17560.40", "total_tokens": 2783632}
{"current_steps": 93, "total_steps": 100, "loss": 0.7607, "learning_rate": 1.4852136862001764e-06, "epoch": 10.47887323943662, "percentage": 93.0, "elapsed_time": "0:02:44", "remaining_time": "0:00:12", "throughput": "17566.34", "total_tokens": 2881376}
{"current_steps": 96, "total_steps": 100, "loss": 0.7535, "learning_rate": 4.865965629214819e-07, "epoch": 10.816901408450704, "percentage": 96.0, "elapsed_time": "0:02:49", "remaining_time": "0:00:07", "throughput": "17537.84", "total_tokens": 2965632}
{"current_steps": 99, "total_steps": 100, "loss": 0.7747, "learning_rate": 3.04586490452119e-08, "epoch": 11.154929577464788, "percentage": 99.0, "elapsed_time": "0:02:54", "remaining_time": "0:00:01", "throughput": "17540.81", "total_tokens": 3056208}
{"current_steps": 100, "total_steps": 100, "epoch": 11.267605633802816, "percentage": 100.0, "elapsed_time": "0:02:56", "remaining_time": "0:00:00", "throughput": "17526.92", "total_tokens": 3088432}

View File

@ -0,0 +1,307 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 11.267605633802816,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3380281690140845,
"grad_norm": 0.21601451933383942,
"learning_rate": 3e-05,
"loss": 1.2,
"num_input_tokens_seen": 97680,
"step": 3
},
{
"epoch": 0.676056338028169,
"grad_norm": 0.3155970871448517,
"learning_rate": 6e-05,
"loss": 1.1929,
"num_input_tokens_seen": 190736,
"step": 6
},
{
"epoch": 1.0140845070422535,
"grad_norm": 0.2989441156387329,
"learning_rate": 9e-05,
"loss": 1.2289,
"num_input_tokens_seen": 276880,
"step": 9
},
{
"epoch": 1.352112676056338,
"grad_norm": 0.42915046215057373,
"learning_rate": 9.987820251299122e-05,
"loss": 1.1455,
"num_input_tokens_seen": 369776,
"step": 12
},
{
"epoch": 1.6901408450704225,
"grad_norm": 0.2830324172973633,
"learning_rate": 9.924038765061042e-05,
"loss": 1.0487,
"num_input_tokens_seen": 461200,
"step": 15
},
{
"epoch": 2.028169014084507,
"grad_norm": 0.3625435531139374,
"learning_rate": 9.806308479691595e-05,
"loss": 1.0015,
"num_input_tokens_seen": 551424,
"step": 18
},
{
"epoch": 2.3661971830985915,
"grad_norm": 0.22050900757312775,
"learning_rate": 9.635919272833938e-05,
"loss": 0.9574,
"num_input_tokens_seen": 649520,
"step": 21
},
{
"epoch": 2.704225352112676,
"grad_norm": 0.23838457465171814,
"learning_rate": 9.414737964294636e-05,
"loss": 0.9658,
"num_input_tokens_seen": 742832,
"step": 24
},
{
"epoch": 3.0422535211267605,
"grad_norm": 0.31824183464050293,
"learning_rate": 9.145187862775209e-05,
"loss": 0.9311,
"num_input_tokens_seen": 827888,
"step": 27
},
{
"epoch": 3.380281690140845,
"grad_norm": 0.22274430096149445,
"learning_rate": 8.83022221559489e-05,
"loss": 0.9186,
"num_input_tokens_seen": 921920,
"step": 30
},
{
"epoch": 3.7183098591549295,
"grad_norm": 0.2516893446445465,
"learning_rate": 8.473291852294987e-05,
"loss": 0.9014,
"num_input_tokens_seen": 1010144,
"step": 33
},
{
"epoch": 4.056338028169014,
"grad_norm": 0.18256805837154388,
"learning_rate": 8.07830737662829e-05,
"loss": 0.8781,
"num_input_tokens_seen": 1108752,
"step": 36
},
{
"epoch": 4.394366197183099,
"grad_norm": 0.21118617057800293,
"learning_rate": 7.649596321166024e-05,
"loss": 0.882,
"num_input_tokens_seen": 1206192,
"step": 39
},
{
"epoch": 4.732394366197183,
"grad_norm": 0.2069379687309265,
"learning_rate": 7.191855733945387e-05,
"loss": 0.8807,
"num_input_tokens_seen": 1294512,
"step": 42
},
{
"epoch": 5.070422535211268,
"grad_norm": 0.21595001220703125,
"learning_rate": 6.710100716628344e-05,
"loss": 0.8613,
"num_input_tokens_seen": 1386112,
"step": 45
},
{
"epoch": 5.408450704225352,
"grad_norm": 0.16913726925849915,
"learning_rate": 6.209609477998338e-05,
"loss": 0.8721,
"num_input_tokens_seen": 1478224,
"step": 48
},
{
"epoch": 5.746478873239437,
"grad_norm": 0.23615054786205292,
"learning_rate": 5.695865504800327e-05,
"loss": 0.8438,
"num_input_tokens_seen": 1576032,
"step": 51
},
{
"epoch": 6.084507042253521,
"grad_norm": 0.2467828243970871,
"learning_rate": 5.174497483512506e-05,
"loss": 0.8324,
"num_input_tokens_seen": 1670720,
"step": 54
},
{
"epoch": 6.422535211267606,
"grad_norm": 0.21930496394634247,
"learning_rate": 4.6512176312793736e-05,
"loss": 0.8344,
"num_input_tokens_seen": 1761184,
"step": 57
},
{
"epoch": 6.76056338028169,
"grad_norm": 0.26676416397094727,
"learning_rate": 4.131759111665349e-05,
"loss": 0.7998,
"num_input_tokens_seen": 1852800,
"step": 60
},
{
"epoch": 7.098591549295775,
"grad_norm": 0.27332258224487305,
"learning_rate": 3.6218132209150045e-05,
"loss": 0.8086,
"num_input_tokens_seen": 1948992,
"step": 63
},
{
"epoch": 7.436619718309859,
"grad_norm": 0.22892576456069946,
"learning_rate": 3.12696703292044e-05,
"loss": 0.8231,
"num_input_tokens_seen": 2043936,
"step": 66
},
{
"epoch": 7.774647887323944,
"grad_norm": 0.27078768610954285,
"learning_rate": 2.6526421860705473e-05,
"loss": 0.7765,
"num_input_tokens_seen": 2135248,
"step": 69
},
{
"epoch": 8.112676056338028,
"grad_norm": 0.25509998202323914,
"learning_rate": 2.2040354826462668e-05,
"loss": 0.8021,
"num_input_tokens_seen": 2236064,
"step": 72
},
{
"epoch": 8.450704225352112,
"grad_norm": 0.2659051716327667,
"learning_rate": 1.7860619515673033e-05,
"loss": 0.7974,
"num_input_tokens_seen": 2324464,
"step": 75
},
{
"epoch": 8.788732394366198,
"grad_norm": 0.27531927824020386,
"learning_rate": 1.4033009983067452e-05,
"loss": 0.7794,
"num_input_tokens_seen": 2416208,
"step": 78
},
{
"epoch": 9.126760563380282,
"grad_norm": 0.3656441271305084,
"learning_rate": 1.0599462319663905e-05,
"loss": 0.7567,
"num_input_tokens_seen": 2506000,
"step": 81
},
{
"epoch": 9.464788732394366,
"grad_norm": 0.2913309335708618,
"learning_rate": 7.597595192178702e-06,
"loss": 0.8035,
"num_input_tokens_seen": 2600192,
"step": 84
},
{
"epoch": 9.80281690140845,
"grad_norm": 0.2591525912284851,
"learning_rate": 5.060297685041659e-06,
"loss": 0.7661,
"num_input_tokens_seen": 2690464,
"step": 87
},
{
"epoch": 10.140845070422536,
"grad_norm": 0.28467172384262085,
"learning_rate": 3.0153689607045845e-06,
"loss": 0.7733,
"num_input_tokens_seen": 2783632,
"step": 90
},
{
"epoch": 10.47887323943662,
"grad_norm": 0.2611902952194214,
"learning_rate": 1.4852136862001764e-06,
"loss": 0.7607,
"num_input_tokens_seen": 2881376,
"step": 93
},
{
"epoch": 10.816901408450704,
"grad_norm": 0.31149354577064514,
"learning_rate": 4.865965629214819e-07,
"loss": 0.7535,
"num_input_tokens_seen": 2965632,
"step": 96
},
{
"epoch": 11.154929577464788,
"grad_norm": 0.2638636827468872,
"learning_rate": 3.04586490452119e-08,
"loss": 0.7747,
"num_input_tokens_seen": 3056208,
"step": 99
},
{
"epoch": 11.267605633802816,
"num_input_tokens_seen": 3088432,
"step": 100,
"total_flos": 1.2280838762790912e+17,
"train_loss": 0.8875525349378586,
"train_runtime": 176.2337,
"train_samples_per_second": 63.552,
"train_steps_per_second": 0.567
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 3088432,
"num_train_epochs": 13,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2280838762790912e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}

Binary file not shown.

(Image added; 37 KiB)