{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":176982014,"defaultBranch":"main","name":"Megatron-LM","ownerLogin":"NVIDIA","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2019-03-21T16:15:52.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/1728152?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715376377.0","currentOid":""},"activityList":{"items":[{"before":"7b361319d27b98431bbebe8b0f9fe4700a423817","after":"c4d12e26b2dc25a2eab7da92e2ac30338c0ed3de","ref":"refs/heads/main","pushedAt":"2024-05-31T05:02:49.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'xuwenc/moe_gmm_infer_fix' into 'main'\n\nFix issue #109 Weird outputs when inferring on models with GroupedGEMM\n\nCloses #109\n\nSee merge request ADLR/megatron-lm!1501","shortMessageHtmlLink":"Merge branch 'xuwenc/moe_gmm_infer_fix' into 'main'"}},{"before":"dace0330ac8b5bbc3a21ff14f7ef7544abb16334","after":"a645f89671be698612170539f2089dc15db66a80","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-31T05:02:32.000Z","pushType":"push","commitsCount":12,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'cherry-pick-c4d12e26' into 'core_r0.7.0'\n\nMerge branch 'xuwenc/moe_gmm_infer_fix' into 'main'\n\nSee merge request ADLR/megatron-lm!1519","shortMessageHtmlLink":"Merge branch 'cherry-pick-c4d12e26' into 'core_r0.7.0'"}},{"before":"a5534c8f3e2c49ad8ce486f5cba3408e14f5fcc2","after":"7b361319d27b98431bbebe8b0f9fe4700a423817","ref":"refs/heads/master","pushedAt":"2024-05-30T23:25:43.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'revert-fdee903f' into 'main'\n\nRevert \"Merge branch 'gg_guard' into 'main'\"\n\nSee merge request ADLR/megatron-lm!1515","shortMessageHtmlLink":"Merge branch 'revert-fdee903f' into 'main'"}},{"before":"a5534c8f3e2c49ad8ce486f5cba3408e14f5fcc2","after":"7b361319d27b98431bbebe8b0f9fe4700a423817","ref":"refs/heads/main","pushedAt":"2024-05-30T23:25:36.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'revert-fdee903f' into 'main'\n\nRevert \"Merge branch 'gg_guard' into 'main'\"\n\nSee merge request ADLR/megatron-lm!1515","shortMessageHtmlLink":"Merge branch 'revert-fdee903f' into 'main'"}},{"before":"0650d8335d45162845398a97880374b81c4d84b1","after":"a5534c8f3e2c49ad8ce486f5cba3408e14f5fcc2","ref":"refs/heads/master","pushedAt":"2024-05-28T16:45:20.000Z","pushType":"push","commitsCount":17,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'aanoosheh/fix/layernorm-grad' into 'main'\n\nCheck if layernorm gradients even requires grad to avoid AttributeError\n\nSee merge request ADLR/megatron-lm!1476","shortMessageHtmlLink":"Merge branch 'aanoosheh/fix/layernorm-grad' into 'main'"}},{"before":"0650d8335d45162845398a97880374b81c4d84b1","after":"a5534c8f3e2c49ad8ce486f5cba3408e14f5fcc2","ref":"refs/heads/main","pushedAt":"2024-05-28T16:45:14.000Z","pushType":"push","commitsCount":17,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'aanoosheh/fix/layernorm-grad' into 'main'\n\nCheck if layernorm gradients even requires grad to avoid AttributeError\n\nSee merge request ADLR/megatron-lm!1476","shortMessageHtmlLink":"Merge branch 'aanoosheh/fix/layernorm-grad' into 'main'"}},{"before":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","after":"0650d8335d45162845398a97880374b81c4d84b1","ref":"refs/heads/master","pushedAt":"2024-05-23T18:37:05.000Z","pushType":"push","commitsCount":42,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'\n\nRetro bugfixes for Mcore 0.7\n\nSee merge request ADLR/megatron-lm!1460","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'"}},{"before":"c3677e09aa4e2eec37048307bd795928b8f8324a","after":"0650d8335d45162845398a97880374b81c4d84b1","ref":"refs/heads/main","pushedAt":"2024-05-23T18:36:58.000Z","pushType":"push","commitsCount":36,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'\n\nRetro bugfixes for Mcore 0.7\n\nSee merge request ADLR/megatron-lm!1460","shortMessageHtmlLink":"Merge branch 'lmcafee/retro-mcore-0.7' into 'main'"}},{"before":"c1e631517c70a3ba61a2d1a678c330fb13557f99","after":"dace0330ac8b5bbc3a21ff14f7ef7544abb16334","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-23T17:51:42.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"ericharper","name":"Eric Harper","path":"/ericharper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11999610?s=80&v=4"},"commit":{"message":"Merge branch 'cherry-pick-858f22e2' into 'core_r0.7.0'\n\nMerge branch '158-fix-the-typo-in-topk_with_capacity' into 'main'\n\nSee merge request ADLR/megatron-lm!1450","shortMessageHtmlLink":"Merge branch 'cherry-pick-858f22e2' into 'core_r0.7.0'"}},{"before":"ac8a7e586a9d0cc41a49ab0c63ea5d1877f69025","after":"c3677e09aa4e2eec37048307bd795928b8f8324a","ref":"refs/heads/main","pushedAt":"2024-05-14T16:01:15.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/workaround_for_cublas_error' into 'main'\n\nWorkaround for TE bug where it can pick the wrong cuBLAS algorithm\n\nSee merge request ADLR/megatron-lm!1436","shortMessageHtmlLink":"Merge branch 'dnarayanan/workaround_for_cublas_error' into 'main'"}},{"before":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","after":"ac8a7e586a9d0cc41a49ab0c63ea5d1877f69025","ref":"refs/heads/main","pushedAt":"2024-05-13T22:05:06.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mblaz/log-debug-verbosity' into 'main'\n\nDecrease fully parallel save/load logging verbosity\n\nSee merge request ADLR/megatron-lm!1419","shortMessageHtmlLink":"Merge branch 'mblaz/log-debug-verbosity' into 'main'"}},{"before":"b7b98ba28db132f064b4cef3f8e0ba598dc3404b","after":"c1e631517c70a3ba61a2d1a678c330fb13557f99","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-10T22:40:02.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/1427_into_0.7' into 'core_r0.7.0'\n\nMerge !1427 into branch core_r0.7.0\n\nSee merge request ADLR/megatron-lm!1434","shortMessageHtmlLink":"Merge branch 'mike/1427_into_0.7' into 'core_r0.7.0'"}},{"before":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","after":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","ref":"refs/heads/master","pushedAt":"2024-05-10T22:39:43.000Z","pushType":"push","commitsCount":30,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/token_counting_behind_argument' into 'main'\n\nPut Per-Token-Cross-Entropy calculation behind an argument\n\nSee merge request ADLR/megatron-lm!1427","shortMessageHtmlLink":"Merge branch 'mike/token_counting_behind_argument' into 'main'"}},{"before":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","after":"f5f3be5dc5aa7dc84f190c1fefacb096d6c224f8","ref":"refs/heads/main","pushedAt":"2024-05-10T22:39:36.000Z","pushType":"push","commitsCount":30,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'mike/token_counting_behind_argument' into 'main'\n\nPut Per-Token-Cross-Entropy calculation behind an argument\n\nSee merge request ADLR/megatron-lm!1427","shortMessageHtmlLink":"Merge branch 'mike/token_counting_behind_argument' into 'main'"}},{"before":null,"after":"b7b98ba28db132f064b4cef3f8e0ba598dc3404b","ref":"refs/heads/core_r0.7.0","pushedAt":"2024-05-10T21:26:17.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"ericharper","name":"Eric Harper","path":"/ericharper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11999610?s=80&v=4"},"commit":{"message":"update version\n\nSigned-off-by: eharper ","shortMessageHtmlLink":"update version"}},{"before":"ebb1484327af4eb73ee923bf736e44db72aa1831","after":null,"ref":"refs/tags/InstructRetro","pushedAt":"2024-05-03T19:33:45.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"}},{"before":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","after":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","ref":"refs/heads/master","pushedAt":"2024-05-03T00:23:11.000Z","pushType":"push","commitsCount":111,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'\n\n[MLPerf] GPT dataset features: drop last partial validation sequence, drop extra token, return sample with 1s loss mask, mock dataset testing\n\nSee merge request ADLR/megatron-lm!1223","shortMessageHtmlLink":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'"}},{"before":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","after":"db3a3f79d1cda60ea4b3db0ceffcf20c5760e11d","ref":"refs/heads/main","pushedAt":"2024-05-03T00:23:05.000Z","pushType":"push","commitsCount":111,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'\n\n[MLPerf] GPT dataset features: drop last partial validation sequence, drop extra token, return sample with 1s loss mask, mock dataset testing\n\nSee merge request ADLR/megatron-lm!1223","shortMessageHtmlLink":"Merge branch 'boxiangw/mlperf-option-add-one-extra-token' into 'main'"}},{"before":"432683220e5b0eddce2ec0a251c3a0b16cdbff61","after":"3c0f3ef38c645043be18e51ce2698b37eaceb128","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-05-01T12:31:27.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'rachitg/ag' into 'core_r0.7.0.beta'\n\nallow disabling qkv or fc1 overlap\n\nSee merge request ADLR/megatron-lm!1398","shortMessageHtmlLink":"Merge branch 'rachitg/ag' into 'core_r0.7.0.beta'"}},{"before":"85a3a1599231fe07012109dc38fdde0c30061d8e","after":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","ref":"refs/heads/master","pushedAt":"2024-04-26T17:56:44.000Z","pushType":"push","commitsCount":94,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/check_param_hashes' into 'main'\n\nCompute hashes on each rank, and compare across DP replicas\n\nSee merge request ADLR/megatron-lm!1368","shortMessageHtmlLink":"Merge branch 'dnarayanan/check_param_hashes' into 'main'"}},{"before":"ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2","after":"0d983e64afcd84cab83124e0b7ca89a3d8ec9655","ref":"refs/heads/main","pushedAt":"2024-04-26T17:56:30.000Z","pushType":"push","commitsCount":10,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'dnarayanan/check_param_hashes' into 'main'\n\nCompute hashes on each rank, and compare across DP replicas\n\nSee merge request ADLR/megatron-lm!1368","shortMessageHtmlLink":"Merge branch 'dnarayanan/check_param_hashes' into 'main'"}},{"before":"fa5fed80fdb4120aa7d3ec8bd68ab4baec5c4fdf","after":"432683220e5b0eddce2ec0a251c3a0b16cdbff61","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-24T22:27:58.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'jbaczek/extend_transformer_block_spec' into 'core_r0.7.0.beta'\n\nAdd layer norm to TransformerBlockSubmodules\n\nSee merge request ADLR/megatron-lm!1350","shortMessageHtmlLink":"Merge branch 'jbaczek/extend_transformer_block_spec' into 'core_r0.7.…"}},{"before":"d9abf064f82f3f768b4f75a30704f01374370328","after":"ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2","ref":"refs/heads/main","pushedAt":"2024-04-18T22:09:08.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'fix_overlap_param_gather' into 'main'\n\nfix EP distopt with overlap param gather\n\nSee merge request ADLR/megatron-lm!1345","shortMessageHtmlLink":"Merge branch 'fix_overlap_param_gather' into 'main'"}},{"before":"b26d3e36a7d65e1562f6e6d2e6c18a24624f27e5","after":"cac60ce4c8203e2ed18912be63d4dd577b46830d","ref":"refs/heads/core_r0.6.0","pushedAt":"2024-04-18T22:08:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'fix_overlap_param_gather' into 'main'\n\nfix EP distopt with overlap param gather\n\nSee merge request ADLR/megatron-lm!1345\n\n(cherry picked from commit ccfeda47cb5ca10ee3c4efd9b78c6bb15c2cd3d2)\n\nac93d847 fix EP distopt with overlap param gather\nbb7b4307 change golden metrics\n0ff731ff Minor fix to thrown value error","shortMessageHtmlLink":"Merge branch 'fix_overlap_param_gather' into 'main'"}},{"before":"299f96ffe61a4bae9044a2082570b19b94d13335","after":"b26d3e36a7d65e1562f6e6d2e6c18a24624f27e5","ref":"refs/heads/core_r0.6.0","pushedAt":"2024-04-18T19:28:34.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'cherry-pick-ad354e08' into 'core_r0.6.0'\n\nCherry-pick pip package fix into core_r0.6.0\n\nSee merge request ADLR/megatron-lm!1361","shortMessageHtmlLink":"Merge branch 'cherry-pick-ad354e08' into 'core_r0.6.0'"}},{"before":"2196398f5252ead6f036b06d45f7acb89b1308da","after":"d9abf064f82f3f768b4f75a30704f01374370328","ref":"refs/heads/main","pushedAt":"2024-04-18T19:28:03.000Z","pushType":"push","commitsCount":16,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'squared_relu_fusion' into 'main'\n\nAdded fusion for squared relu\n\nSee merge request ADLR/megatron-lm!1363","shortMessageHtmlLink":"Merge branch 'squared_relu_fusion' into 'main'"}},{"before":"67bfe999ea3edfc64022f144a314ae7a48adcae5","after":"fa5fed80fdb4120aa7d3ec8bd68ab4baec5c4fdf","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-18T00:12:30.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Merge branch 'amax_group_fix' into 'core_r0.7.0.beta'\n\nChange AMAX reduction group to use TP group\n\nSee merge request ADLR/megatron-lm!1362","shortMessageHtmlLink":"Merge branch 'amax_group_fix' into 'core_r0.7.0.beta'"}},{"before":null,"after":"67bfe999ea3edfc64022f144a314ae7a48adcae5","ref":"refs/heads/core_r0.7.0.beta","pushedAt":"2024-04-16T23:40:48.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"maanug-nv","name":"Maanu Grover","path":"/maanug-nv","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/109391026?s=80&v=4"},"commit":{"message":"Update package information","shortMessageHtmlLink":"Update package information"}},{"before":"caf2007e080d65dd7488be7bd409b366e225ab5f","after":"2196398f5252ead6f036b06d45f7acb89b1308da","ref":"refs/heads/main","pushedAt":"2024-04-12T23:05:09.000Z","pushType":"push","commitsCount":6,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'rachitg/fp8dpa' into 'main'\n\nRachitg/fp8dpa\n\nSee merge request ADLR/megatron-lm!1332","shortMessageHtmlLink":"Merge branch 'rachitg/fp8dpa' into 'main'"}},{"before":"fbb375d4b5e88ce52f5f7125053068caff47f93f","after":"caf2007e080d65dd7488be7bd409b366e225ab5f","ref":"refs/heads/main","pushedAt":"2024-04-12T22:07:38.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"jaredcasper","name":"Jared Casper","path":"/jaredcasper","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/155158?s=80&v=4"},"commit":{"message":"Merge branch 'maanug/gen-test-scripts' into 'main'\n\nLocal JET test script generator\n\nSee merge request ADLR/megatron-lm!1315","shortMessageHtmlLink":"Merge branch 'maanug/gen-test-scripts' into 'main'"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEWKebvwA","startCursor":null,"endCursor":null}},"title":"Activity · NVIDIA/Megatron-LM"}