cicdatopea
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -101,35 +101,26 @@ for n, m in block.named_modules():
|
|
101 |
if isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
|
102 |
if "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2])<63 and "down_proj" not in n :
|
103 |
device ="cuda:1"
|
104 |
-
output_device = "cuda:1"
|
105 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(n.split('.')[-2])<63:
|
106 |
device = "cuda:1"
|
107 |
-
output_device = "cuda:0"
|
108 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128 and "down_proj" not in n:
|
109 |
device = "cuda:2"
|
110 |
-
output_device = "cuda:2"
|
111 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128:
|
112 |
device = "cuda:2"
|
113 |
-
output_device = "cuda:0"
|
114 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 128 and int(
|
115 |
n.split('.')[-2]) < 192 and "down_proj" not in n:
|
116 |
device = "cuda:3"
|
117 |
-
output_device = "cuda:3"
|
118 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(
|
119 |
n.split('.')[-2]) >= 128 and int(n.split('.')[-2]) < 192:
|
120 |
device = "cuda:3"
|
121 |
-
output_device = "cuda:0"
|
122 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" not in n and int(
|
123 |
n.split('.')[-2]) >= 192:
|
124 |
device = "cuda:4"
|
125 |
-
output_device = "cuda:4"
|
126 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(
|
127 |
n.split('.')[-2]) >= 192:
|
128 |
device = "cuda:4"
|
129 |
-
output_device = "cuda:0"
|
130 |
else:
|
131 |
device = "cuda:0"
|
132 |
-
output_device = "cuda:0"
|
133 |
n = n[2:]
|
134 |
device_map.update({n: device})
|
135 |
|
|
|
101 |
if isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
|
102 |
if "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2])<63 and "down_proj" not in n :
|
103 |
device ="cuda:1"
|
|
|
104 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(n.split('.')[-2])<63:
|
105 |
device = "cuda:1"
|
|
|
106 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128 and "down_proj" not in n:
|
107 |
device = "cuda:2"
|
|
|
108 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128:
|
109 |
device = "cuda:2"
|
|
|
110 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 128 and int(
|
111 |
n.split('.')[-2]) < 192 and "down_proj" not in n:
|
112 |
device = "cuda:3"
|
|
|
113 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(
|
114 |
n.split('.')[-2]) >= 128 and int(n.split('.')[-2]) < 192:
|
115 |
device = "cuda:3"
|
|
|
116 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" not in n and int(
|
117 |
n.split('.')[-2]) >= 192:
|
118 |
device = "cuda:4"
|
|
|
119 |
elif "experts" in n and ("shared_experts" not in n) and "down_proj" in n and int(
|
120 |
n.split('.')[-2]) >= 192:
|
121 |
device = "cuda:4"
|
|
|
122 |
else:
|
123 |
device = "cuda:0"
|
|
|
124 |
n = n[2:]
|
125 |
device_map.update({n: device})
|
126 |
|