จะทำให้พารามิเตอร์ของโมเดล PyTorch ไม่เป็นโหนดใบ (leaf) และยังอยู่ในกราฟการคำนวณได้อย่างไร

10

ฉันกำลังพยายามอัปเดต/เปลี่ยนแปลงพารามิเตอร์ของโมเดลโครงข่ายประสาทเทียม แล้วให้ forward pass ของโครงข่ายที่อัปเดตแล้วยังคงอยู่ในกราฟการคำนวณ (ไม่ว่าจะเปลี่ยนแปลง/อัปเดตกี่ครั้งก็ตาม)

ฉันลองแนวคิดนี้แล้ว แต่ทุกครั้งที่ทำ PyTorch จะกำหนดให้เทนเซอร์ที่อัปเดตแล้ว (ภายในโมเดล) เป็นโหนดใบ (leaf) ซึ่งตัดการไหลของเกรเดียนต์ไปยังเครือข่ายที่ฉันต้องการให้ได้รับเกรเดียนต์ การไหลของเกรเดียนต์ถูกตัดเพราะโหนดใบไม่ได้เป็นส่วนหนึ่งของกราฟการคำนวณในแบบที่ฉันต้องการ (เพราะจริง ๆ แล้วเทนเซอร์เหล่านี้ไม่ใช่โหนดใบ)

ฉันลองมาหลายวิธีแล้ว แต่ดูเหมือนไม่มีวิธีใดได้ผล ฉันจึงเขียนโค้ดตัวอย่างแบบสมบูรณ์ในตัว (self-contained) ซึ่งพิมพ์เกรเดียนต์ของเครือข่ายที่ฉันต้องการให้มีเกรเดียนต์:

import torch
import torch.nn as nn

import copy

from collections import OrderedDict

# Minimal scalar regression problem: one random input and its target.
# (The original post also carried a commented-out image/classification
# variant: torch.randn([8,3,32,32]) / torch.randn([1,3,32,32]) inputs with
# LongTensor targets, and a Conv2d(3, 10, kernel_size=32) loss_net.)
x = torch.randn(1)
target = 12.0 * x.pow(2)

# Not referenced anywhere else in the code visible here.
criterion = nn.CrossEntropyLoss()

# Network whose parameters are rewritten by the update loop.
loss_net = nn.Sequential(
    OrderedDict([
        ('fc0', nn.Linear(1, 1)),
    ])
)

# Input to the updater network; created as a leaf tensor that tracks gradients.
hidden = torch.randn(1, 1, requires_grad=True)
# Network whose output is used to build the parameter updates.
updater_net = nn.Sequential(
    OrderedDict([
        ('fc0', nn.Linear(1, 1)),
    ])
)
print(f'updater_net.fc0.weight.is_leaf = {updater_net.fc0.weight.is_leaf}')
#
# Number of chained parameter updates to apply to loss_net.
nb_updates = 2
for i in range(nb_updates):
    print(f'i = {i}')
    # Start from a deep copy of the current state dict; its entries are
    # overwritten with the updated tensors below.
    new_params = copy.deepcopy( loss_net.state_dict() )
    ## w^<t> := f(w^<t-1>,delta^<t-1>)
    for (name, w) in loss_net.named_parameters():
        print(f'name = {name}')
        print(w.size())
        # Feed the updater its own previous output. `hidden` is rebound here,
        # so after the first pass the name no longer refers to the tensor
        # created with requires_grad=True.
        hidden = updater_net(hidden).view(1)
        print(hidden.size())
        #delta = ((hidden**2)*w/2)
        delta = w + hidden
        # Updated value: w + delta, i.e. 2*w + hidden.
        wt = w + delta
        print(wt.size())
        new_params[name] = wt
        # Earlier attempts at installing wt directly on the module, left
        # commented out by the author:
        #del loss_net.fc0.weight
        #setattr(loss_net.fc0, 'weight', nn.Parameter( wt ))
        #setattr(loss_net.fc0, 'weight', wt)
        #loss_net.fc0.weight = wt
        #loss_net.fc0.weight = nn.Parameter( wt )
    ##
    # NOTE(review): load_state_dict presumably copies the values of wt into the
    # existing parameters rather than rebinding them, which would explain the
    # lost gradient flow this post describes -- confirm against the PyTorch docs.
    loss_net.load_state_dict(new_params)
#
print()
print(f'updater_net.fc0.weight.is_leaf = {updater_net.fc0.weight.is_leaf}')
# Forward pass through loss_net after the update loop, followed by a
# squared-error loss against the target.
outputs = loss_net(x)
loss_val = 0.5*(target - outputs)**2
# Backpropagate; the prints below show which tensors received a gradient.
loss_val.backward()
print()
print(f'-- params that dont matter if they have gradients --')
print(f'loss_net.grad = {loss_net.fc0.weight.grad}')
print('-- params we want to have gradients --')
# `hidden` was rebound inside the update loop, so this reads the .grad of the
# loop's last output rather than of the original requires_grad=True tensor.
print(f'hidden.grad = {hidden.grad}')
print(f'updater_net.fc0.weight.grad = {updater_net.fc0.weight.grad}')
print(f'updater_net.fc0.bias.grad = {updater_net.fc0.bias.grad}')

หากใครทราบวิธีทำ โปรดแจ้งให้ฉันทราบด้วย ... ฉันกำหนดจำนวนครั้งที่อัปเดตเป็น 2 เพราะการอัปเดตควรอยู่ในกราฟการคำนวณได้ไม่ว่าจะทำกี่ครั้งก็ตาม ... ดังนั้นอย่างน้อยต้องใช้ได้กับ 2 ครั้ง

โพสต์ที่เกี่ยวข้องอย่างยิ่ง:

ครอสโพสต์:

— Pinocchio
แหล่งที่มา

คุณลองใช้อาร์กิวเมนต์ของ backward แล้วหรือยัง เช่น retain_graph=True และ/หรือ create_graph=True?

— Szymon Maszke

3

วิธีนี้ทำงานไม่ถูกต้อง เพราะพารามิเตอร์ที่มีชื่อ (named parameters) ของโมดูลถูกลบไป

ดูเหมือนว่าวิธีนี้ใช้ได้:

import torch
import torch.nn as nn

from torchviz import make_dot

import copy

from collections import OrderedDict

# Minimal scalar regression problem: one random input and its target.
# (The original post also carried a commented-out image/classification
# variant: torch.randn([8,3,32,32]) / torch.randn([1,3,32,32]) inputs with
# LongTensor targets, and a Conv2d(3, 10, kernel_size=32) loss_net.)
x = torch.randn(1)
target = 12.0 * x.pow(2)

# Not referenced anywhere else in the code visible here.
criterion = nn.CrossEntropyLoss()

# Network whose parameters are rewritten by the update loop.
loss_net = nn.Sequential(
    OrderedDict([
        ('fc0', nn.Linear(1, 1)),
    ])
)

# Input to the updater network; created as a leaf tensor that tracks gradients.
hidden = torch.randn(1, 1, requires_grad=True)
# Network whose output is used to build the parameter updates.
updater_net = nn.Sequential(
    OrderedDict([
        ('fc0', nn.Linear(1, 1)),
    ])
)
print(f'updater_net.fc0.weight.is_leaf = {updater_net.fc0.weight.is_leaf}')
#
def del_attr(obj, names):
    """Delete the attribute reached by following ``names`` from ``obj``.

    Args:
        obj: Object at which the attribute path starts.
        names: Sequence of attribute names. Every name except the last is
            resolved with ``getattr``; the last one is removed with
            ``delattr`` from the object reached that way.
    """
    owner = obj
    for step in names[:-1]:
        owner = getattr(owner, step)
    delattr(owner, names[-1])
def set_attr(obj, names, val):
    """Assign ``val`` to the attribute reached by following ``names`` from ``obj``.

    Args:
        obj: Object at which the attribute path starts.
        names: Sequence of attribute names. Every name except the last is
            resolved with ``getattr``; the last one is assigned with
            ``setattr`` on the object reached that way.
        val: Value to store.
    """
    owner = obj
    for step in names[:-1]:
        owner = getattr(owner, step)
    setattr(owner, names[-1], val)

# Number of chained parameter updates to apply to loss_net.
nb_updates = 2
# Record the parameter names once, up front. After the first update the
# entries on loss_net are plain non-leaf tensors instead of registered
# Parameters, so named_parameters() no longer yields them; looking the names up
# again on each pass would silently skip every update after the first.
param_names = [name for name, _ in loss_net.named_parameters()]
for i in range(nb_updates):
    print(f'i = {i}')
    ## w^<t> := f(w^<t-1>,delta^<t-1>)
    for name in param_names:
        path = name.split(".")
        # Fetch whatever is currently stored under this name: the original
        # Parameter on the first pass, the previous update's result afterwards.
        w = loss_net
        for part in path:
            w = getattr(w, part)
        # Feed the updater its own previous output; `hidden` is rebound, so it
        # stops referring to the original leaf tensor after the first pass.
        hidden = updater_net(hidden).view(1)
        # Alternative rule left by the author: delta = ((hidden**2)*w/2)
        delta = w + hidden
        wt = w + delta
        # Remove the old attribute first, then install the plain tensor under
        # the same name so the update stays connected to updater_net's graph.
        del_attr(loss_net, path)
        set_attr(loss_net, path, wt)
    ##
#
print()
print(f'updater_net.fc0.weight.is_leaf = {updater_net.fc0.weight.is_leaf}')
print(f'loss_net.fc0.weight.is_leaf = {loss_net.fc0.weight.is_leaf}')
# Forward pass through loss_net with the replaced weight/bias tensors,
# followed by a squared-error loss against the target.
outputs = loss_net(x)
loss_val = 0.5*(target - outputs)**2
# Backpropagate; the prints below show which tensors received a gradient.
loss_val.backward()
print()
print(f'-- params that dont matter if they have gradients --')
print(f'loss_net.grad = {loss_net.fc0.weight.grad}')
print('-- params we want to have gradients --')
print(f'hidden.grad = {hidden.grad}') # None because this is not a leaf; it is overridden in the for loop above.
print(f'updater_net.fc0.weight.grad = {updater_net.fc0.weight.grad}')
print(f'updater_net.fc0.bias.grad = {updater_net.fc0.bias.grad}')
# NOTE(review): make_dot comes from torchviz; presumably it builds a picture of
# loss_val's autograd graph. As a bare expression its result is discarded
# unless this runs in an interactive/notebook session -- confirm.
make_dot(loss_val)

เอาต์พุต:

updater_net.fc0.weight.is_leaf = True
i = 0
i = 1

updater_net.fc0.weight.is_leaf = True
loss_net.fc0.weight.is_leaf = False

-- params that dont matter if they have gradients --
loss_net.grad = None
-- params we want to have gradients --
hidden.grad = None
updater_net.fc0.weight.grad = tensor([[0.7152]])
updater_net.fc0.bias.grad = tensor([-7.4249])

ขอขอบคุณ: albanD ผู้ยอดเยี่ยมจากทีม PyTorch: https://discuss.pytorch.org/t/how-does-one-have-the-parameters-of-a-model-not-be-leafs/70076/9?u=Pinocchio

— Pinocchio
แหล่งที่มา

ทุกคน โค้ดนี้ผิด อย่าใช้โค้ดนี้ เพราะไม่สามารถส่งผ่านเกรเดียนต์ย้อนกลับได้เกิน 1 ขั้นตอน ให้ใช้ไลบรารีนี้แทน: github.com/facebookresearch/higher

— Pinocchio

วิธีนี้ใช้ไม่ได้นะทุกคน!

— Pinocchio

ไลบรารี higher ก็ใช้ไม่ได้สำหรับฉันเช่นกัน

— Pinocchio

0

คุณควรพยายามใช้เทนเซอร์ตัวเดิมต่อไป ไม่ใช่สร้างตัวใหม่

ให้เข้าถึงแอตทริบิวต์ data ของเทนเซอร์เหล่านั้นแล้วกำหนดค่าใหม่

# Sketch from the answer: keep each existing parameter tensor and overwrite
# its values rather than replacing the tensor object.
for (name, w) in loss_net.named_parameters():
    # Placeholder left by the answer's author: compute the updated value `wt` here.
    ....
    # NOTE(review): assigning through .data presumably copies values only and
    # is not tracked by autograd -- confirm this meets the question's
    # requirement that the update itself stay in the computation graph.
    w.data = wt.data

วิธีนี้ได้ผลสำหรับฉันในคำถามนี้: จะกำหนดค่าใหม่ให้กับ Variable ของ PyTorch ได้อย่างไรโดยไม่ทำให้ backpropagation เสียหาย?

— Daniel Möller
แหล่งที่มา