Sequence Learning Using Long Short-Term Memory (LSTM)

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functional
from torch.optim import Adam
import mytext
from torchtext.data import get_tokenizer
tokenizer = get_tokenizer('basic_english')
(reviews, targets) = mytext.imdb_reviews(tokenizer)
voc = mytext.build_vocab(reviews)
reviews_tensor = mytext.build_tensor(reviews, voc)
inputs_tensor = reviews_tensor[:, :200]                   # truncate each review to 200 tokens
targets_tensor = torch.tensor(targets, dtype=torch.long)
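The mytext module is a local helper whose source is not shown here. As a rough sketch of what its vocabulary and tensor builders might look like, assuming torchtext's build_vocab_from_iterator and PyTorch's pad_sequence (the special tokens below are illustrative, not taken from the real module):

# Hypothetical implementation of the mytext helpers; one possible version,
# not necessarily what the real module does.
from torchtext.vocab import build_vocab_from_iterator
from torch.nn.utils.rnn import pad_sequence

def build_vocab(token_lists):
    # "<pad>" takes index 0; unseen tokens fall back to "<unk>"
    voc = build_vocab_from_iterator(token_lists, specials=["<pad>", "<unk>"])
    voc.set_default_index(voc["<unk>"])
    return voc

def build_tensor(token_lists, voc):
    # Map each review to a LongTensor of indices, then pad to a common length
    seqs = [torch.tensor(voc(tokens), dtype=torch.long) for tokens in token_lists]
    return pad_sequence(seqs, batch_first=True, padding_value=voc["<pad>"])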
from torch.utils.data import TensorDataset, DataLoader
dataset = TensorDataset(inputs_tensor, targets_tensor)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)
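A quick sanity check on one batch confirms the shapes produced by the pipeline above (64 reviews of 200 token indices each, plus 64 labels):

x, t = next(iter(dataloader))
print(x.shape, t.shape)   # expected: torch.Size([64, 200]) torch.Size([64])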
class SeqClassifierWithLSTM(nn.Module):
    def __init__(self, voc_size, input_dim, state_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(voc_size, input_dim)
        self.lstm = nn.LSTM(input_dim, state_dim, batch_first=True)
        self.fc = nn.Linear(state_dim, output_dim)

    def forward(self, tokens):
        x = self.embedding(tokens)    # (-1, L, input_dim)
        y, (s, c) = self.lstm(x)      # y: (-1, L, state_dim)
                                      # s: (1, -1, state_dim)
                                      # c: (1, -1, state_dim)
        return self.fc(c[0])          # classify from the final cell state
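A dummy forward pass is a cheap way to verify the output shape before training; the sizes here are placeholders, not the ones used below:

m = SeqClassifierWithLSTM(voc_size=1000, input_dim=32, state_dim=64, output_dim=2)
dummy_tokens = torch.randint(0, 1000, (4, 200))   # batch of 4 sequences, 200 tokens each
print(m(dummy_tokens).shape)                      # torch.Size([4, 2])

Note that the classifier feeds the final cell state c[0] to the linear layer; the final hidden state s[0] is the more common choice, but both summarize the full sequence.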
epochs = 5
input_dim = 32
state_dim = 64
model = SeqClassifierWithLSTM(len(voc), input_dim, state_dim, 2)
loss = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters())
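No learning rate is passed to Adam, so its default of 1e-3 applies; an explicit equivalent would be:

optimizer = Adam(model.parameters(), lr=1e-3)   # same as the default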
for epoch in range(epochs):
    losses = []
    for (x, target) in dataloader:
        y = model(x)
        l = loss(y, target)
        l.backward()
        optimizer.step()
        optimizer.zero_grad()
        losses.append(l.item())
    l = np.mean(losses)
    print("{}: loss={:.4f}".format(epoch, l))
0: loss=0.6943
1: loss=0.6850
2: loss=0.6682
3: loss=0.6208
4: loss=0.5540
#
# Evaluate
#
with torch.no_grad():
    success = 0
    total = 0
    for x, target in dataloader:          # note: this measures accuracy on the training data
        y = model(x)
        pred = y.argmax(axis=1)
        success += (pred == target).sum().item()
        total += target.shape[0]
print("Accuracy = {:.2f}".format(success / total))
Accuracy = 0.75
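The manual counting above can also be delegated to torchmetrics; a sketch, assuming a recent torchmetrics version where Accuracy takes a task argument:

from torchmetrics import Accuracy

metric = Accuracy(task="multiclass", num_classes=2)
with torch.no_grad():
    for x, target in dataloader:
        metric.update(model(x), target)   # multiclass accuracy accepts raw logits
print("Accuracy = {:.2f}".format(metric.compute()))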