Commit 6d6f4480 authored by TheRiPtide

chore: rebase conflict when pushing

parents f28b078c fb8e822e
1 merge request: !23 feat: deep-learning poly(A) classifier
Pipeline #13779 failed
@@ -22,17 +22,22 @@
},
{
"cell_type": "code",
"execution_count": 80,
"outputs": [],
"source": [
"# importing the libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# for creating validation set\n",
"from sklearn.model_selection import train_test_split\n",
@@ -51,6 +59,7 @@
"# PyTorch libraries and modules\n",
"import torch\n",
"from torch.autograd import Variable\n",
"from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, MaxPool1d, Module, Softmax, BatchNorm1d, Dropout, Conv1d\n",
"from torch.optim import Adam\n",
"\n",
"\n",
"# adding the nn\n",
@@ -67,6 +80,9 @@
"\n",
" self.cnn_layers = Sequential(\n",
" # Defining a 1D convolution layer\n",
" Conv1d(1, 4, kernel_size=3, stride=1, padding=1),\n",
" BatchNorm1d(4),\n",
@@ -81,6 +97,7 @@
"\n",
" self.linear_layers = Sequential(\n",
" Linear(4 * 50, 10)\n",
" )\n",
"\n",
" # Defining the forward pass\n",
@@ -106,8 +125,11 @@
" x = self.linear_layers(x)\n",
" return x\n",
"\n",
"# defining training function\n",
@@ -143,11 +165,14 @@
"\n",
" return loss_train, loss_val"
],
"metadata": {
"collapsed": false,
@@ -171,6 +196,9 @@
{
"cell_type": "code",
"execution_count": 81,
"outputs": [
{
@@ -250,6 +278,7 @@
"\n",
"val_x = torch.from_numpy(val_x)\n",
"val_y = torch.from_numpy(val_y)"
],
"metadata": {
"collapsed": false,
@@ -287,6 +318,9 @@
{
"cell_type": "code",
"execution_count": 83,
"outputs": [
{
@@ -307,6 +341,7 @@
"# defining the loss function\n",
"criterion = CrossEntropyLoss()\n",
"\n",
"# checking if GPU is available\n",
"if torch.cuda.is_available():\n",
" model = model.cuda()\n",
" criterion = criterion.cuda()\n",
"\n",
"# defining the number of epochs\n",
"n_epochs = 25\n",
@@ -344,6 +384,9 @@
"\n",
"# training the model\n",
"for epoch in tqdm(range(n_epochs)):\n",
" train_loss, val_loss = train()\n",
" train_losses.append(train_loss)\n",
@@ -460,6 +503,7 @@
"outputs": [],
"source": [
"torch.save(model.state_dict(), '../models/internal_priming.pth')"
],
"metadata": {
"collapsed": false,
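The resolved notebook keeps the 1D-convolutional architecture. A minimal sketch of its shape arithmetic, assuming a second Conv1d(4, 4, ...) block and two MaxPool1d(2) stages in the collapsed hunks (only the imports, the first convolution block, and the Linear(4 * 50, 10) head are visible here):

import torch
from torch.nn import Sequential, Conv1d, BatchNorm1d, ReLU, MaxPool1d

# Hypothetical reconstruction of cnn_layers; the second block and both
# pooling stages are assumptions chosen so that flattening yields the
# 4 * 50 = 200 features the Linear(4 * 50, 10) head expects.
cnn_layers = Sequential(
    Conv1d(1, 4, kernel_size=3, stride=1, padding=1),  # (n, 1, 200) -> (n, 4, 200)
    BatchNorm1d(4),
    ReLU(inplace=True),
    MaxPool1d(kernel_size=2),                          # -> (n, 4, 100)
    Conv1d(4, 4, kernel_size=3, stride=1, padding=1),  # assumed second layer
    BatchNorm1d(4),
    ReLU(inplace=True),
    MaxPool1d(kernel_size=2),                          # -> (n, 4, 50)
)

x = torch.randn(8, 1, 200)        # a batch of 8 encoded sequences
flat = cnn_layers(x).view(8, -1)  # flatten as in forward()
print(flat.shape)                 # torch.Size([8, 200]), i.e. 4 * 50 per sequence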
"""Module for classifying polyA tails as internal or real."""
import torch
from torch.nn import Linear, ReLU, Sequential, MaxPool1d, Module, BatchNorm1d, Conv1d
import numpy as np
@@ -7,7 +10,10 @@ from typing import Union
class Net(Module):
"""Two-layer 1D convolutional neural net."""
def __init__(self):
@@ -30,8 +36,13 @@ class Net(Module):
Linear(4 * 50, 10)
)
def forward(self, x):
"""Forward pass function."""
x = self.cnn_layers(x)
x = x.view(x.size(0), -1)
@@ -40,16 +51,23 @@
class PolyAClassifier:
"""Classifier object using the state-dict of a pretrained PyTorch model."""
enum = {
'A': 0.0,
'U': 1 / 3,
'T': 1 / 3,
'G': 2 / 3,
'C': 1.0
}
def __init__(self, model: Module = Net, state_dict_path: str = './models/internal_priming.pth'):
"""Returns a stateless classifier with the model loaded.

Args:
model: An object subclassing the PyTorch Module
state_dict_path: A path to a saved state-dict of that model in a trained state.
"""
self.model = model()
self.model.load_state_dict(torch.load(state_dict_path))
@@ -73,7 +94,10 @@ class PolyAClassifier:
Raises:
TypeError: If sequence is not str or list(str)
ValueError: If some or all sequences are not of length 200
ValueError: If a sequence contains letters other than A, C, G, T or U
"""
@@ -91,6 +115,7 @@
enum_seqs = []
try:
for s in sequences:
enum_sequence = [self.enum[key.upper()] for key in s]
@@ -112,6 +137,25 @@
raise ValueError('Sequences not of length 200')
test = torch.from_numpy(test)
@@ -129,4 +173,29 @@
else:
return predictions.tolist()
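A minimal usage sketch of the resolved classifier; the module name poly_a and the placeholder sequence are illustrative assumptions, not taken from the commit:

# Assumes the module above is importable as `poly_a` and that a trained
# state-dict exists at the default path; both names are illustrative.
from poly_a import PolyAClassifier

clf = PolyAClassifier(state_dict_path='./models/internal_priming.pth')

# classify() accepts a single 200-nt string or a list of strings; 'U' and
# 'T' share one encoding, so RNA and DNA spellings behave identically.
sequence = 'ACGU' * 50                     # placeholder 200-nt sequence
print(clf.classify(sequence))              # e.g. [1]
print(clf.classify([sequence, sequence]))  # e.g. [1, 1]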