
Building an Advanced Convolutional Neural Network with Attention for DNA Sequence Classification and Interpretability


import random

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers


class DNASequenceClassifier:
    def __init__(self, sequence_length=200, num_classes=2):
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.model = None
        self.history = None
      
    def one_hot_encode(self, sequences):
        # Map each nucleotide to a one-hot channel; unrecognized characters (e.g. 'N') stay all-zero.
        mapping = {'A': 0, 'T': 1, 'G': 2, 'C': 3}
        encoded = np.zeros((len(sequences), self.sequence_length, 4))

        for i, seq in enumerate(sequences):
            for j, nucleotide in enumerate(seq[:self.sequence_length]):
                if nucleotide in mapping:
                    encoded[i, j, mapping[nucleotide]] = 1
        return encoded
  
    def attention_layer(self, inputs, name="attention"):
        # Score each position, softmax over the sequence axis, then reweight the feature maps.
        attention_weights = layers.Dense(1, activation='tanh', name=f"{name}_weights")(inputs)
        attention_weights = layers.Flatten()(attention_weights)
        attention_weights = layers.Activation('softmax', name=f"{name}_softmax")(attention_weights)
        attention_weights = layers.RepeatVector(inputs.shape[-1])(attention_weights)
        attention_weights = layers.Permute([2, 1])(attention_weights)

        attended = layers.Multiply(name=f"{name}_multiply")([inputs, attention_weights])
        return layers.GlobalMaxPooling1D()(attended)
  
    def build_model(self):
        inputs = layers.Input(shape=(self.sequence_length, 4), name="dna_input")

        # Multi-scale convolutions: each kernel size scans for motifs of a different width.
        conv_layers = []
        filter_sizes = [3, 7, 15, 25]

        for filter_size in filter_sizes:
            conv = layers.Conv1D(
                filters=64,
                kernel_size=filter_size,
                activation='relu',
                padding='same',
                name=f"conv_{filter_size}"
            )(inputs)
            conv = layers.BatchNormalization(name=f"bn_conv_{filter_size}")(conv)
            conv = layers.Dropout(0.2, name=f"dropout_conv_{filter_size}")(conv)

            attended = self.attention_layer(conv, name=f"attention_{filter_size}")
            conv_layers.append(attended)

        if len(conv_layers) > 1:
            merged = layers.Concatenate(name="concat_multiscale")(conv_layers)
        else:
            merged = conv_layers[0]

        dense = layers.Dense(256, activation='relu', name="dense_1")(merged)
        dense = layers.BatchNormalization(name="bn_dense_1")(dense)
        dense = layers.Dropout(0.5, name="dropout_dense_1")(dense)

        dense = layers.Dense(128, activation='relu', name="dense_2")(dense)
        dense = layers.BatchNormalization(name="bn_dense_2")(dense)
        dense = layers.Dropout(0.3, name="dropout_dense_2")(dense)

        if self.num_classes == 2:
            outputs = layers.Dense(1, activation='sigmoid', name="output")(dense)
            loss = 'binary_crossentropy'
            metrics = ['accuracy', keras.metrics.Precision(name='precision'), keras.metrics.Recall(name='recall')]
        else:
            outputs = layers.Dense(self.num_classes, activation='softmax', name="output")(dense)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']

        self.model = keras.Model(inputs=inputs, outputs=outputs, name="DNA_CNN_Classifier")

        optimizer = keras.optimizers.Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-7
        )

        self.model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics
        )

        return self.model
  
    def generate_synthetic_data(self, n_samples=10000):
        # Positive sequences carry a known regulatory motif (e.g. a TATA box); negatives mostly do not.
        sequences = []
        labels = []

        positive_motifs = ['TATAAA', 'CAAT', 'GGGCGG', 'TTGACA']
        negative_motifs = ['AAAAAAA', 'TTTTTTT', 'CCCCCCC', 'GGGGGGG']

        nucleotides = ['A', 'T', 'G', 'C']

        for i in range(n_samples):
            sequence = ''.join(random.choices(nucleotides, k=self.sequence_length))

            if i < n_samples // 2:
                motif = random.choice(positive_motifs)
                pos = random.randint(0, self.sequence_length - len(motif))
                sequence = sequence[:pos] + motif + sequence[pos + len(motif):]
                label = 1
            else:
                if random.random() < 0.3:
                    motif = random.choice(negative_motifs)
                    pos = random.randint(0, self.sequence_length - len(motif))
                    sequence = sequence[:pos] + motif + sequence[pos + len(motif):]
                label = 0

            sequences.append(sequence)
            labels.append(label)

        return sequences, np.array(labels)
  
    def train(self, X_train, y_train, X_val, y_val, epochs=50, batch_size=32):
        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-6
            )
        ]

        self.history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        return self.history
  
    def evaluate_and_visualize(self, X_test, y_test):
        y_pred_proba = self.model.predict(X_test).flatten()
        y_pred = (y_pred_proba > 0.5).astype(int)

        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        axes[0, 0].plot(self.history.history['loss'], label='Training Loss')
        axes[0, 0].plot(self.history.history['val_loss'], label='Validation Loss')
        axes[0, 0].set_title('Training History - Loss')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].legend()

        axes[0, 1].plot(self.history.history['accuracy'], label='Training Accuracy')
        axes[0, 1].plot(self.history.history['val_accuracy'], label='Validation Accuracy')
        axes[0, 1].set_title('Training History - Accuracy')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Accuracy')
        axes[0, 1].legend()

        cm = confusion_matrix(y_test, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', ax=axes[1, 0], cmap='Blues')
        axes[1, 0].set_title('Confusion Matrix')
        axes[1, 0].set_ylabel('Actual')
        axes[1, 0].set_xlabel('Predicted')

        axes[1, 1].hist(y_pred_proba[y_test == 0], bins=50, alpha=0.7, label='Negative', density=True)
        axes[1, 1].hist(y_pred_proba[y_test == 1], bins=50, alpha=0.7, label='Positive', density=True)
        axes[1, 1].set_title('Prediction Score Distribution')
        axes[1, 1].set_xlabel('Prediction Score')
        axes[1, 1].set_ylabel('Density')
        axes[1, 1].legend()

        plt.tight_layout()
        plt.show()

        return y_pred, y_pred_proba
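
Putting the pieces together, here is a minimal end-to-end usage sketch. The train_test_split import, the 70/15/15 split ratios, and the epoch count are illustrative assumptions, not part of the original listing; only the class methods above are taken as given.

# Minimal usage sketch; split sizes and epochs=20 are assumptions, adjust to your data.
from sklearn.model_selection import train_test_split

clf = DNASequenceClassifier(sequence_length=200, num_classes=2)

# Build a labelled synthetic dataset and one-hot encode it.
sequences, labels = clf.generate_synthetic_data(n_samples=10000)
X = clf.one_hot_encode(sequences)

# train_test_split shuffles by default, which matters because generate_synthetic_data
# emits all positive examples first. Roughly 70/15/15 train/validation/test.
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, labels, test_size=0.3, stratify=labels, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42)

clf.build_model()
clf.train(X_train, y_train, X_val, y_val, epochs=20, batch_size=32)
y_pred, y_pred_proba = clf.evaluate_and_visualize(X_test, y_test)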
