
Building an Advanced Convolutional Neural Network with Attention for DNA Sequence Classification and Interpretability


import random

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers


class DNASequenceClassifier:
    def __init__(self, sequence_length=200, num_classes=2):
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.model = None
        self.history = None
      
    def one_hot_encode(self, sequences):
        # Map each nucleotide to a one-hot channel; unrecognized characters (e.g. 'N') stay all-zero.
        mapping = {'A': 0, 'T': 1, 'G': 2, 'C': 3}
        encoded = np.zeros((len(sequences), self.sequence_length, 4))

        for i, seq in enumerate(sequences):
            for j, nucleotide in enumerate(seq[:self.sequence_length]):
                if nucleotide in mapping:
                    encoded[i, j, mapping[nucleotide]] = 1
        return encoded
  
    def attention_layer(self, inputs, name="attention"):
        # Score each position, softmax over the sequence axis, then reweight the feature maps.
        attention_weights = layers.Dense(1, activation='tanh', name=f"{name}_weights")(inputs)
        attention_weights = layers.Flatten()(attention_weights)
        attention_weights = layers.Activation('softmax', name=f"{name}_softmax")(attention_weights)
        attention_weights = layers.RepeatVector(inputs.shape[-1])(attention_weights)
        attention_weights = layers.Permute([2, 1])(attention_weights)

        attended = layers.Multiply(name=f"{name}_multiply")([inputs, attention_weights])
        return layers.GlobalMaxPooling1D()(attended)
  
    def build_model(self):
        inputs = layers.Input(shape=(self.sequence_length, 4), name="dna_input")

        # Multi-scale convolutions: each kernel size scans for motifs of a different width.
        conv_layers = []
        filter_sizes = [3, 7, 15, 25]

        for filter_size in filter_sizes:
            conv = layers.Conv1D(
                filters=64,
                kernel_size=filter_size,
                activation='relu',
                padding='same',
                name=f"conv_{filter_size}"
            )(inputs)
            conv = layers.BatchNormalization(name=f"bn_conv_{filter_size}")(conv)
            conv = layers.Dropout(0.2, name=f"dropout_conv_{filter_size}")(conv)

            attended = self.attention_layer(conv, name=f"attention_{filter_size}")
            conv_layers.append(attended)

        if len(conv_layers) > 1:
            merged = layers.Concatenate(name="concat_multiscale")(conv_layers)
        else:
            merged = conv_layers[0]

        dense = layers.Dense(256, activation='relu', name="dense_1")(merged)
        dense = layers.BatchNormalization(name="bn_dense_1")(dense)
        dense = layers.Dropout(0.5, name="dropout_dense_1")(dense)

        dense = layers.Dense(128, activation='relu', name="dense_2")(dense)
        dense = layers.BatchNormalization(name="bn_dense_2")(dense)
        dense = layers.Dropout(0.3, name="dropout_dense_2")(dense)

        if self.num_classes == 2:
            outputs = layers.Dense(1, activation='sigmoid', name="output")(dense)
            loss = 'binary_crossentropy'
            metrics = ['accuracy', keras.metrics.Precision(name='precision'), keras.metrics.Recall(name='recall')]
        else:
            outputs = layers.Dense(self.num_classes, activation='softmax', name="output")(dense)
            loss = 'categorical_crossentropy'
            metrics = ['accuracy']

        self.model = keras.Model(inputs=inputs, outputs=outputs, name="DNA_CNN_Classifier")

        optimizer = keras.optimizers.Adam(
            learning_rate=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-7
        )

        self.model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics
        )

        return self.model
  
    def generate_synthetic_data(self, n_samples=10000):
        # Positive sequences carry a known regulatory motif (e.g. a TATA box); negatives mostly do not.
        sequences = []
        labels = []

        positive_motifs = ['TATAAA', 'CAAT', 'GGGCGG', 'TTGACA']
        negative_motifs = ['AAAAAAA', 'TTTTTTT', 'CCCCCCC', 'GGGGGGG']

        nucleotides = ['A', 'T', 'G', 'C']

        for i in range(n_samples):
            sequence = ''.join(random.choices(nucleotides, k=self.sequence_length))

            if i < n_samples // 2:
                motif = random.choice(positive_motifs)
                pos = random.randint(0, self.sequence_length - len(motif))
                sequence = sequence[:pos] + motif + sequence[pos + len(motif):]
                label = 1
            else:
                if random.random() < 0.3:
                    motif = random.choice(negative_motifs)
                    pos = random.randint(0, self.sequence_length - len(motif))
                    sequence = sequence[:pos] + motif + sequence[pos + len(motif):]
                label = 0

            sequences.append(sequence)
            labels.append(label)

        return sequences, np.array(labels)
  
    def train(self, X_train, y_train, X_val, y_val, epochs=50, batch_size=32):
        callbacks = [
            keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-6
            )
        ]

        self.history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )

        return self.history
  
    def evaluate_and_visualize(self, X_test, y_test):
        y_pred_proba = self.model.predict(X_test).flatten()
        y_pred = (y_pred_proba > 0.5).astype(int)

        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        axes[0, 0].plot(self.history.history['loss'], label='Training Loss')
        axes[0, 0].plot(self.history.history['val_loss'], label='Validation Loss')
        axes[0, 0].set_title('Training History - Loss')
        axes[0, 0].set_xlabel('Epoch')
        axes[0, 0].set_ylabel('Loss')
        axes[0, 0].legend()

        axes[0, 1].plot(self.history.history['accuracy'], label='Training Accuracy')
        axes[0, 1].plot(self.history.history['val_accuracy'], label='Validation Accuracy')
        axes[0, 1].set_title('Training History - Accuracy')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Accuracy')
        axes[0, 1].legend()

        cm = confusion_matrix(y_test, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', ax=axes[1, 0], cmap='Blues')
        axes[1, 0].set_title('Confusion Matrix')
        axes[1, 0].set_ylabel('Actual')
        axes[1, 0].set_xlabel('Predicted')

        axes[1, 1].hist(y_pred_proba[y_test == 0], bins=50, alpha=0.7, label='Negative', density=True)
        axes[1, 1].hist(y_pred_proba[y_test == 1], bins=50, alpha=0.7, label='Positive', density=True)
        axes[1, 1].set_title('Prediction Score Distribution')
        axes[1, 1].set_xlabel('Prediction Score')
        axes[1, 1].set_ylabel('Density')
        axes[1, 1].legend()

        plt.tight_layout()
        plt.show()

        return y_pred, y_pred_proba
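
Putting the pieces together, here is a minimal end-to-end usage sketch. The train_test_split import, the 70/15/15 split ratios, and the epoch count are illustrative assumptions, not part of the original listing; only the class methods above are taken as given.

# Minimal usage sketch; split sizes and epochs=20 are assumptions, adjust to your data.
from sklearn.model_selection import train_test_split

clf = DNASequenceClassifier(sequence_length=200, num_classes=2)

# Build a labelled synthetic dataset and one-hot encode it.
sequences, labels = clf.generate_synthetic_data(n_samples=10000)
X = clf.one_hot_encode(sequences)

# train_test_split shuffles by default, which matters because generate_synthetic_data
# emits all positive examples first. Roughly 70/15/15 train/validation/test.
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, labels, test_size=0.3, stratify=labels, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42)

clf.build_model()
clf.train(X_train, y_train, X_val, y_val, epochs=20, batch_size=32)
y_pred, y_pred_proba = clf.evaluate_and_visualize(X_test, y_test)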
