Computer Science Canada

DNA to RNA to Protein

Author:  Cervantes [ Fri Jan 13, 2006 9:26 pm ]
Post subject:  DNA to RNA to Protein

This code allows a biology student to take a nucleotide sequence (of DNA), convert it to RNA, then convert that to an amino acid sequence: a protein.

Ruby:

class Array
        # Yields consecutive, non-overlapping slices of +group_length+ elements.
        # Trailing elements that do not fill a complete group are skipped.
        #
        # The block is invoked through +yield+, so no explicit block parameter
        # is declared; callers may still hand a block over with <tt>&block</tt>.
        #
        # Returns an Enumerator over the groups when no block is given,
        # otherwise the number of complete groups that were yielded.
        def each_group( group_length )
                return enum_for( :each_group, group_length ) unless block_given?

                length.div( group_length ).times do |i|
                        yield self[i * group_length ... (i + 1) * group_length]
                end
        end

        # Returns a new array in which every element that is a key of +hsh+
        # is replaced by the corresponding value. Elements without an entry
        # are kept as they are. The receiver is not modified.
        def apply_hash( hsh )
                # fetch (rather than []) keeps the element even if hsh has a default value
                map { |element| hsh.fetch( element, element ) }
        end
end


# Common base for nucleotide sequences. The sequence is stored as an array
# of upper-case single-base symbols (e.g. [:G, :A, :T]).
class InformationMolecule

        attr_reader :sequence

        # Accepts the bases either as a String (one base per byte) or as an
        # Array whose elements respond to +to_s+. Every base is upper-cased
        # and converted to a symbol. Any other kind of argument produces an
        # empty sequence.
        def initialize( sequence )
                @sequence =
                        case sequence
                        when String
                                sequence.bytes.map { |byte| byte.chr.upcase.to_sym }
                        when Array
                                sequence.map { |base| base.to_s.upcase.to_sym }
                        else
                                []
                        end
        end

        # Hands the sequence to the given block three bases at a time
        # (delegates to Array#each_group with a group length of 3).
        def each_codon( &block )
                @sequence.each_group( 3, &block )
        end

end

# A DNA strand. Construction is inherited unchanged from InformationMolecule.
class DNA < InformationMolecule

        # Base pairing DNA -> complementary DNA. Built once and frozen instead
        # of being reassigned to a class variable on every instantiation.
        DNA_TO_DNA = { :A => :T, :T => :A, :C => :G, :G => :C }.freeze

        # Base pairing DNA -> complementary RNA (adenine pairs with uracil).
        DNA_TO_RNA = { :A => :U, :T => :A, :C => :G, :G => :C }.freeze

        # Returns an RNA built by replacing each base with its RNA complement.
        # Bases that have no entry in the table are passed through unchanged;
        # the order of the sequence is not reversed.
        def to_rna
                RNA.new( @sequence.apply_hash( DNA_TO_RNA ) )
        end

        # Returns a DNA built by replacing each base with its DNA complement.
        # Bases that have no entry in the table are passed through unchanged;
        # the order of the sequence is not reversed.
        def to_cdna
                DNA.new( @sequence.apply_hash( DNA_TO_DNA ) )
        end

end

# An RNA strand. Construction is inherited unchanged from InformationMolecule.
class RNA < InformationMolecule

        # Base pairing RNA -> complementary DNA. Built once and frozen instead
        # of being reassigned to a class variable on every instantiation.
        RNA_TO_DNA = { :A => :T, :U => :A, :C => :G, :G => :C }.freeze

        # Base pairing RNA -> complementary RNA.
        RNA_TO_RNA = { :A => :U, :U => :A, :C => :G, :G => :C }.freeze

        # Codon => one-letter amino-acid symbol, or :STOP for a stop codon.
        GENETIC_CODE = {
                :UUU => :f, :UUC => :f, :UUA => :l, :UUG => :l,
                :UCU => :s, :UCC => :s, :UCA => :s, :UCG => :s,
                :UAU => :y, :UAC => :y, :UAA => :STOP, :UAG => :STOP,
                :UGU => :c, :UGC => :c, :UGA => :STOP, :UGG => :w,
                :CUU => :l, :CUC => :l, :CUA => :l, :CUG => :l,
                :CCU => :p, :CCC => :p, :CCA => :p, :CCG => :p,
                :CAU => :h, :CAC => :h, :CAA => :q, :CAG => :q,
                :CGU => :r, :CGC => :r, :CGA => :r, :CGG => :r,
                :AUU => :i, :AUC => :i, :AUA => :i, :AUG => :m,
                :ACU => :t, :ACC => :t, :ACA => :t, :ACG => :t,
                :AAU => :n, :AAC => :n, :AAA => :k, :AAG => :k,
                :AGU => :s, :AGC => :s, :AGA => :r, :AGG => :r,
                :GUU => :v, :GUC => :v, :GUA => :v, :GUG => :v,
                :GCU => :a, :GCC => :a, :GCA => :a, :GCG => :a,
                :GAU => :d, :GAC => :d, :GAA => :e, :GAG => :e,
                :GGU => :g, :GGC => :g, :GGA => :g, :GGG => :g
        }.freeze

        # Returns a DNA built by replacing each base with its DNA complement.
        # Bases that have no entry in the table are passed through unchanged.
        def to_dna
                DNA.new( @sequence.apply_hash( RNA_TO_DNA ) )
        end

        # Returns an RNA built by replacing each base with its RNA complement.
        # Bases that have no entry in the table are passed through unchanged.
        def to_crna
                RNA.new( @sequence.apply_hash( RNA_TO_RNA ) )
        end

        # Translates the sequence codon by codon, starting at the first base,
        # and returns the resulting Protein. Translation ends at the first
        # stop codon or when the complete codons run out.
        #
        # Raises ArgumentError when a codon is not part of the genetic code
        # (for example because it contains a base other than A, C, G or U).
        # Previously such a codon put nil into the amino-acid list, which only
        # failed later inside Protein.new with an unrelated NoMethodError.
        def to_protein
                aa_sequence = []
                each_codon do |codon|
                        triplet = codon.join.to_sym
                        aa = GENETIC_CODE.fetch( triplet ) do
                                raise ArgumentError, "cannot translate codon #{triplet}"
                        end
                        break if aa == :STOP
                        aa_sequence.push( aa )
                end
                Protein.new( aa_sequence )
        end

end

# An amino-acid chain. +sequence+ holds the residues as symbols and
# +full_name_sequence+ holds the same chain with known one-letter codes
# replaced by the amino acid's full name.
class Protein

        # One-letter code (lower case) => full amino-acid name. Built once and
        # frozen instead of being reassigned to a class variable on every
        # instantiation.
        AA_CODE = {
                :f => :Phenylalanine,
                :l => :Leucine,
                :i => :Isoleucine,
                :m => :Methionine,
                :v => :Valine,
                :s => :Serine,
                :p => :Proline,
                :t => :Threonine,
                :a => :Alanine,
                :y => :Tyrosine,
                :h => :Histidine,
                :q => :Glutamine,
                :n => :Asparagine,
                :k => :Lysine,
                :d => :"Aspartic acid",
                :e => :"Glutamic acid",
                :c => :Cysteine,
                :w => :Tryptophan,
                :r => :Arginine,
                :g => :Glycine
        }.freeze

        attr_reader :sequence
        attr_reader :full_name_sequence

        # Accepts the residues either as a String (one residue per byte,
        # stored upper-cased) or as an Array whose elements respond to
        # +to_sym+ (stored as given). Any other argument produces an empty
        # chain.
        def initialize( aa_sequence )
                @sequence = []
                if aa_sequence.is_a?( String )
                        aa_sequence.each_byte do |aa|
                                @sequence.push( aa.chr.upcase.to_sym )
                        end
                elsif aa_sequence.is_a?( Array )
                        aa_sequence.each do |aa|
                                @sequence.push( aa.to_sym )
                        end
                end
                # The table is keyed by lower-case codes while String input is
                # stored upper-cased, so look the code up case-insensitively.
                # Residues without an entry are kept unchanged.
                @full_name_sequence = @sequence.map do |aa|
                        AA_CODE.fetch( aa.to_s.downcase.to_sym, aa )
                end
        end

end

# File-based variant: reads the nucleotides from "nt sequence.txt" and writes
# the amino-acid names to "aa sequence.txt". Uncomment these three lines (and
# comment out the demo below) to use it.
#nt = ""
#File.open( "nt sequence.txt").each { |line| nt += line.chomp }
#File.open( "aa sequence.txt", "w" ).puts( DNA.new( nt ).to_rna.to_protein.full_name_sequence )

# Demo: transcribe a short hard-coded strand to RNA, translate it, and print
# the full amino-acid names, one per line.
demo_strand = DNA.new( "gattacctcggguuu" )
puts demo_strand.to_rna.to_protein.full_name_sequence


For a bigger nucleotide sequence, try the attached file and switch the commenting at the end of the program.

Author:  wtd [ Fri Jan 13, 2006 11:29 pm ]
Post subject: 

code:
class Array
        def each_group( group_length, &block )
                self.length.div(group_length).times do |i|
                        yield self [i * group_length .. (i + 1) * group_length - 1]
                end
        end


Why does each_group accept &block? This parameter is not used in the method.

Author:  wtd [ Fri Jan 13, 2006 11:34 pm ]
Post subject: 

Also, when you have a method that takes a single argument, you can elide the parentheses. This is often used to great effect when the method ends in a question mark.

Author:  Cervantes [ Sat Jan 14, 2006 11:09 am ]
Post subject: 

wtd wrote:
code:
class Array
        def each_group( group_length, &block )
                self.length.div(group_length).times do |i|
                        yield self [i * group_length .. (i + 1) * group_length - 1]
                end
        end


Why does each_group accept &block? This parameter is not used in the method.


Thanks for pointing that out. I had thought I needed it as a way to pass the block to each_group from each_codon, but apparently not: I can still call each_group from each_codon with
code:

@sequence.each_group( 3, &block )


Thanks. Smile

Author:  Andy [ Thu Feb 02, 2006 9:44 am ]
Post subject: 

wow.. believe it or not, that was part of an assignment in cs134 at waterloo.. well the assignment had a lot more to it.. but this was definitely in it

Author:  wtd [ Thu Feb 02, 2006 2:17 pm ]
Post subject: 

wtd wrote:
code:
class Array
        def each_group( group_length, &block )
                self.length.div(group_length).times do |i|
                        yield self [i * group_length .. (i + 1) * group_length - 1]
                end
        end


Why does each_group accept &block? This parameter is not used in the method.


In fact, let's clean that up some more.

code:
# Yields every complete run of +group_length+ consecutive elements of the
# receiver; a trailing partial run is skipped. Returns the number of runs.
def each_group(group_length)
   whole_groups = length / group_length
   whole_groups.times do |group_index|
      first = group_index * group_length
      yield self[first ... first + group_length]
   end
end


: