dotplots

Dot plots: a generic and basic example. Take two sequences that have a predetermined, optimal alignment with no gaps. Create a matrix with each sequence as an axis. Place a 0 where the two sequences mismatch at that position and a 1 where they match. Visualize the matrix by coloring a grid according to the 1's and 0's; here, with the `Purples_r` colormap used below, 1's are drawn light (near white) and 0's are drawn dark purple.

In [3]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

# Two equal-length sequences that are treated as already aligned, without gaps.
sequence_1 = "gctagctagtagcttaggatgatcgtacgtagctagctgattatagagagagaaggagaa"
sequence_2 = "gctagctagtaccttaggatgatcgtacgaagctaactgattatagagagagcaagcgaa"

# One row per position of sequence_1 and one column per position of sequence_2.
dot_matrix = np.zeros((len(sequence_1), len(sequence_2)))

# Only the main diagonal is filled in: 1 where the aligned characters agree.
for position, (left, right) in enumerate(zip(sequence_1, sequence_2)):
    if left == right:
        dot_matrix[position, position] = 1

plt.imshow(dot_matrix, interpolation='none', cmap="Purples_r")
plt.show()
In [5]:
def dot_plot(seq_record,comparison_sequence,complement=True,window=3):
    """Build a windowed mismatch matrix for two sequences.

    Every window of ``window`` characters from ``seq_record`` is compared with
    every window of the same width from ``comparison_sequence``.

    Args:
        seq_record: Sequence placed along the columns (sliced, not converted).
        comparison_sequence: Sequence placed along the rows.
        complement: When truthy, also compare against the base-wise
            complement of ``comparison_sequence`` (a<->t, c<->g, case
            preserved, any other character left as is).
        window: Width of the compared slices.

    Returns:
        A numpy array with one row per window start in ``comparison_sequence``
        and one column per window start in ``seq_record``.  A cell is 0 when
        the two windows are equal and 1 when they differ; with ``complement``
        enabled, 2 is added where the window differs from the complement.

    Note:
        Window starts run over ``range(len(seq) - window)``, so the final
        full-width window of each sequence is not part of the matrix.  This
        is kept so the shape of the returned array does not change.
    """
    subject_strand = seq_record
    seq_two = comparison_sequence
    # The start positions do not depend on the cell being computed.
    subject_starts = range(len(subject_strand) - window)
    query_starts = range(len(seq_two) - window)

    data = np.array([[int(subject_strand[i:i + window] != seq_two[j:j + window])
                      for i in subject_starts]
                     for j in query_starts])
    if complement:
        # seq_two_complement used to be referenced without ever being
        # assigned, so the default complement=True raised NameError.
        base_pairs = {'a': 't', 't': 'a', 'c': 'g', 'g': 'c',
                      'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        seq_two_complement = ''.join(base_pairs.get(base, base) for base in seq_two)
        data = data + np.array([[2 * int(subject_strand[i:i + window] != seq_two_complement[j:j + window])
                                 for i in subject_starts]
                                for j in query_starts])
    return data
In [6]:
#from Bio import pairwise2
#from Bio import SeqIO
#from Bio.Seq import Seq
def make_quick_plot(window):
    """Show the dot plot of sequence_1 against sequence_2 for one window size.

    Prints the window size, sets matplotlib's default figure size to 10 x 10,
    and displays the matrix returned by dot_plot (complement disabled).

    Args:
        window: Window width forwarded to dot_plot.
    """
    import matplotlib.pyplot as plt

    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('window size: ' + str(window))
    plt.rcParams['figure.figsize'] = 10,10
    matrix = dot_plot(sequence_1, sequence_2, complement=False, window=window)
    plt.imshow(matrix, cmap="Purples_r", interpolation='none')
    plt.show()
# Draw one plot per window size from 0 through 9.  A window of 0 compares
# empty slices, so dot_plot reports no mismatch anywhere for it.
for i in range(10):
    make_quick_plot(i)
window size: 0
window size: 1
window size: 2
window size: 3
window size: 4
window size: 5
window size: 6
window size: 7
window size: 8
window size: 9
In [7]:
# Long sequence (plotted along the columns by the cell's make_quick_plot).
sequence_3 = "gctagctagtgatcgatcaccttaggatgatcgtgatcgatcacgaagctaagatcgatcctgattatagaggatcgatcagagatcgatcgatcgatcgatcgatcgcaagcgaa"
# Short motif that is compared against every window of sequence_3.
sequence_4 = "gatcgatc"
def make_quick_plot(window):
    """Show the dot plot of sequence_3 against sequence_4 for one window size.

    Prints the window size, sets matplotlib's default figure size to 10 x 1,
    and displays the matrix returned by dot_plot (complement disabled).

    Args:
        window: Window width forwarded to dot_plot.
    """
    import matplotlib.pyplot as plt

    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('window size: ' + str(window))
    plt.rcParams['figure.figsize'] = 10,1
    matrix = dot_plot(sequence_3, sequence_4, complement=False, window=window)
    plt.imshow(matrix, cmap="Purples_r", interpolation='none')
    plt.show()
# Draw one plot per window size from 0 through 7.
for i in range(8):
    make_quick_plot(i)
window size: 0
window size: 1
window size: 2
window size: 3
window size: 4
window size: 5
window size: 6
window size: 7
In [8]:
# Long free-text "sequence" (plotted along the columns by the cell's make_quick_plot).
sequence_3 = "I got, I got, I got, I got  Loyalty, got royalty inside my DNA Cocaine quarter piece, got war and peace inside my DNA I got power, poison, pain and joy inside my  NA I got hustle though, ambition, flow, inside my DNA I was born like this, since one like this Immaculate conception I transform like this, perform like this Was Yeshuas new weapon I dont contemplate, I meditate, then off your fucking head This that put-the-kids-to-bed This that I got, I got, I got, I got Realness, I just kill shit cause its in my DNA I got millions, I got riches buildin’ in my DNA I got dark, I got evil, that rot inside my DNA I got off, I got troublesome, heart inside my DNA I just win again, then win again like Wimbledon, I serve Yeah, thats"
# Short phrase that is compared against every window of sequence_3.
sequence_4 = "inside my DNA"
def make_quick_plot(window):
    """Show the dot plot of sequence_3 against sequence_4 for one window size.

    Prints the window size, sets matplotlib's default figure size to 500 x 1,
    and displays the matrix returned by dot_plot (complement disabled).

    Args:
        window: Window width forwarded to dot_plot.
    """
    import matplotlib.pyplot as plt

    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('window size: ' + str(window))
    # The size has to be set before imshow creates the figure.  It used to be
    # assigned after show(), so it only took effect for the *next* plot and
    # the first plot of the series was drawn with whatever size was left over.
    plt.rcParams['figure.figsize'] = 500,1
    matrix = dot_plot(sequence_3, sequence_4, complement=False, window=window)
    plt.imshow(matrix, cmap="Purples_r", interpolation='none')
    plt.show()
# Draw one plot per window size from 0 through 7.
for i in range(8):
    make_quick_plot(i)
window size: 0
window size: 1
window size: 2
window size: 3
window size: 4
window size: 5
window size: 6
window size: 7
In [9]:
# Free-text "sequence" that the cell's make_quick_plot places along the columns.
sequence_5 = "I got, I got, I got, I got  Loyalty, got royalty inside my DNA Cocaine quarter piece, got war and peace inside my DNA I got power, poison, pain and joy inside my DNA I got hustle though, ambition, flow, inside my DNA I was born like this, since one like this Immaculate conception I transform like this, perform like this Was Yeshuas new weapon I dont contemplate, I meditate, then off your fucking head This that put-the-kids-to-bed This that I got, I got, I got, I got Realness, I just kill shit cause its in my DNA I got millions, I got riches buildin’ in my DNA I got dark, I got evil, that rot inside my DNA I got off, I got troublesome, heart inside my DNA I just win again, then win again like Wimbledon, I serve Yeah, thats"
# Free-text "sequence" that the cell's make_quick_plot places along the rows.
sequence_6 = "I got, I got, I got, I got  Loyalty, got royalty inside my DNA Cocaine quarter piece, got war and peace inside my DNA I got power, poison, pain and joy inside my DNA I got hustle though, ambition, flow, inside my DNA I was born like this, since one like this Immaculate conception I transform like this, perform like this Was Yeshuas new weapon I dont contemplate, I meditate, then off your fucking head This that put-the-kids-to-bed This that I got, I got, I got, I got Realness, I just kill shit cause its in my DNA I got millions, I got riches buildin’ in my DNA I got dark, I got evil, that rot inside my DNA I got off, I got troublesome, heart inside my DNA I just win again, then win again like Wimbledon, I serve Yeah, thats"
def make_quick_plot(window):
    """Show the dot plot of sequence_5 against sequence_6 for one window size.

    Prints the window size, sets matplotlib's default figure size to 10 x 10,
    and displays the matrix returned by dot_plot (complement disabled).

    Args:
        window: Window width forwarded to dot_plot.
    """
    import matplotlib.pyplot as plt

    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('window size: ' + str(window))
    plt.rcParams['figure.figsize'] = 10,10
    matrix = dot_plot(sequence_5, sequence_6, complement=False, window=window)
    plt.imshow(matrix, cmap="Purples_r", interpolation='none')
    plt.show()
    
# Draw one plot per window size from 0 through 7.
for i in range(8):
    make_quick_plot(i)
window size: 0
window size: 1
window size: 2
window size: 3
window size: 4
window size: 5
window size: 6
window size: 7
In [11]:
def count_mismatches(seq_A,seq_B):
    """Count the positions at which seq_A and seq_B hold equal elements.

    NOTE: despite the name, the comparison is ``==``, so the result is the
    number of *matching* positions.  The value is plotted as-is by the code
    in this file, so that behaviour is kept rather than inverted.

    Args:
        seq_A: First sequence.
        seq_B: Second sequence.

    Returns:
        The number of indices ``i`` with ``seq_A[i] == seq_B[i]``.  Only the
        overlapping prefix is compared when the lengths differ; a shorter
        seq_B used to raise IndexError.
    """
    # zip stops at the shorter input, which removes the out-of-range index.
    return sum(1 for left, right in zip(seq_A, seq_B) if left == right)
def dot_plot_tolerant(seq_record,comparison_sequence,complement=True,window=3):
    """Build a windowed score matrix that grades partial window agreement.

    Every window of ``window`` characters from ``seq_record`` is scored
    against every window of the same width from ``comparison_sequence`` with
    count_mismatches.  The resulting matrix is printed and returned.

    Args:
        seq_record: Sequence placed along the columns (sliced, not converted).
        comparison_sequence: Sequence placed along the rows.
        complement: When truthy, also compare against the base-wise
            complement of ``comparison_sequence`` (a<->t, c<->g, case
            preserved, any other character left as is).
        window: Width of the compared slices.

    Returns:
        A numpy array with one row per window start in ``comparison_sequence``
        and one column per window start in ``seq_record``.  Each cell holds
        the count_mismatches result for the two windows; with ``complement``
        enabled, 2 is added where the window differs from the complement.

    Note:
        Window starts run over ``range(len(seq) - window)``, so the final
        full-width window of each sequence is not part of the matrix.  This
        is kept so the shape of the returned array does not change.
    """
    subject_strand = seq_record
    seq_two = comparison_sequence
    # The start positions do not depend on the cell being computed.
    subject_starts = range(len(subject_strand) - window)
    query_starts = range(len(seq_two) - window)

    data = np.array([[count_mismatches(subject_strand[i:i + window], seq_two[j:j + window])
                      for i in subject_starts]
                     for j in query_starts])
    if complement:
        # seq_two_complement used to be referenced without ever being
        # assigned, so the default complement=True raised NameError.
        base_pairs = {'a': 't', 't': 'a', 'c': 'g', 'g': 'c',
                      'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        seq_two_complement = ''.join(base_pairs.get(base, base) for base in seq_two)
        data = data + np.array([[2 * int(subject_strand[i:i + window] != seq_two_complement[j:j + window])
                                 for i in subject_starts]
                                for j in query_starts])
    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print(data)
    return data
In [12]:
# Free-text "sequence" that the cell's make_quick_plot places along the columns.
sequence_5 = "I got, I got, I got, I got  Loyalty, got royalty inside my DNA Cocaine quarter piece, got war and peace inside my DNA I got power, poison, pain and joy inside my DNA I got hustle though, ambition, flow, inside my DNA I was born like this, since one like this Immaculate conception I transform like this, perform like this Was Yeshuas new weapon I dont contemplate, I meditate, then off your fucking head This that put-the-kids-to-bed This that I got, I got, I got, I got Realness, I just kill shit cause its in my DNA I got millions, I got riches buildin’ in my DNA I got dark, I got evil, that rot inside my DNA I got off, I got troublesome, heart inside my DNA I just win again, then win again like Wimbledon, I serve Yeah, thats"
# Free-text "sequence" that the cell's make_quick_plot places along the rows.
sequence_6 = "I got, I got, I got, I got  Loyalty, got royalty inside my DNA Cocaine quarter piece, got war and peace inside my DNA I got power, poison, pain and joy inside my DNA I got hustle though, ambition, flow, inside my DNA I was born like this, since one like this Immaculate conception I transform like this, perform like this Was Yeshuas new weapon I dont contemplate, I meditate, then off your fucking head This that put-the-kids-to-bed This that I got, I got, I got, I got Realness, I just kill shit cause its in my DNA I got millions, I got riches buildin’ in my DNA I got dark, I got evil, that rot inside my DNA I got off, I got troublesome, heart inside my DNA I just win again, then win again like Wimbledon, I serve Yeah, thats"
def make_quick_plot(window):
    """Show the tolerant dot plot of sequence_5 against sequence_6.

    Prints the window size, sets matplotlib's default figure size to 10 x 10,
    and displays the matrix returned by dot_plot_tolerant (complement
    disabled) with the viridis colormap.

    Args:
        window: Window width forwarded to dot_plot_tolerant.
    """
    import matplotlib.pyplot as plt

    # A parenthesised single-argument print prints the same text on
    # Python 2 and Python 3; the bare print statement is Python 2 only.
    print('window size: ' + str(window))
    plt.rcParams['figure.figsize'] = 10,10
    matrix = dot_plot_tolerant(sequence_5, sequence_6, complement=False, window=window)
    plt.imshow(matrix, cmap="viridis", interpolation='none')
    plt.show()
    
# Draw one plot per window size from 1 through 19.
for i in range(1, 20):
    make_quick_plot(i)
window size: 1
[[1 0 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 [0 0 0 ..., 0 0 1]]
window size: 2
[[2 0 0 ..., 0 0 0]
 [0 2 0 ..., 0 0 0]
 [0 0 2 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 2 0 0]
 [0 0 0 ..., 0 2 0]
 [0 0 0 ..., 0 0 2]]
window size: 3
[[3 0 0 ..., 0 0 0]
 [0 3 0 ..., 1 0 0]
 [0 0 3 ..., 0 0 1]
 ..., 
 [0 1 0 ..., 3 0 0]
 [0 0 0 ..., 0 3 0]
 [0 0 1 ..., 0 0 3]]