# Given an "fna" file, this counts all substrings with length 2 and
# prints them in decreasing order with their frequencies
def load(file):
    '''
    load DNA sequence from given "file" and
    return the sequence as a single string

    The first line of the file is treated as a header and skipped;
    the remaining lines are concatenated with their newline
    characters removed.  An empty file yields an empty string.

    file -- path of the file to read
    '''
    # the "with" block closes the file even if reading fails
    with open(file, "r") as f:
        alllines = f.readlines() # read all lines

    # contents begin with the second line; slicing (unlike "del")
    # does not fail when the file has no lines at all
    body = ''.join(alllines[1:])

    return body.replace('\n', '') # drop the line breaks
    
def countsubstrings(str):
    '''
    count all substrings with length 2 and
    return count = [(n1, str1), (n2, str2), ...]

    Overlapping occurrences are counted separately.  The entries
    appear in the order in which each substring is first seen; no
    sorting by frequency is done here.  Inputs shorter than two
    characters give an empty list.

    str -- the string to scan
    '''
    count = []    # [(occurrences, substring), ...] in first-seen order
    position = {} # substring -> index of its entry in count

    # the last valid start index is len(str) - 2, so every slice
    # taken below has exactly two characters
    for start in range(len(str) - 1):
        substr = str[start:start + 2]
        slot = position.get(substr)
        if slot is None:
            # first occurrence: remember where its entry lives
            position[substr] = len(count)
            count.append((1, substr))
        else:
            count[slot] = (count[slot][0] + 1, substr)

    return count

if __name__ == '__main__':
    # Script entry point: load the sequence named on the command line,
    # count its length-2 substrings, print them in decreasing order of
    # frequency, and finally print the elapsed wall-clock time in seconds.
    import sys
    import time # time module

    if len(sys.argv) < 2:
        # fail with a readable message instead of an IndexError
        sys.exit("usage: %s FILE" % sys.argv[0])

    start = time.time() # current time

    seq = load(sys.argv[1]) # load DNA sequence

    count = countsubstrings(seq)

    # entries are (n, substring) tuples, so a reversed sort puts the
    # most frequent substring first
    count.sort(reverse=True)

    # print all substrings in decreasing order with their frequencies
    for (n, s) in count:
        # joining with single spaces reproduces the spacing of the
        # comma-separated print statement, and the one-argument call
        # form behaves the same under Python 2 and Python 3
        print(" ".join((s, " occurs ", str(n), " times")))

    end = time.time() # current time
    print(end - start) # print required time