# Given an "fna" file, this counts all substrings of length 2 and
# prints them in decreasing order of frequency.
def load(file):
    '''
    Load a DNA sequence from the file at path "file" and
    return the sequence as a single string.

    The first line of the file is skipped (the contents begin with the
    second line); the remaining lines are joined together with their
    newline characters removed.  A file with no lines, or with only the
    first line, yields an empty string.
    '''
    # The "with" block closes the file even if reading raises.
    with open(file, "r") as f:
        alllines = f.readlines()  # read all lines

    # Slicing (rather than "del alllines[0]") also copes with an empty file.
    line = ''.join(alllines[1:])

    # Remove the newline characters so the sequence is one continuous string.
    return line.replace('\n', '')
    
def countsubstrings(str):
    '''
    Count every (overlapping) substring of length 2 in "str".

    Returns a dictionary whose keys are the 2-character substrings and
    whose values are the number of times each one occurs.  An input
    shorter than 2 characters gives an empty dictionary.
    '''
    # NOTE: the parameter name shadows the builtin "str"; it is kept so that
    # existing callers keep working, and aliased here for readability.
    seq = str
    counts = {}  # dictionary: key => substring, value => its frequency

    # Stop one short of the end so every slice has exactly two characters.
    for i in range(len(seq) - 1):
        pair = seq[i:i + 2]
        # dict.get() replaces dict.has_key(), which does not exist in Python 3.
        counts[pair] = counts.get(pair, 0) + 1
    return counts

if __name__ == '__main__':
    # Script entry point: load the sequence file named on the command line,
    # count its 2-character substrings, print them from most to least
    # frequent, and finally print the elapsed wall-clock time in seconds.
    import sys
    import time # time module

    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s FILE" % sys.argv[0])

    start = time.time() # current time

    # load() is defined in this module, so it is called directly rather
    # than through a separately imported module.
    seq = load(sys.argv[1]) # load DNA sequence

    count = countsubstrings(seq)

    # A dictionary has no inherent order, so sort its (substring, frequency)
    # pairs explicitly: highest frequency first, ties broken alphabetically
    # so the output is deterministic.
    ranked = sorted(count.items(), key=lambda item: (-item[1], item[0]))

    # print all substrings in decreasing order with their frequencies
    for (k, v) in ranked:
        # A single formatted string keeps the output identical under both
        # the Python 2 print statement and the Python 3 print function.
        print("%s  occurs  %s  times" % (k, v))

    end = time.time() # current time
    print(end - start) # print required time