#!/usr/bin/env python

import Numeric
import random
from gnuradio import gr, eng_notation
import math
from math import cos,sin
import Numeric
import sys
import datetime

from gnuradio.eng_option import eng_option
from optparse import OptionParser

from Numeric import zeros

from operator import add, mul




def flatten(in_data):
  """Return one flat list holding every leaf of the nested sequence in_data.

  Any element that exposes ``__len__`` is treated as a container and is
  descended into (depth first, left to right); every other element is
  copied to the result unchanged.
  """
  flat = []
  for item in in_data:
    if not hasattr(item, "__len__"):
      # Plain value: keep it as a leaf.
      flat.append(item)
      continue
    # Nested list/tuple: splice its flattened content in place.
    flat += flatten(item)
  return flat
   
def abs_max_reclist(in_data):
  """Return the largest magnitude found anywhere in the nested list in_data.

  The result is the greater of the maximum element and the absolute value
  of the minimum element of the flattened data.
  """
  leaves = flatten(in_data)
  largest = max(leaves)
  magnitude_of_smallest = abs(min(leaves))
  return max(largest, magnitude_of_smallest)

def max_reclist(in_data):
  """Return the maximum over all leaves of the nested list in_data."""
  return max(flatten(in_data))

def min_reclist(in_data):
  """Return the minimum over all leaves of the nested list in_data."""
  return min(flatten(in_data))

def sum_reclist(in_data):
  """Return the sum over all leaves of the nested list in_data.

  An input without any leaves sums to 0 (the previous reduce()-based
  version raised TypeError in that case).
  """
  # sum() adds left to right exactly like reduce(add, ...) did, but is
  # defined for empty input and does not depend on the builtin reduce().
  return sum(flatten(in_data))



 
 
def root_raised_cosine (gain, sampling_freq, symbol_rate, alpha, ntaps,mu,do_scale=False,do_scale_abs=False):
  """Compute the taps of a root-raised-cosine filter with a fractional offset.

  gain          -- value the taps are normalised to (only used when do_scale)
  sampling_freq -- sample rate; only the ratio to symbol_rate matters
  symbol_rate   -- symbol rate
  alpha         -- excess bandwidth factor (alpha == 0 divides by zero)
  ntaps         -- number of taps; truncated with int()
  mu            -- offset, in samples, added to every tap position
  do_scale      -- when true, scale the taps so they add up to gain
  do_scale_abs  -- with do_scale: scale so the absolute values add up to gain

  Returns a list of int(ntaps) floats.  Without do_scale the raw values are
  returned as computed; for mu == 0 the centre tap is
  -(4*alpha/pi + 1 - alpha), i.e. negative.
  """
  requested_ntaps = ntaps
  ntaps = int(ntaps)
  odd_ntaps = ntaps
  if odd_ntaps % 2 == 0:
    odd_ntaps = odd_ntaps + 1
  if odd_ntaps != requested_ntaps:
    # Only a warning in the generated output: execution continues with the
    # truncated tap count, it does not actually exit.
    print("//ntaps is not odd, exiting")

  spb = float(sampling_freq) / float(symbol_rate)  # samples per bit/symbol
  half = ntaps // 2  # explicit floor division: index of the centre tap
  taps = [0.0] * ntaps
  scale = 0.0      # plain sum of the taps (average unity gain)
  scale_abs = 0.0  # sum of magnitudes (no overflow in the worst case)
  for i in range(ntaps):
    xindx = float(i) - half + mu
    x1 = math.pi * xindx / spb
    x2 = 4.0 * alpha * xindx / spb
    x3 = x2 * x2 - 1.0
    if abs(x3) >= 0.000001:
      if abs(x2) > 1.0e-10:
        num = cos((1.0 + alpha) * x1) + sin((1.0 - alpha) * x1) / x2
      else:
        # Tap position is (numerically) zero: sin(...)/x2 is replaced by
        # its closed-form value so no division by ~0 happens.
        num = cos((1.0 + alpha) * x1) + (1.0 - alpha) * math.pi / (4.0 * alpha)
      den = x3 * math.pi
    elif alpha == 1.0:
      # x2*x2 is within 1e-6 of 1 and alpha is exactly 1: the tap is -1.
      num = -1.0
      den = 4.0
    else:
      # x2*x2 is within 1e-6 of 1, so x3 * pi would be ~0 as denominator;
      # use the alternate expressions for numerator and denominator.
      lower = (1.0 - alpha) * x1
      upper = (1.0 + alpha) * x1
      num = (sin(upper) * (1.0 + alpha) * math.pi
             - cos(lower) * ((1.0 - alpha) * math.pi * spb) / (4.0 * alpha * xindx)
             + sin(lower) * spb * spb / (4.0 * alpha * xindx * xindx))
      den = -32.0 * math.pi * alpha * alpha * xindx / spb

    taps[i] = 4.0 * alpha * num / den
    scale = scale + taps[i]
    scale_abs = scale_abs + abs(taps[i])

  if do_scale:
    if do_scale_abs:
      scale_fact = scale_abs
    else:
      scale_fact = scale
    for i in range(ntaps):
      taps[i] = taps[i] * gain / scale_fact

  return taps

  
def root_raised_cosine_polyphase(gain, interpolation, decimation,alpha, ntaps_per_filter,do_scale=False,do_scale_abs=False):
    interpolation=float(int(interpolation)) # make sure this is a float with only integer contents
    decimation=float(int(decimation)) # make sure this is a float with only integer contents
    decim_ratio=float(decimation)/float(interpolation)
    interp_ratio=float(interpolation)/float(decimation)
    ii=0
    oo=0
    s=0.0
    f=0.0
    incr=0
    start_phase=0.0
    d_mu_inc=decim_ratio
    d_mu=start_phase
    rrc_taps=[]
    #ntaps_per_filter=int(math.ceil(float(ntaps)/float(interp_ratio)))
    nfilters=int(math.ceil(interpolation))
    ntaps=int(float(ntaps_per_filter)*interp_ratio) #TODO check if this should be int(...) int(math.ceil(...)) or ()
    print "//fractional ntaps",float(ntaps_per_filter)*interp_ratio
    #mu_offset=-0.46666666667/(4.0*alpha)
    #print "BLAAA",float(ntaps_per_filter/2 - int(ntaps/2)/float(interpolation))
    #int(ntaps_per_filter*int(interpolation)/2)/float(interpolation) - ntaps_per_filter/2
    mu_offset=float(ntaps_per_filter/2 - int(ntaps/2)/float(interp_ratio)) #-1.0/float(interpolation) #TODO WHY do we need this
    #1.0*((float(int(int(ntaps_per_filter)/2)) - float(ntaps_per_filter)/2.0) -
                   #(float(int(int(ntaps)/2)) - float(ntaps)/2.0))
    print "//ntaps_per_filter=",ntaps_per_filter
    print "//nfilters=",nfilters
    print "//","s","f","incr","d_mu","ii","oo","step" 
    incr_list=[]
    for j in range(nfilters):
      print "//",s,f,incr,d_mu,ii,oo,j
      #rrc_taps.append(root_raised_cosine(spb, spb, 1.0, \
      #                                             excess_bw, ntaps,d_mu)   )
      rrc_taps.append(root_raised_cosine(1.0, 1.0, 1.0, alpha, ntaps_per_filter,-d_mu-mu_offset,False,False))
      s=d_mu+d_mu_inc
      f=float(math.floor(s)) #TODO should this be float(round(s))
      incr=int(f)
      if incr>0:
        incr_list.append(j) #TODO or should this be j+1 (where 16 whould be 0)
      d_mu=s-f
      ii=ii+incr
      oo=oo+1
    #print"//incr_list",incr_list
    if do_scale:
      scale=1.0 
      if do_scale_abs:
        scale=abs_max_reclist(rrc_taps)
      else:
        scale=sum_reclist(rrc_taps)
      for j in range(nfilters):
        for k in range(ntaps_per_filter):
          rrc_taps[j][k]=gain*rrc_taps[j][k]/scale
    for i in range(nfilters):
      print "//poly_filter[",i,"] = "
      print "//",rrc_taps[i]
      print 
    #rrc_taps.reverse() #MDVH 14feb2007 removed # to re-enable this. Later disabled it again
    #raise SystemExit, 1
    return rrc_taps, incr_list


def generate_summed_taps_memory(all_taps,data_size,num,offset):
    """Pre-compute the partial sums stored in one lookup memory.

    all_taps  -- list of tap lists, one per interpolation phase
    data_size -- number of data bits (taps) assigned to this memory
    num       -- index of the memory (not used by the computation)
    offset    -- index of the first tap covered by this memory

    The memory covers data_size taps starting at offset, clipped to the
    taps that exist.  One of those taps is not an address bit: its data bit
    is implied to be zero (the tap is always subtracted) and the caller is
    expected to use symmetry otherwise.  For every phase and every
    combination of the remaining address bits the entry is the sum of
    +tap (bit set) or -tap (bit clear), minus the implied tap.

    Returns (sums_float, abs_max, address_bits): sums_float[phase][address],
    the largest magnitude of any entry, and the number of address bits.
    """
    ntaps_per_filter = len(all_taps[0])
    if offset < 0:
        address_bits_not_used = -offset
        int_offset = 0
    elif offset + data_size > ntaps_per_filter:
        # The memory runs past the last tap: drop the missing bits.
        address_bits_not_used = offset + data_size - ntaps_per_filter
        int_offset = offset
    else:
        address_bits_not_used = 0
        int_offset = offset

    # The -1 is because we imply that the next databit is zero and use
    # symmetry if not.
    address_bits = data_size - address_bits_not_used - 1
    implied_index = int_offset + address_bits

    sums_float = []
    abs_max = -1.0
    for taps in all_taps:
        if address_bits == 0:
            # Only the implied tap is left: a single entry per phase.
            row = [-taps[int_offset]]
        else:
            row = []
            for pattern in range(1 << address_bits):
                mac = 0.0
                for bit in range(address_bits):
                    index = bit + int_offset
                    if 0 <= index < ntaps_per_filter:
                        if pattern & (1 << bit):
                            mac = mac + taps[index]
                        else:
                            mac = mac - taps[index]
                # Compensate for the implied zero data bit.
                row.append(mac - taps[implied_index])
        sums_float.append(row)
        # Track the peak magnitude over every phase and entry.  The
        # single-entry case used to overwrite the peak with the signed
        # value of the last phase.
        for mac in row:
            if abs(mac) > abs_max:
                abs_max = abs(mac)

    return sums_float,abs_max,address_bits

def generate_summed_taps_memories(all_taps,gain,data_size):
    sums_floats=[]
    abs_max=-1.0
    ntaps_per_filter=len(all_taps[0])
    nfilters=len(all_taps)
    bits_per_memory=data_size
    real_bits_per_memory=bits_per_memory-1
    num_memories=int(math.ceil(float(ntaps_per_filter)/float(bits_per_memory)))
    address_bits=[]
    for i in range(num_memories):
      offset=i*bits_per_memory
      tmp_sums_float,tmp_abs_max,real_used_data_size=generate_summed_taps_memory(all_taps,data_size,i,offset)
      address_bits.append(real_used_data_size)
      sums_floats.append(tmp_sums_float)
      abs_max=max(abs_max,tmp_abs_max)
    sums_16bits=[]
    abs_max_16bit=-1
    signed_max_16bit=-1
    signed_min_16bit=1
    for i in range(num_memories):
      sums_16bits.append([])
      for j in range(nfilters):
        sums_16bits[i].append([])
        for k in range(1<<address_bits[i]):
            sum_16bit=int(round(32767.0*sums_floats[i][j][k]/abs_max))
            sums_16bits[i][j].append(sum_16bit)
            sums_floats[i][j][k]=sums_floats[i][j][k]*gain #/abs_max
    print "//abs_max=",abs_max," abs_max_16bit=",abs_max_reclist(sums_16bits)," signed_max_16bit=",max_reclist(sums_16bits)," signed_min_16bit=",min_reclist(sums_16bits)
    return sums_floats,sums_16bits,address_bits


def generate_verilog_for_summed_taps_mem(sums_16bit,bw,bw_out,data_size,phase_size,num,offset,channel):
    """Print the Verilog module for one coefficient lookup memory.

    sums_16bit -- table contents, indexed [phase][address]
    bw         -- bit width of a stored value
    bw_out     -- bit width of the sign-extended output
    data_size  -- width of in_data; the top bit is used as 'quadrant' and
                  the remaining data_size-1 bits address the table
    phase_size -- width of the phase input
    num        -- memory index, part of the module name
    offset     -- not used
    channel    -- channel number, part of the module name

    The module is named rrc_coeff_ram_generated<channel>_<num>.  Negative
    table values are written as a negated literal of their magnitude.
    """
    addr_msb = phase_size + data_size-1-1
    data_msb = data_size-1-1
    print("module rrc_coeff_ram_generated%i_%i (input clock, input strobe, input [%i:0] phase, input [%i:0] in_data, output [%i:0] out_data,output [%i:0] ext_out_data);" % (channel,num,phase_size-1,data_size-1,bw-1,bw_out-1))
    print("")
    print("reg reg_quadrant;")
    print("wire quadrant=in_data[%i];" % (data_size-1))
    if data_size != 1:
      print("wire [%i:0] rd_addr={phase,quadrant?~in_data[%i:0]:in_data[%i:0]};" % (addr_msb,data_msb,data_msb))
    else:
      # A one-bit memory has no data address bits: phase alone selects.
      print("wire [%i:0] rd_addr=phase;" % (addr_msb))
    print("reg [%i:0] rd_data;" % (bw-1))
    print("")
    print("   always @(posedge clock)")
    print("   if (strobe)")
    print("     begin")
    print("       reg_quadrant <= #1 quadrant;")
    print("       case (rd_addr)")
    address_bits = data_size-1
    for mu_index in range(len(sums_16bit)):
      for i in range(1<<address_bits):
        value = sums_16bit[mu_index][i]
        # A negative width makes the literal read as "-<bw>'d<magnitude>".
        signed_width = bw
        if value < 0:
          signed_width = -1*bw
        print("         %i'd%i : rd_data <= #1 %i'd%i;" % (address_bits+phase_size,((mu_index<<(data_size-1)) | i),signed_width,abs(value)))
    print("       endcase // case(rd_addr)")
    print("     end //if (strobe)")
    print(" assign out_data[%i:0]=reg_quadrant?-rd_data:rd_data;" % (bw-1))
    print(" sign_extend #(%i,%i) " % (bw,bw_out))
    print("      ext_output (.in(out_data),.out(ext_out_data));")
    print("   ")
    print("endmodule // rrc_coeff_ram_generated_%i" % (num))
    print("")
    return
       
def generate_summed_taps(all_taps,ntaps,bw,bw_out,data_size,phase_size,num,offset,abs_max_val,print_txt,print_verilog):
    """Compute, and optionally print, the partial sums of one lookup memory.

    all_taps      -- list of tap lists, one per interpolation phase
    ntaps         -- number of taps per phase
    bw, bw_out    -- stored and sign-extended bit widths (Verilog output only)
    data_size     -- number of data bits (taps) assigned to this memory
    phase_size    -- width of the phase input (Verilog output only)
    num           -- memory index, used in the printed names
    offset        -- index of the first tap covered by this memory
    abs_max_val   -- magnitude that is mapped to 32767 when quantising
    print_txt     -- print every entry as plain text
    print_verilog -- print a Verilog module holding the table

    One of the covered taps is not an address bit: its data bit is implied
    to be zero (the tap is always subtracted) and symmetry is used
    otherwise.  Every phase in all_taps is processed; the loop bound used to
    be an undefined global NSTEPS.

    Returns (abs_max, abs_max_16bit, sums_float, sums_16bit) where both
    lists are flat, ordered by phase and then by address.
    """
    if print_txt:
      print("SUM_ %s" % (num,))
    if print_verilog:
      print("module rrc_coeff_ram_generated_%i (input clock, input [%i:0] phase, input [%i:0] in_data, output [%i:0] out_data,output [%i:0] ext_out_data);" % (num,phase_size-1,data_size-1,bw-1,bw_out-1))
      print("")
      print("wire quadrant=in_data[%i];" % (data_size-1))
      print("wire [%i:0] rd_addr={phase,quadrant?~in_data[%i:0]:in_data[%i:0]};" % (phase_size + data_size-1-1,data_size-1-1,data_size-1-1))
      print("reg [%i:0] rd_data;" % (bw-1))
      print("")
      print("   always @(posedge clock)")
      print("    case (rd_addr)")

    if offset < 0:
      address_bits_not_used = -offset
      int_offset = 0
    elif offset + data_size > ntaps:
      # The memory runs past the last tap: drop the missing bits.
      address_bits_not_used = offset + data_size - ntaps
      int_offset = offset
    else:
      address_bits_not_used = 0
      int_offset = offset

    # The -1 is because we imply that the next databit is zero and use
    # symmetry if not.
    address_bits = data_size - address_bits_not_used - 1
    # Tap of the implied bit.  Written out explicitly so that it is also
    # right when there are no address bits at all (then it is
    # taps[int_offset], as in generate_summed_taps_memory).
    implied_index = int_offset + address_bits

    sums_float = []
    sums_16bit = []
    abs_max_16bit = -1
    abs_max = -1.0
    for mu_index in range(len(all_taps)):
      taps = all_taps[mu_index]
      for i in range(1 << address_bits):
        mac = 0.0
        for j in range(address_bits):
          index = j + int_offset
          if 0 <= index < ntaps:
            if i & (1 << j):
              mac = mac + taps[index]
            else:
              mac = mac - taps[index]
        # Compensate for the implied zero data bit.
        mac = mac - taps[implied_index]
        sum_16bit = round(32767.0*mac/abs_max_val)
        sums_float.append(mac)
        sums_16bit.append(sum_16bit)
        if abs(sum_16bit) > abs_max_16bit:
          abs_max_16bit = abs(sum_16bit)
        if print_txt:
          print("%s %s" % (i, int(sum_16bit)))
          print("%s %s" % (i, mac))
        if print_verilog:
          # A negative width makes the literal read as "-<bw>'d<magnitude>".
          if sum_16bit < 0:
            sign = -1
          else:
            sign = 1
          # NOTE(review): the case label width is fixed at 8 bits here,
          # whatever phase_size and data_size are -- confirm.
          print("       8'd%i : rd_data <= #1 %i'd%i;" % (((mu_index<<(data_size-1)) | i),sign*bw,abs(sum_16bit)))
        if abs(mac) > abs_max:
          abs_max = abs(mac)

    if print_verilog:
      print("     endcase // case(rd_addr)")
      print(" assign out_data[%i:0]=quadrant?-rd_data:rd_data;" % (bw-1))
      print(" sign_extend #(%i,%i) " % (bw,bw_out))
      print("      ext_output (.in(out_data),.out(ext_out_data));")
      print("   ")
      print("endmodule // rrc_coeff_ram_generated_%i" % (num))
      print("")
    return abs_max,abs_max_16bit,sums_float,sums_16bit

def generate_main(num_memories,phase_size,data_size,bw_memory,bw_out,ntaps,incr_list,channel):
  """Print the top-level Verilog module frac_interp_<channel>.

  num_memories -- number of rrc_coeff_ram_generated<channel>_<i> instances
  phase_size   -- bit width of the 'phase' register
  data_size    -- only echoed in a '//' comment line
  bw_memory    -- list; bw_memory[i]+1 is the width of wire data_<i>
  bw_out       -- bit width of the sum_<i> wires and of the adder registers
  ntaps        -- number of data bits kept; sets the number of 16-bit shift
                  registers (ceil(ntaps/16)) and the topmost bit sliced off
  incr_list    -- phase values for which 'get_bit' is asserted
  channel      -- channel number used in the module and instance type names

  Everything is written to standard output with print statements; nothing
  is returned.  The function calls sys.exit(0) when more adder levels are
  needed than the three it can generate.
  """
  #num_memories=int(math.ceil(float(ntaps)/data_size))
  print "//bw_memory",bw_memory,"data_size",data_size
  num_shift_regs=int(math.ceil(float(ntaps)/16))

  # Module header and port list.
  print "module frac_interp_%i" %(channel)
  print \
"""
  (input clock, input reset, input enable,
   output reg strobe_in, input strobe_out,
   input wire [4:0] tx_shift,
   input wire [15:0] signal_in,
   output reg [15:0] signal_out);
"""

  print "reg [%i:0] phase;//max %i phases" %(phase_size-1,1<<phase_size)
  print \
"""
reg [3:0] in_count;//16 bits in every register

reg [15:0] input_16;
"""
  for i in range(num_shift_regs):
    print "reg [15:0] shift_16_%i;" % (i)
  # get_bit is the OR of (phase == p) over every p in incr_list.  The
  # trailing commas keep all terms on one output line.
  # NOTE(review): the literals are always 4 bits wide ("4'd"), independent of
  # phase_size, and an empty incr_list raises IndexError on the last print.
  print "wire get_bit=",
  for i in range(len(incr_list)-1):
    print "(phase==4'd%i) || " % (incr_list[i]),
  print "(phase==4'd%i);" % incr_list[len(incr_list)-1]
  # Reset branch of the input/shift process; one reset line per shift register.
  print \
"""
   always @(posedge clock)
      if (reset) 
        begin   
          strobe_in <= #1 0;// Reset
          in_count <=#1 0;
          input_16 <= #1 0;
"""
  for i in range(num_shift_regs):
    print "          shift_16_%i <= #1 0;" % (i)
  # Normal operation: on get_bit one bit moves from input_16 into shift_16_0.
  print \
"""
        end //if (reset) 
      else //if (reset) 
        begin
          if(enable & strobe_out & get_bit & (in_count == 4'd15))
            strobe_in <=#1 1'd1;//Only works if output of previous stage is registered and only updates on this strobe_in
          else //  if(strobe_in) //input a new 16 bit word and shift all regs 16 bit to the right= to next register)
            strobe_in <= #1 0;//notice the else is always executed, even if there is no strobe_out so strobe_in will only be high for 1 clock

          if (enable & strobe_out)
            begin
              if ( get_bit)  

                begin //shift input 1 bit to the left
                  if(in_count == 4'd15) //(strobe_in) //if(in_count == 4'd0) 
                    begin
                      //strobe_in <=#1 1'd1;//Only works if output of previous stage is registered and only updates on this strobe_in
                      input_16 <= #1 signal_in;
                    end //if(in_count == 4'd0)
                  else  //if(in_count == 4'd0) 
                    begin
                      input_16 <= #1 {1'd0,input_16[15:1]};//just shift right //MDVH 14feb2007 
                      //input_16 <= #1 {input_16[14:0],1'd0};//just shift left //MDVH 14feb2007                
                    end // if(in_count == 4'd0) else
                  //shift_16_0 <= #1 {shift_16_0[14:0],input_16[15]};//MDVH 14feb2007
                  shift_16_0 <= #1 {input_16[0],shift_16_0[15:1]};//MDVH 14feb2007
"""
  # Chain the remaining shift registers: bit 0 of register i feeds register i+1.
  for i in range(num_shift_regs-1):
    print "              shift_16_%i <= #1 {shift_16_%i[0],shift_16_%i[15:1]};" %(i+1,i,i+1)
  # End of the shift process, followed by the free-running phase counter.
  print \
"""
                  in_count <= #1 in_count+4'd1;
                end //if ((phase==....
            end //if (enable & strobe_out)
        end // if (reset) else 

   always @(posedge clock)
      if (reset) 
        phase <=#1 0;
      else if (enable & strobe_out)
        phase <=#1 phase + 4'd1;//phase will roll-over to zero and start again
          
"""
#//if there are 16 phases then d_mu = 0.00,0.31,0.63,0.94,0.25,0.56,0.88,0.19,0.5,0.81,0.13,0.44,0.75,0.06,0.38,0.69
  for i in range(num_memories):
    print "wire [%i:0] data_%i;" % (bw_memory[i]+1-1,i)
  #lsb=0#MDVH 14feb2007
  #msb=lsb+bw_memory[0]+1-1#MDVH 14feb2007
  msb=ntaps-1 #lsb+bw_memory[0]+1-1
  lsb=msb-(bw_memory[0]+1-1)#MDVH 14feb2007
  ls_shiftreg=0
  ms_shiftreg=0
  ntaps_done=0
  # Hand out the data bits to the memories, starting at bit ntaps-1 and with
  # the highest-numbered memory first; each memory takes bw_memory[...]+1 bits.
  for i in range(num_memories):
    msb_full=ntaps-1 - ntaps_done
    msb=msb_full%16
    #lsb_full=msb_full-(bw_memory[i])
    lsb_full=msb_full-(bw_memory[num_memories-i-1]) #23mar2007
    lsb=lsb_full%16
    # msb and lsb were just reduced modulo 16, so they lie in 0..15 for
    # non-negative inputs and the four range corrections below do not fire;
    # ms_shiftreg and ls_shiftreg then both stay 0.
    # NOTE(review): this looks like every slice is taken from shift_16_0 even
    # when ntaps > 16 -- confirm.
    if msb>15:
      msb=msb-16
      ms_shiftreg=ms_shiftreg+1
    if msb<0:
      msb=msb+16
      ms_shiftreg=ms_shiftreg+1
    if lsb>15:
      ls_shiftreg=ls_shiftreg+1
      lsb=lsb-16
    if lsb<0:
      ls_shiftreg=ls_shiftreg+1
      lsb=lsb+16
    if ms_shiftreg==ls_shiftreg:
      #-assign data_0={shift_16_0[6],shift_16_0[7],shift_16_0[8],shift_16_0[9],shift_16_0[10]};//shift_16_0[10:6];//shift_16_0[4:0];
      #-assign data_1={shift_16_0[1],shift_16_0[2],shift_16_0[3],shift_16_0[4],shift_16_0[5]};//shift_16_0[5:1];//shift_16_0[9:5];
      #-assign data_2=shift_16_0[0];//shift_16_0[0];//shift_16_0[10];
      #TODO assign address_8={phase,1'd0,shift_16_0[2:0]};
      
      #print "assign data_%i={" %(i),
      #for bit in range(lsb,msb,1):
      #  print "shift_16_%i[%i]" % (ms_shiftreg,bit),
      #  if bit!=msb:
      #    print ", ",
      #if (True):#msb!=lsb):
      #  print "shift_16_%i[%i]" % (ms_shiftreg,msb),     
      #print "};"
      
      #print "assign data_%i=shift_16_%i[%i:%i];" % (i,ms_shiftreg,msb,lsb) #MDVH 14feb2007
      # Slice lies inside a single shift register.
      print "assign data_%i=shift_16_%i[%i:%i];" % (num_memories-i-1,ms_shiftreg,msb,lsb) #MDVH 23mar2007
    else:
    
      #print "assign data_%i={" %(i),
      #for bit in range(lsb,15,1):
      #  print "shift_16_%i[%i]" % (ls_shiftreg,bit),
      #  if bit!=15:
      #    print ", ",
      #if (True):#15!=lsb):
      #  print "shift_16_%i[%i]" % (ls_shiftreg,15),",",  
      #for bit in range(0,msb,1):
      #  print "shift_16_%i[%i]" % (ms_shiftreg,bit),
      #  if bit!=msb:
      #    print ", ",
      #if (True):#0!=msb):
      #  print "shift_16_%i[%i]" % (ms_shiftreg,msb),
      #print "};"      
    
      #print "assign data_%i={shift_16_%i[%i:%i],shift_16_%i[%i:%i]};" % (i,ms_shiftreg,msb,0,ls_shiftreg,15,lsb) #27feb2007
      # Slice straddles two shift registers: concatenate both parts.
      print "assign data_%i={shift_16_%i[%i:%i],shift_16_%i[%i:%i]};" % (num_memories-i-1,ms_shiftreg,msb,0,ls_shiftreg,15,lsb) #MDVH 23mar2007
    #lsb=lsb+bw_memory[i]+1#MDVH 14feb2007
    #if (i<num_memories-1):#MDVH 14feb2007
    #  msb=msb+bw_memory[i+1]+1#MDVH 14feb2007
    #msb=msb-(bw_memory[i]+1)#MDVH 14feb2007
    #lsb=lsb-(bw_memory[i]+1)#MDVH 14feb2007
    #ntaps_done=ntaps_done+(bw_memory[i]+1)
    ntaps_done=ntaps_done+(bw_memory[num_memories-i-1]+1) #MDVH 23mar2007

#"""
#//address_5=shift_16_2[15:8];
#//address_6=shift_16_3[7:0];True
#//address_7={2'b0,shift_16_3[13:8]};
#//bits_0 10
#//bits_1 12
#//bits_2 14
#//bits_3 16
#//bits_4 14
#//bits_5 11
#//bits_6 10
#"""
  # One sum_<i> wire per memory, rounded up to an even count; a surplus wire
  # is tied to 0 so the first adder level always has two inputs per adder.
  num_sumregs=int(math.ceil(float(num_memories)/2.0))*2
  for i in range(num_sumregs):
    #print "//wire  [%i:0] sum_%i;" % (bw_memory[i]-1,i)
    print "wire  [%i:0] sum_%i;" % (bw_out-1,i)

  if num_sumregs>num_memories:
    print "assign sum_%i=0;" % (num_sumregs-1)

  print "reg [%i:0] sum_all;" % (bw_out-1)
  print \
"""
//wire [15:0] sum_all_16bit;
"""
  # Instantiate one coefficient memory per data_<i>/sum_<i> pair.
  for i in range(num_memories):
    print "   rrc_coeff_ram_generated%i_%i rrc_coeff_ram_%i ( .clock(clock),.strobe(strobe_out),.phase(phase),.in_data(data_%i),.out_data(),.ext_out_data(sum_%i) );" % (channel,i,i,i,i)
  # Emitted as a Verilog comment only: the flat sum the adder tree implements.
  print "//assign sum_all= ",
  for i in range(num_memories-1):
    print "sum_%i + " % (i) ,
  print "sum_%i;" % (num_memories-1)

  # Adder tree: level 1 adds pairs of sum_<i>, levels 2 and 3 add pairs of
  # the previous level.  Register names are built from the digits i*2, i*2+1, ...
  # NOTE(review): for i >= 1 the level-2 names come out as e.g. sum_2345 fed by
  # sum_23 + sum_45, which overlaps with i == 0; looks like the stride should
  # be 4 there (and 8 on level 3).  An odd num_summers also leaves the last
  # level-1 register out of level 2.  Confirm before using more than 4 memories.
  num_summers=int(math.ceil(float(num_memories)/2.0))
  num_summers2=num_summers/2
  num_summers3=num_summers2/2
  for i in range(num_summers):
    print "reg [%i:0] sum_%i%i;" % (bw_out-1,i*2,i*2+1)
  if num_summers2>0:
    for i in range(num_summers2):
      print "reg [%i:0] sum_%i%i%i%i;" % (bw_out-1,i*2,i*2+1,i*2+2,i*2+3)  
  if num_summers3>0:
    for i in range(num_summers3): 
      print "reg [%i:0] sum_%i%i%i%i%i%i%i%i;" % (bw_out-1,i*2,i*2+1,i*2+2,i*2+3,i*2+4,i*2+5,i*2+6,i*2+7)     
  print \
"""
   always @(posedge clock)
      if (reset)
        begin
"""
  for i in range(num_summers):
    print "          sum_%i%i <= #1 0;" % (i*2,i*2+1)
  if num_summers2>0:
    for i in range(num_summers2):
      print "          sum_%i%i%i%i <= #1 0;" % (i*2,i*2+1,i*2+2,i*2+3)  
  if num_summers3>0:
    for i in range(num_summers3):
      print "          sum_%i%i%i%i%i%i%i%i <= #1 0;" % (i*2,i*2+1,i*2+2,i*2+3,i*2+4,i*2+5,i*2+6,i*2+7)
  # A fourth adder level is not implemented: give up.  Note that the message
  # goes to standard output and the exit status is 0.
  if num_summers3/2>0:
    print "too many summers, exiting"
    sys.exit(0)  
  print \
"""
          sum_all <=#1 0;
          signal_out <= #1 0;
        end
      else if (enable & strobe_out)
        begin
"""
  # highest_summer ends up naming the last register of the deepest level that
  # was generated; on levels 2 and 3 it is built from the loop variable i as
  # left behind by the preceding for loop.
  highest_summer="sum_0"
  for i in range(num_summers):
    print "          sum_%i%i <= #1 sum_%i+sum_%i;" % (i*2,i*2+1,i*2,i*2+1)
    highest_summer="sum_%i%i" % (i*2,i*2+1)  
  if num_summers2>0:
    for i in range(num_summers2):
      print "          sum_%i%i%i%i <= #1 sum_%i%i+sum_%i%i;" % (i*2,i*2+1,i*2+2,i*2+3,i*2,i*2+1,i*2+2,i*2+3)
    highest_summer="sum_%i%i%i%i" % (i*2,i*2+1,i*2+2,i*2+3)  
  if num_summers3>0:
    for i in range(num_summers3):
      print "          sum_%i%i%i%i%i%i%i%i <= #1 sum_%i%i%i%i+sum_%i%i%i%i;" % (i*2,i*2+1,i*2+2,i*2+3,i*2+4,i*2+5,i*2+6,i*2+7,i*2,i*2+1,i*2+2,i*2+3,i*2+4,i*2+5,i*2+6,i*2+7)
    highest_summer="sum_%i%i%i%i%i%i%i%i" % (i*2,i*2+1,i*2+2,i*2+3,i*2+4,i*2+5,i*2+6,i*2+7)
#  print "          sum_all <= #1 %s;" % (highest_summer)
#  print \
#"""
#          signal_out <= #1 round_16(sum_all);
#        end //else if (enable & strobe_out)
#"""

  print "          sum_all <= #1 %s;" % (highest_summer)
  # Output scaling selected by tx_shift.
  # NOTE(review): the template indexes sum_all[17] directly, i.e. it presumes
  # bw_out is 18 -- confirm when bw_out is changed.
  print \
"""
          case(tx_shift)
             //5'd0  : signal_out <= #1 round_16(sum_all);//no gain reduce
             5'd0  : signal_out <= #1 {                      sum_all[16:1]    + (sum_all[17] & ( sum_all[0]  )) };             
             5'd1  : signal_out <= #1 {                      sum_all[17:2]    + (sum_all[17] & (|sum_all[1:0])) };
             5'd2  : signal_out <= #1 { {{(1){sum_all[17]}}, sum_all[17:3]}   + (sum_all[17] & (|sum_all[2:0])) };
             5'd3  : signal_out <= #1 { {{(2){sum_all[17]}}, sum_all[17:4]}   + (sum_all[17] & (|sum_all[3:0])) };
             5'd4  : signal_out <= #1 { {{(3){sum_all[17]}}, sum_all[17:5]}   + (sum_all[17] & (|sum_all[4:0])) };
             5'd5  : signal_out <= #1 { {{(4){sum_all[17]}}, sum_all[17:6]}   + (sum_all[17] & (|sum_all[5:0])) };
             5'd6  : signal_out <= #1 { {{(5){sum_all[17]}}, sum_all[17:7]}   + (sum_all[17] & (|sum_all[6:0])) };
             5'd7  : signal_out <= #1 { {{(6){sum_all[17]}}, sum_all[17:8]}   + (sum_all[17] & (|sum_all[7:0])) };
             5'd8  : signal_out <= #1 { {{(7){sum_all[17]}}, sum_all[17:9]}   + (sum_all[17] & (|sum_all[8:0])) };
             5'd9  : signal_out <= #1 { {{(8){sum_all[17]}}, sum_all[17:10]}  + (sum_all[17] & (|sum_all[9:0])) };
             5'd10  : signal_out <= #1 { {{(9){sum_all[17]}},sum_all[17:11]}  + (sum_all[17] & (|sum_all[10:0]))};
             5'd11  : signal_out <= #1 { {{(10){sum_all[17]}},sum_all[17:12]} + (sum_all[17] & (|sum_all[11:0]))};
             5'd12  : signal_out <= #1 { {{(11){sum_all[17]}},sum_all[17:13]} + (sum_all[17] & (|sum_all[12:0]))};
             5'd13  : signal_out <= #1 { {{(12){sum_all[17]}},sum_all[17:14]} + (sum_all[17] & (|sum_all[13:0]))};
             5'd14  : signal_out <= #1 { {{(13){sum_all[17]}},sum_all[17:15]} + (sum_all[17] & (|sum_all[14:0]))};
             5'd15  : signal_out <= #1 { {{(14){sum_all[17]}},sum_all[17:16]} + (sum_all[17] & (|sum_all[15:0]))};
             
             -5'd1  : signal_out <= #1 {sum_all[15:0]             };
             -5'd2  : signal_out <= #1 {sum_all[14:0] ,{(1){1'b0}}};
             -5'd3  : signal_out <= #1 {sum_all[13:0] ,{(2){1'b0}}};
             -5'd4  : signal_out <= #1 {sum_all[12:0] ,{(3){1'b0}}};
             -5'd5  : signal_out <= #1 {sum_all[11:0] ,{(4){1'b0}}};
             -5'd6  : signal_out <= #1 {sum_all[10:0] ,{(5){1'b0}}};
             -5'd7  : signal_out <= #1 {sum_all[9:0]  ,{(6){1'b0}}};
             -5'd8  : signal_out <= #1 {sum_all[8:0]  ,{(7){1'b0}}};
             -5'd9  : signal_out <= #1 {sum_all[7:0]  ,{(8){1'b0}}};
             -5'd10  : signal_out <= #1 {sum_all[6:0] ,{(9){1'b0}}};
             -5'd11  : signal_out <= #1 {sum_all[5:0] ,{(10){1'b0}}};
             -5'd12  : signal_out <= #1 {sum_all[4:0] ,{(11){1'b0}}};
             -5'd13  : signal_out <= #1 {sum_all[3:0] ,{(12){1'b0}}};
             -5'd14  : signal_out <= #1 {sum_all[2:0] ,{(13){1'b0}}};
             -5'd15  : signal_out <= #1 {sum_all[1:0] ,{(14){1'b0}}};

             default : signal_out <= #1 {                      sum_all[16:1]    + (sum_all[17] & ( sum_all[0]  )) };           
                       //signal_out <= #1 {                      sum_all[17:2]    + (sum_all[17] & (|sum_all[1:0])) };
          endcase // case(shift)

          //sum_all_14bit <= #1 sum_0123[16:3] + (sum_0123[16] & |sum_0123[2:0]);//round to 14 bit (reduce gain by 2 bit = 12 dB)

          //signal_out <= #1 round_16(sum_all);//no gain reduce
		  //signal_out <= #1  {{(2){sum_all_14bit[13]}},sum_all_14bit};//sign_extend to 16 bit (gain reduced by 2 bit = 12 dB)   
		  
      end //else if (enable & strobe_out)
"""
  
  # Trailer: only commented-out Verilog is emitted before endmodule.
  print \
"""
/*assign sum_all_16bit=round_16(sum_all);
//register the output to ease timing of the DDR DAC output
   always @(posedge clock or posedge reset)
      if (reset) 
          signal_out <= #1 0; // Reset
      else
          signal_out <= #1 sum_all_16bit;
*/
//round to 16 bit
//   function [15:0] round_16;
//      input [16:0] in_val;
//      round_16 = in_val[16:1] + (in_val[16] & |in_val[0]);//round_16 = in_val[16:1] + (in_val[16] & in_val[0])
//   endfunction // round_16

"""
  print "endmodule //module frac_interp_%i" %(channel)




def make_and_check_mems_frac_rrc_filter():
  usage="%prog: [options]" # output_filename"
  parser = OptionParser(option_class=eng_option, usage=usage)
  parser.add_option("-c", "--channel", type="int", default=0,
                          help="set which channel to generate (0 or 1) [default=0]")
  parser.add_option("-n", "--ntaps-per-filter", type="int", default=11,
                    help="set number of taps per interpolation phase [default=11]")
  parser.add_option("-d", "--decimation", type="int", default=10,
                          help="set rrc decimation rate to DECIM [default=10]")
  parser.add_option("-i", "--interpolation", type="int", default=16,
                          help="set rrc decimation rate to INTERP [default=16]")
  parser.add_option("-a", "--alpha", type="eng_float", default=0.35,
                    help="set rrc alpha to ALPHA (default=0.35.)\n(Common used values are 0.15, 0.2 and 0.35)", metavar="ALPHA")
  parser.add_option("-D", "--data-size", type="int", default=5,
                    help="number of bits used for data per memory. (default=5)\n(This has nothing to do with precision, but all with the size of ramblocks in the fpga)")
  parser.add_option("-P", "--phase-size", type="int", default=4,
                    help="number of bits used for interpolation phases. (default=4)\n(This has nothing to do with precision, but all with the size of ramblocks in the fpga) ")
  #parser.add_option( "-R", type="int", default=16,
  #                  help="resolution (precision) of the filter (number of bits) (default=16)") #not implemented yet

  (options, args) = parser.parse_args ()
  if len(args) != 0:
      parser.print_help()
      raise SystemExit, 1
  #filename = args[0] #not yet implemented
  print "// -*- verilog -*-"
  print "//"
  print "//  Polyphase interpolating Root-Raised-Cosine filter"
  print "//"
  print "//  Copyright (C) 2007 Martin Dudok van Heel"
  print "//"
  print "//  This program is free software; you can redistribute it and/or modify"
  print "//  it under the terms of the GNU General Public License as published by"
  print "//  the Free Software Foundation; either version 2 of the License, or"
  print "//  (at your option) any later version."
  print "//"
  print "//  This program is distributed in the hope that it will be useful,"
  print "//  but WITHOUT ANY WARRANTY; without even the implied warranty of"
  print "//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"
  print "//  GNU General Public License for more details."
  print "//"
  print "//  You should have received a copy of the GNU General Public License"
  print "//  along with this program; if not, write to the Free Software"
  print "//  Foundation, Inc., 51 Franklin Street, Boston, MA  02110-1301  USA"
  print "//  "
  print "//  Polyphase interpolating Root-Raised-Cosine filter"
  print "//  Interpolation ratio can be fractional (float)" 
  print "//  "
  print "//  This file was generated by ",sys.argv[0]," at ", datetime.datetime.now().isoformat(' ')
  print "//  Written by Martin Dudok van Heel, 2007"
  print "//  Olifantasia"
  print "//  "
  print "//  Contact: http://www.olifantasia.com/gnuradio/contact.gif  or  nldudok1_olifantasia_com "
  print "//  http://www.olifantasia.com"
  print "//  "
  print "//  This file can be used to build a special purpose firmware for the USRP"
  print "//  USRP = Universal Software Radio Peripheral"
  print "//  This special purpose firmware in combination with the GnuRadio framework can be used to create a high speed QPSK transmitter"
  print "//  Depending on the decimation and interpolation factors used, the symbolrate can be from 2 to 20 Msymbols/sec"
  print "//  The corresponding QPSK bitrate is 4 to 40 Mbit/sec"
  ntaps_per_filter=11#11 #TODO (multiples of data_size)+1 give ERRORS in refcheck
  interpolation=float(int(options.interpolation))#16.0#interpolation does not have to be a multiple of decimation
  nfilters=int(interpolation)
  decimation=float(int(options.decimation))#10.0#1.0#1.0
  interp_ratio=interpolation/decimation
  decim_ratio=decimation/interpolation
  gain=interpolation
  sampling_freq=interpolation
  symbol_rate=1.0
  alpha=float(options.alpha)#0.2#0.35
  data_size=options.data_size#5
  phase_size=options.phase_size#4
  print "//  alpha=",alpha
  print "//  interpolation = ",interpolation,"decimation=",decimation,"interp_ratio=",interp_ratio,"decim_ratio=",decim_ratio
  ntaps=int(math.ceil(float(ntaps_per_filter)*interp_ratio))
  mu=0.0
  print "//  symbol rate  = ",32.0*decimation/interpolation, "Msymbol/sec"
  print "//  QPSK bitrate = ",2*32.0*decimation/interpolation, "Mbit/sec"
  print "//"
  all_taps,incr_list=root_raised_cosine_polyphase(gain, interpolation,decimation, alpha, ntaps_per_filter,True,False)
  print "//  total_ntaps=",len(all_taps)*ntaps_per_filter
  print "//  ntaps_per_filter=",ntaps_per_filter
  print "//  nfilters=",len(all_taps)
  print "//  filter resolution= ",16," bits"
  print "//"
  print "//data_size=",data_size,"phase_size=",phase_size

  sums_float,sums_16bit,address_bits=generate_summed_taps_memories(all_taps,1.0/1.41442712266,data_size) #TODO check why do I need the 1.0/1.41442712266
  print "//nmemories=",len(sums_16bit)
  print "//incr_list =",incr_list
  #now generate the verilog
  bitgain=2 #TODO determine dynamically
  bw_out=16+bitgain
  bw_memory=[]
  for i in range(len(sums_16bit)):
    bw=int(math.ceil(math.log(abs_max_reclist(sums_16bit[i]),2)))+1#32767 will give 16 bit
    tmp_data_size=int(math.ceil(math.log(len(sums_16bit[i][0]),2)))+1 #16 values will give 5 
    bw_memory.append(tmp_data_size-1)
    print "//bw=",bw," data_size=",tmp_data_size,"abs_max=",abs_max_reclist(sums_16bit[i]),"len=",len(sums_16bit[i][0])
    #generate_verilog_for_summed_taps_mem(sums_16bit,bw,bw_out,tmp_data_size,phase_size,num,offset)
    generate_verilog_for_summed_taps_mem(sums_16bit[i],bw,bw_out,tmp_data_size,phase_size,i,i*data_size,options.channel)
  #bw_memory=bw_out
  generate_main(len(sums_16bit),phase_size,data_size,bw_memory,bw_out,ntaps_per_filter,incr_list,options.channel)
  

def main():
    """Script entry point: run the generator, which prints the Verilog to standard output."""
    make_and_check_mems_frac_rrc_filter()

      
      
      # Disabled reference check; no check() function is defined in the visible part of this file.
      #check(num_memories2,rrc_taps,ntaps_per_filter,sums_float,sums_16bit,interp_ratio,incr_list)
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
        
    

 

