#!/usr/bin/python
#
#  Copyright (c) 2008 Thomas Klimpel and Rutger ter Borg
#
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#

import re, os.path

# Raw strings keep the regex escapes (\*, \$, \+) away from Python's own
# string-escape processing, which warns about unknown escape sequences.

# A comment line: optional blanks followed by '*'. Group 1 is the text after
# the '*'.
comment = re.compile( r'^[ ]*\*(.*)' )

# A continuation line: optional blanks followed by '$' or '+'. Group 1 is the
# continued code with its leading blanks removed.
multiline = re.compile( r'^[ ]*[\$\+][ ]*(.*)$' )

# A type declaration: group 1 is the Fortran type keyword, group 2 is the
# remainder of the line (the declared names).
keyword = re.compile( r'^[ ]*(LOGICAL|CHARACTER|REAL|INTEGER|DOUBLE PRECISION|COMPLEX\*16|COMPLEX)[ ]+(.*)$' )

# Maps a Fortran type, as produced by parse_lapack_file (scalar keyword, or
# 'ARRAY OF ' + keyword), to the C++ type used in the generated code.
type_map = {
    'CHARACTER': 'char',
    'LOGICAL': 'bool',
    'INTEGER': 'int',
    'REAL': 'float',
    'DOUBLE PRECISION': 'double',
    'COMPLEX': 'traits::complex_f',
    'COMPLEX*16': 'traits::complex_d',
    'ARRAY OF CHARACTER': 'char*',
    'ARRAY OF LOGICAL': 'bool*',
    'ARRAY OF INTEGER': 'int*',
    'ARRAY OF REAL': 'float*',
    'ARRAY OF DOUBLE PRECISION': 'double*',
    'ARRAY OF COMPLEX': 'traits::complex_f*',
    'ARRAY OF COMPLEX*16': 'traits::complex_d*',
}


#
# Parse a LAPACK file
# input: filename
# output: a pair of ( subroutine name, map )
#         the map contains:
#         'arguments': an array of argument names
#         'arg_type': a map of ( argument_name, property ),
#                     in which property can be
#            'type': Fortran type
#            'cpptype': C++ type
#            'io': the (input/output) part from the comment section
#         'description': the text of the Purpose section of the comments
#
def parse_lapack_file( filename ):
    """Parse one LAPACK Fortran source file.

    Returns a pair ( subroutine_name, info_map ).

    subroutine_name is the name found in the SUBROUTINE statement, or '' when
    no line matches (the name pattern only accepts the letters A-Z, so names
    containing digits are not recognised).

    info_map contains:
      'arguments'   -- list of argument names, in declaration order
      'arg_type'    -- dict mapping each argument name to a dict with
                       'type' (Fortran type, prefixed with 'ARRAY OF ' for
                       arrays), 'cpptype' (C++ type) and, when the comment
                       section documents it, 'io' (e.g. 'input', 'output')
      'description' -- the text between the 'Purpose' and 'Arguments'
                       headings of the comment section, with the subroutine
                       name replaced by the name minus its first character

    Raises KeyError when an argument of the subroutine has neither a type
    declaration nor an '(input/output)' entry in the comments.
    """

    # read the entire Fortran source file; 'with' closes the handle again
    with open( filename ) as source_file:
        source = source_file.read()

    # Split the source into code lines and comment lines:
    # * comment lines lose their leading '*'
    # * continuation lines ('$' or '+') are merged into the previous code line
    code = []
    comments = []
    for line in source.splitlines():
        match_comment = comment.search( line )
        if match_comment is not None:
            comments.append( match_comment.group( 1 ) )
            continue
        match_multi = multiline.search( line )
        if match_multi is not None and code:
            code[ -1 ] += match_multi.group( 1 )
        else:
            # a continuation marker with nothing to continue is kept as a
            # plain code line instead of failing on an empty list
            code.append( line )

    #
    # Start by parsing the comment fields.
    # * Gather the (input/output) etc. stuff
    #
    arg_type = {}
    inout = re.compile( r'^[ ]*([A-Z]+)[ ]+\(([a-z/ ]+)\)' )
    for line in comments:
        match_io = inout.search( line )
        if match_io is not None:
            arg_type.setdefault( match_io.group( 1 ), {} )[ 'io' ] = match_io.group( 2 )

    #
    # Gather the Purpose of the subroutine. Joining with "\n//" turns every
    # comment line but the first into a C++ comment line.
    #
    cpp_comments = "\n//".join( comments )
    match_purpose = re.compile( 'Purpose[ ]*\n.*?\n(.*)\n.*?Arguments', re.M | re.S ).search( cpp_comments )
    subroutine_description = ''
    if match_purpose is not None:
        subroutine_description = match_purpose.group( 1 )

    #
    # Parse the lines of actual Fortran code
    #
    subroutine_re = re.compile( r'SUBROUTINE[ ]+([A-Z]+)\(([^\)]+)' )
    # a declared name, optionally followed by a parenthesised dimension list
    variable_re = re.compile( r'([A-Z0-9]+(\([^\)]+\))?)[, ]?' )

    arguments = []
    subroutine_name = ''
    for line in code:
        match_subroutine_name = subroutine_re.search( line )
        if match_subroutine_name is not None:
            subroutine_name = match_subroutine_name.group( 1 )
            arguments = match_subroutine_name.group( 2 ).replace( ' ', '' ).split( "," )

        match_variables = keyword.search( line )
        if match_variables is None:
            continue

        fortran_type = match_variables.group( 1 ).strip()
        for variable in variable_re.findall( match_variables.group( 2 ) ):
            # 'A( LDA, * )' -> [ 'A', ' LDA, * )' ]; a plain name gives one part
            name_parts = variable[ 0 ].strip().split( "(" )
            properties = arg_type.setdefault( name_parts[ 0 ], {} )
            if len( name_parts ) == 1:
                properties[ 'type' ] = fortran_type
            else:
                properties[ 'type' ] = "ARRAY OF " + fortran_type

            #
            # Add C++ type declarations; can be different ones for different files.
            #
            cpptype = type_map[ properties[ 'type' ] ]

            # some functions do not have documented whether arguments are input and/or
            # output arguments. Can or should this be deduced from the source code?
            io = properties.get( 'io' )
            if io == 'input':
                # inputs are const; for pointers the pointee is const
                if cpptype[ -1 ] == '*':
                    cpptype = cpptype[ 0:-1 ] + ' const*'
                else:
                    cpptype += ' const'
            elif io == 'output':
                # for output-type variables -- force it to be a reference, if
                # not already a pointer (and thus an array)
                if cpptype[ -1 ] != '*':
                    cpptype += '&'
            properties[ 'cpptype' ] = cpptype

    #
    # create a dict object
    #
    info_map = {}
    info_map[ 'arguments' ] = arguments
    info_map[ 'arg_type' ] = {}
    for p in arguments:
        info_map[ 'arg_type' ][ p ] = arg_type[ p ]

    # str.replace returns a new string, so its result has to be stored
    if subroutine_name:
        subroutine_description = subroutine_description.replace( subroutine_name, subroutine_name[ 1: ] )
    info_map[ 'description' ] = subroutine_description

    return subroutine_name, info_map


#
# Group subroutines on their name, with the first character removed. This will 
# group them in the same .hpp file as well.
#
def group_subroutines( global_info_map ):
    """Group subroutine names by their name without the first character.

    global_info_map is a dict keyed by subroutine name; only the keys are
    used. Returns a dict mapping each shortened name to the sorted list of
    full names that share it.
    """
    group_map = {}
    for name in global_info_map:
        # setdefault works on every Python version, unlike dict.has_key
        group_map.setdefault( name[ 1: ], [] ).append( name )
    for members in group_map.values():
        members.sort()

    return group_map
	

#
# Write the lapack_names.h file.
#
def write_lapack_names( global_info_map, group, template_file, dest_file ):
    """Write the header that defines a LAPACK_<NAME> macro per subroutine.

    global_info_map -- not used here; kept so all writers share a signature
    group           -- dict mapping a group name to a list of subroutine names
    template_file   -- path of the template; every '$CONTENT' in it is
                       replaced by the generated macro definitions
    dest_file       -- path of the file to write

    Groups are emitted in sorted order of their name.
    """
    lines = []
    for group_name in sorted( group ):
        lines.append( '// Variants of ' + group_name.lower() + '\n' )
        for name in group[ group_name ]:
            lines.append( '#define LAPACK_' + name + ' FORTRAN_ID( ' + name.lower() + ' )\n' )
        lines.append( '\n' )
    content = ''.join( lines )

    with open( template_file ) as template:
        result = template.read()
    result = result.replace( "$CONTENT", content )

    # The file is written in binary mode so line endings are not translated.
    # Text that is not already a byte string (Python 3) is encoded first.
    if not isinstance( result, bytes ):
        result = result.encode( 'utf-8' )
    with open( dest_file, 'wb' ) as dest:
        dest.write( result )
	

def write_lapack_h( global_info_map, group, template_file, dest_file ):
    """Write the header that declares every LAPACK routine as a C function.

    global_info_map -- dict mapping a subroutine name to its info map; the
                       'arguments' list and each argument's 'cpptype' are used
    group           -- dict mapping a group name to a list of subroutine names
    template_file   -- path of the template; '$CONTENT' is replaced by the
                       declarations and '$INDENT' by four spaces
    dest_file       -- path of the file to write

    Groups are emitted in sorted order of their name. Every argument is
    declared as a pointer, and the traits::complex_f / traits::complex_d
    types are spelled fcomplex_t / dcomplex_t.
    """
    pieces = []
    for group_name in sorted( group ):
        pieces.append( '$INDENT// Variants of ' + group_name.lower() + '\n' )
        for name in group[ group_name ]:
            routine = global_info_map[ name ]

            arg_list = []
            for arg in routine[ 'arguments' ]:
                c_type = routine[ 'arg_type' ][ arg ][ 'cpptype' ]

                #
                # special lapack_h treatment: everything is a pointer
                #
                if c_type.endswith( '&' ):
                    c_type = c_type.replace( '&', '*' )
                elif not c_type.endswith( '*' ):
                    c_type += '*'

                #
                # special lapack_h treatment: complex data types
                #
                c_type = c_type.replace( 'traits::complex_f', 'fcomplex_t' )
                c_type = c_type.replace( 'traits::complex_d', 'dcomplex_t' )

                # start a new line after every fourth argument
                prefix = ''
                if arg_list and len( arg_list ) % 4 == 0:
                    prefix = '\n$INDENT$INDENT$INDENT'
                arg_list.append( prefix + c_type + ' ' + arg.lower() )

            pieces.append( '$INDENTvoid LAPACK_' + name + '( ' + ", ".join( arg_list ) + " );\n" )
        pieces.append( '\n' )
    content = ''.join( pieces )

    with open( template_file ) as template:
        result = template.read()
    result = result.replace( "$CONTENT", content )
    result = result.replace( '$INDENT', '    ' )

    # The file is written in binary mode so line endings are not translated.
    # Text that is not already a byte string (Python 3) is encoded first.
    if not isinstance( result, bytes ):
        result = result.encode( 'utf-8' )
    with open( dest_file, 'wb' ) as dest:
        dest.write( result )
	
	
def write_functions( info_map, group, template_file, base_dir ):
    """Write one <groupname>.hpp file per group of subroutines.

    info_map      -- dict mapping a subroutine name to its info map; the
                     'arguments' list, each argument's 'cpptype' and the
                     'description' are used
    group         -- dict mapping a group name to a list of subroutine names
    template_file -- path of the template; the placeholders '$OVERLOADS',
                     '$INDENT', '$GROUPNAME', '$groupname' and '$DESCRIPTION'
                     are replaced
    base_dir      -- directory the generated files are written to

    Each file gets one inline overload per subroutine of the group, forwarding
    to the matching LAPACK_<NAME> function. The description is taken from the
    first subroutine in the group's list.
    """
    # The template is the same for every group, so read it only once. It is
    # read in binary mode; bytes that are not already a str (Python 3) are
    # decoded so they can be combined with the generated text.
    with open( template_file, 'rb' ) as template:
        template_text = template.read()
    if not isinstance( template_text, str ):
        template_text = template_text.decode( 'utf-8' )

    for g in group:
        filename = g.lower() + '.hpp'

        overloads = []
        for k in group[ g ]:
            routine = info_map[ k ]

            arg_list = []
            lapack_arg_list = []
            for arg in routine[ 'arguments' ]:
                cpptype = routine[ 'arg_type' ][ arg ][ 'cpptype' ]
                arg_name = arg.lower()

                # the signature wraps after every 4th argument, the forwarded
                # call after every 8th
                count = len( arg_list )
                arg_prefix = ''
                lapack_prefix = ''
                if count > 0 and count % 4 == 0:
                    arg_prefix = '\n$INDENT$INDENT$INDENT'
                if count > 0 and count % 8 == 0:
                    lapack_prefix = '\n$INDENT$INDENT$INDENT$INDENT'

                arg_list.append( arg_prefix + cpptype + ' ' + arg_name )

                if not cpptype.endswith( '*' ):
                    # not a pointer: pass the address of the value/reference
                    lapack_arg = '&' + arg_name
                elif 'complex' in cpptype:
                    # pointers to complex data need a conversion; only the
                    # type is inspected, never the argument name
                    lapack_arg = 'traits::complex_ptr(' + arg_name + ')'
                else:
                    lapack_arg = arg_name
                lapack_arg_list.append( lapack_prefix + lapack_arg )

            overloads.append(
                '$INDENTinline void $groupname( ' + ", ".join( arg_list )
                + ' ) {\n$INDENT$INDENTLAPACK_' + k + '( '
                + ", ".join( lapack_arg_list )
                + ' );\n$INDENT}\n' )

        result = template_text.replace( '$OVERLOADS', ''.join( overloads ) )

        # replace the global variables as last (this is convenient)
        result = result.replace( '$INDENT', '    ' )
        result = result.replace( '$GROUPNAME', g )
        result = result.replace( '$groupname', g.lower() )
        result = result.replace( '$DESCRIPTION', info_map[ group[ g ][ 0 ] ][ 'description' ] )

        # binary output needs bytes; encode text that is not a byte string
        if not isinstance( result, bytes ):
            result = result.encode( 'utf-8' )
        with open( os.path.join( base_dir, filename ), 'wb' ) as dest:
            dest.write( result )

	
# Directory that holds the LAPACK Fortran sources to parse.
lapack_src_path = 'lapack-3.1.1/SRC'


# Maps each recognised subroutine name to the info map built by
# parse_lapack_file.
function_info_map = {}

# Only files whose name starts with c, d, s or z and ends in '.f' are parsed.
right_file = re.compile( r'^[cdsz].+\.f$' )

# Sorting only makes the order of the progress messages reproducible.
for lapack_file in sorted( os.listdir( lapack_src_path ) ):
    if right_file.match( lapack_file ) is None:
        continue

    key, value = parse_lapack_file( os.path.join( lapack_src_path, lapack_file ) )
    # files without a recognised SUBROUTINE statement yield an empty name
    if len( key ) > 1:
        # a single pre-formatted argument prints the same text whether print
        # is a statement (Python 2) or a function (Python 3)
        print( "Adding LAPACK subroutine %s" % key )
        function_info_map[ key ] = value

print( "Grouping subroutines..." )
groups = group_subroutines( function_info_map )

# write_functions puts its files into this directory
if not os.path.exists( 'generated' ):
    os.mkdir( 'generated' )

write_lapack_names( function_info_map, groups, 'tmpl_lapack_names.h', 'lapack_names.h' )
write_lapack_h( function_info_map, groups, 'tmpl_lapack.h', 'lapack.h' )
write_functions( function_info_map, groups, 'tmpl_functions.hpp', 'generated' )