# First implementation of numerical evaluation of the Wright omega function # # RMC April 2000 # # Wright omega function # # RMC May 2000 # # This file contains a module which exports "omega", a simple procedure for # manipulation of the Wright omega function in Maple, # and a submodule "Marsaglia", which allows one to compute the branch point # series for omega(z) near z = -1 +/- I*Pi. # # This file also defines the globals # # `evalf/omega` --- user interface to evalf # `diff/omega` --- differentiation of omega # `int/omega` --- integration of omega # `convert/omega` --- convert a Lambert W function expression to a Wright omega function expression # `convert/LambertW` --- convert a Wright omega function expression to a Lambert W function expression # # and defines the local procedures # # `evalf/omega/real` --- evaluate omega over R # `evalf/omega/complex` --- evaluate omega over C minus a nbd of the discontinuities # `evalf/omega/complex/regularized` --- evaluate omega near the discontinuities # # # MODULE DEFINITION # Wright := module( ) export Marsaglia, omega; global `evalf/omega`, `diff/omega`, `convert/omega`, `convert/LambertW`, `int/omega`; local `evalf/omega/real`, `evalf/omega/complex`, `evalf/omega/complex/regularized`, MaxIterations; # # The maximum number of iterations of Halley's method is limited by the # maximum number of Digits available; on the system that this routine # was written on, MaxIterations works out to 15 (which corresponds to # over 268 million Digits---note that at that length, Maple's # arithmetic is too slow anyway.) We make this computation in the module instantiation. # MaxIterations := ilog[3]( kernelopts(maxdigits)/evalhf(Digits) ); # # Basic definition of omega and some special values. # omega := proc( z ) if z = 0 then LambertW( 1 ) elif z = 1 then 1 elif z = -1 + I*Pi then -1 elif z = -1 - I*Pi then -1 elif z = -infinity then 0 elif z = infinity then infinity elif type( z, complex(float) ) then `evalf/omega`( z ) else 'procname(args)' end if; end proc; # # submodule: Marsaglia # # RMC April 16, 2000 # # Module to implement the coefficients of the Marsaglia & Marsaglia branch point # series for W( -exp( -1 - z^2/2 ) ). This allows us to fix some bugs in Maple 6 # with regard to the branch point series for various branches of W composed with exp. # # We have, for z near enough to -1 + I*Pi: using the Batchelor transformation # # omega( z ) = -1 - Sum( a(n)*( I*conjugate(sqrt( conjugate(2*(z+1-I*Pi)) )) )^n, n=1..infinity) # # and, for z near enough to -1 - I*Pi: # # omega( z ) = -1 - Sum( a(n)*(-I*conjugate(sqrt( conjugate(2*(z+1-I*Pi)) )) )^n, n=1..infinity) # # Note that in this region, (-3*Pi < Im(z) <= 3*Pi ) # omega(z) = piecewise( Im(z) > Pi, W(1,exp(z)), Im(z) > -Pi, W(0,exp(z)), W(-1,exp(z)) ) # and thus the series above imply the corresponding one-sided series for W(-1,exp(z)) and # W(1,exp(z)), as well as the series for W(0,exp(z)), all near the branch point exp(z)=-exp(-1). # # Note that we can't use the sum from n=0, even though this works "really", because Maple # will not know to make 0^0 = 1 under certain circumstances, even though it does so correctly # in other cases. ( The circumstance is (0)^n, where n later gets set to zero). # # Output is an exported procedure that computes, in a lazy manner, the series coefficients # from the recurrence relation from the papers referred to below. # # A module was used, for three reasons: (1): to try modules out; (2) to take control of the # remember table; and (3) to avoid unnecessary tail recursion in the hopes that this would # be faster than the option-remember style procedure in [1] below. # # References: # # [1] Robert M. Corless, David J. Jeffrey, and Donald E. Knuth, ``A sequence of series for # the Lambert W function'', Proc. ISSAC 97, Maui, pp. 197--204. # # [2] G. Marsaglia and J. C. Marsaglia, ``A new derivation of Stirling's approximation to n!'', # American Math. Monthly 97 (1990), pp. 826--829. # # # Marsaglia := module( ) export a, already; local la; la := table(); # local table to remember the values computed la[0] := 1; la[1] := 1; already := 1; # remember how many we have computed already # export the procedure "a", which lazily computes coefficients as needed. # The name "a" was chosen to agree with the notation in reference [2]. a := proc( n ) local i,k; if not type( n, nonnegint ) then RETURN( 'procname( args )' ) fi; if n <= already then RETURN( la[n] ) else for i from already+1 to n do la[i] := (la[i-1] - add( k*la[k]*la[i+1-k], k=2..i-1 ))/((i+1)*la[1]) od; already := n; RETURN( la[n] ) fi end proc; end module; # # Interface to evalf, using a global variable, in the current fashion. # `evalf/omega` := proc( ) local z, zr, res; z := evalf( args[1] ); if not type(z, complex(float) ) then RETURN( 'omega'( z ) ); elif type(z,embedded(real)) then # In this case do the computation disregarding the sign of the imaginary part, # but put it back later. zr := Re(z); res := `evalf/omega/real`( evalf( zr ) ); # Because Im( omega( z ) ) > 0 if Im( z ) > 0, we keep the sign of zero # the same as the sign of zero on input. if not type(z,float) then res := res + Im(z)*I; end if; res; else `evalf/omega/complex`( evalf( z ) ); end if; end proc: # # Details of floating-point evaluation: Real Case. # We use Halley's method starting with a low-accuracy initial guess, # using low precision to start with. Increasing precision as the accuracy # is increased saves computation time for large settings of Digits. # `evalf/omega/real` := proc( x::embedded_real ) local Asympt_omega, converged, df, ddf, i, inputDigits, f, guardedDigits, tol, w; inputDigits := Digits; guardedDigits := Digits + 2; # For x < -1, the function has condition number asymptotic to x, and # hence for accurate solution to the problem AS INPUT, we must work to # more figures; ilog10(-x) more figures, to be exact. if x <= -1 then guardedDigits := guardedDigits + ilog10(-x); end if; Digits := trunc( evalhf( Digits ) ); # Start with hardware floats tol := Float(1,-inputDigits); # The following initial guesses are chosen for simplicity and for rough agreement # with each other at their matching points; we only need one or two digits accuracy # to start with, because Halley's method converges very quickly. if x < -2 then # Accurate to within 0.003 on this interval, from the series at x=-infinity. w := evalf(exp(x)-exp(2*x)); elif x <= 2 then # Chebyshev-Pade approximation to omega, accurate to within 0.004 on -2..2 # This expansion was computed with the numapprox package. The denominator # is not zero on this range. w := 9.6171516318087*x-1262.9808843573+172489.11025178/(x+136.51141128679); else # The following routine was created using series and codegen[optimize] # with the "tryhard" option, and codegen[makeproc]. Asympt_omega := proc (x) local t23, t20, t1; t20 := evalf(ln(x)); t23 := t20/(x^2); # The denominator can never be zero, because x >= 2. t1 := x-t20+1/2*(t20-2)*t23+(t20+1/6*(6+(-9+2*t20)*t20)*t23)/x end proc; # x >= 2 and hence this is REAL and POSITIVE, and accurate to within 0.007. w := Asympt_omega(x); end if; userinfo( 1, omega, "Initial guess is", w ); # # Now the main loop using Halley's iteration # # For the first evaluation of the residual, we expect that it will be about 10^(-2), # and so evaluation in hardware floats is more than enough to get it accurately enough # for the next iteration. WELL, NO! A super-accurate initial guess (for x >> 0 and # x << 0) will cause the residual to be zero, which causes everything to stop too early. # So we have to compute the residual accurately. f := Re( evalf( w + ln(w) - x, min(3*Digits, guardedDigits) ) ); df := evalf( 1 + 1/w , min(2*Digits, guardedDigits) ); ddf := evalf( -1/w^2 , min( Digits, guardedDigits) ); # Now set the starting value of Digits. We expect our initial guess to have # given us at least 1 Digits of accuracy, and hence Digits is at least 1. # But if we are lucky, we will often have more accuracy to start with. # This can save time in the Halley iteration. If f=0, i.e. w is exactly right, # then we can potentially get -infinity out of the ilog10; hence the min. # Digits := max( 1, min(guardedDigits, -ilog10( abs(f*x/(1+f) )) ) ); # The condition number of omega is (except at the branch points or the discontinuities, # neither of which is relevant here for the real case) x/(1+omega). Hence we use this # in our convergence test---we say that the iteration has converged if the estimated # accuracy is as good as desired. Note that this is an estimate only, and might # be fooled (for example, for x=0---so we have to handle that case separately). if abs(x) <= 1 then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * x/(1+w) ) <= tol ); end if; # HALLEY ITERATION # If Halley's method doesn't converge in MaxIterations iterations, # tripling digits all the while, then at the end we are working in kernelopts(maxdigits) # digits (more than 268 million on my system); hence failure is clear. But really # we should have hit the limit of guardedDigits before then anyway. for i to MaxIterations while not converged do # The denominator of the normalized Halley's method is 2(w+1)^2 + w + ln(w)-x. # Hence it can be zero at omega(x) ONLY if w=-1 and x = -1 + ln(-1), # which is impossible in the real case. # A graph of x = w + ln(w) + 2*(w+1)^2 shows that it approaches x = w+ln(w) only # when x -> -infinity, and indeed for the same value of x the two curves are separated # by a distance of at least 2 (because 2(w+1)^2 >= 2). If the estimate w was bad enough # that it would give a zero denominator, then we would have a problem. # This will not happen, because the initial guess is closer than 2 to w, and # Halley's method improves the residual at each iteration. # If w=0 somehow then df is infinite; but this should not happen for finite x. # If f is accurate to 2*d Digits, then w will be accurate to 3*d Digits, because # the leading d Digits of f are zero. ddf needs only to be accurate to 1*d Digits. # By assumption, the value of w in the right hand side of the following formula is # accurate to (roughly) d = Digits/3 digits. The result will be accurate to Digits. Digits := min( 3*Digits, guardedDigits ); w := w - f/(df - f*ddf/(2*df)); userinfo( 5, omega, "At iteration ", i, " using ", Digits, " Digits, the estimate is ", w ); # The Re in the following statement removes spurious 0.*I's. It should not be needed. # We compute the residual to thrice the Digits used to compute w, because the leading # Digits of f should all be zero. f := Re(evalf( w + ln(w) - x, min( 3*Digits , guardedDigits))); df := evalf( 1 + 1/w , min( 2*Digits , guardedDigits) ); ddf := evalf( -1/w^2 , min( Digits , guardedDigits) ); # Now set the continuing value of Digits. This step will take care of the # fact that we don't really triple Digits at each iteration. Digits := max( 1, min(guardedDigits, -ilog10( abs(f*x/(1+f) )) ) ); if abs(x) <= 1 then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * x/(1+w) ) <= tol ); end if; end do; userinfo( 1, omega, "Number of iterations taken is ", i-1); if not converged then ERROR("Convergence failure", "input x = ", x, "current best estimate for omega = ", w, "Residual = ", f, "current setting of Digits = ", Digits, "maximum permitted number of Digits = ", guardedDigits ) end if; # We clean off the guard digits before returning the answer. evalf( w, inputDigits ) end proc: # C O M P L E X C A S E # # Details of floating-point evaluation: Complex Case. (based on real case) # We use Halley's method starting with a low-accuracy initial guess, # using low precision to start with. Increasing precision as the accuracy # is increased saves computation time for large settings of Digits. # `evalf/omega/complex` := proc( z::complex(float) ) local aboveBottomCut, aboveTopCut, Asympt_omega, belowBottomCut, belowTopCut, betweenCuts, converged, df, ddf, i, inputDigits, f, guardedDigits, nearBottomCut, nearTopCut, onBottomCut, onTopCut, p, pade, pi, tol, w, x, y, zpPi, zmPi; x := Re(z); y := Im(z); pi := evalf(Pi); # If the user inputs an imaginary part that evalf's to the same # that Pi does, at the input precision, the imaginary part is # aggressively declared to be exactly pi. Rounding := -infinity; # We close the branches from below. zpPi := evalf(z+Pi*I); # Do these computations at the input precision. zmPi := evalf(z-Pi*I); # Otherwise, doing them later can cause branch crossings. userinfo(5,omega,"z + Pi*I = ", zpPi, "z - Pi*I = ", zmPi ); Rounding := nearest; # Subsequent computations are done round-to-nearest. # On exit, this environment variable will automatically be reset. # Comparisons are done at the current setting of Digits. aboveTopCut := evalb( y > pi ); onTopCut := evalb( y = pi ); belowTopCut := evalb( y < pi and y >= 0 ); aboveBottomCut := evalb( y > -pi and y <= 0 ); betweenCuts := evalb( belowTopCut or aboveBottomCut ); onBottomCut := evalb( y = -pi ); belowBottomCut := evalb( y < -pi ); nearTopCut := evalb( abs(y-pi) <= 1.0e-3 and x <= -1 ); nearBottomCut := evalb( abs(y+pi) <= 1.0e-3 and x <= -1 ); inputDigits := Digits; guardedDigits := Digits + 2; # For x < -1, -Pi < y <= Pi, the function has condition number asymptotic to x, and # hence for accurate solution to the problem AS INPUT, we must work to # more figures; ilog10(-x) more figures, to be exact. if x <= -9 and -pi < y and y <= pi then guardedDigits := guardedDigits + ilog10(-x); end if; # Similarly, if we are # close to the preimages of the branch point, then we need more figures; we take # at most twice as many as previously thought necessary (it is a second order # branch point). if abs(zmPi+1) < 1.0e-1 or abs(zpPi+1) < 1.0e-1 then guardedDigits := guardedDigits - min(-guardedDigits, ilog10(min(abs(zmPi+1),abs(zpPi+1)))) end if; Digits := trunc( evalhf( Digits ) ); # Start with hardware floats tol := Float(1,-inputDigits); # The following initial guesses are chosen for simplicity and for rough agreement # with each other at their matching points; we only need one or two digits accuracy # to start with, because Halley's method converges very quickly. # We do, however, have to be extremely careful near the discontinuities to stay # on the correct side of the discontinuity. We do this by transforming the problem # to a continuous one in that case, and solving it by an auxiliary routine. if x <= -1.6 and (betweenCuts or onTopCut) then # Accurate to within 0.007 on this interval, from the series at x=-infinity. userinfo(5,omega,"Using -infinity series initial guess"); w := evalf(exp(z)-exp(2*z)); elif -2 < x and x <= 1 and 1 < y and y < 5 then # Residual less than 0.01 on this interval userinfo(5,omega,"Using top branch cut series initial guess"); p := conjugate(sqrt(2*conjugate(zmPi+1))); w := evalf(-1+I*p+1/3*p^2-1/36*I*p^3+1/270*p^4+1/4320*I*p^5); elif -2 < x and x <= 1 and -5 < y and y < -1 then userinfo(5,omega,"Using bottom branch cut series initial guess"); p := -conjugate(sqrt(2*conjugate(zpPi+1))); w := evalf(-1+I*p+1/3*p^2-1/36*I*p^3+1/270*p^4+1/4320*I*p^5); elif x >= -1.6 and x < 3 and y >= -5 and y <= 5 then # Pade approximation to omega, accurate to within 0.01 on this square. # This expansion was computed with the numapprox package. The denominator # is not zero on this range, except in a region already dealt with by the # branch point series region, which overlaps with this square. userinfo(5,omega,"Using Pade approximant initial guess"); pade := proc (z) .56714329040978+z/(2.7632228343519+z/(-1.7775896045332+z/ (-2.5361734595222+z/(3.8805909222460+z/(1.2556471682699+z/ (-7.5926539627525+z/(-.68334416990306+z/ (14.054215129395+2.8194978286860*z)))))))) end proc; w := pade(z); else # The following routine was created using series and codegen[optimize] # with the "tryhard" option, and codegen[makeproc]. userinfo(5,omega,"Using asymptotic series initial guess"); Asympt_omega := proc (z) local t23, t20, t1; t20 := evalf(ln(z)); t23 := t20/(z^2); # We need to prove that the denominator cannot be zero. ******** t1 := z-t20+1/2*(t20-2)*t23+(t20+1/6*(6+(-9+2*t20)*t20)*t23)/z end proc; w := Asympt_omega(z); end if; userinfo( 1, omega, "Initial guess is", w ); # If we are on or near the discontinuities, use the transformation v = -w to # reduce it to the continuous problem -v + ln(v) = zeta for zeta near the axis. # The initial guesses will be good enough to distinguish between the cases # v > 1 (corresponding to being outside the strip) and v < 1 (z inside the strip). # The fact that this works is gratifying. For the analysis of why it works, # see the paper. if nearTopCut then RETURN( `evalf/omega/complex/regularized`( zmPi, w, inputDigits, guardedDigits ) ) elif nearBottomCut then RETURN( `evalf/omega/complex/regularized`( zpPi, w, inputDigits, guardedDigits ) ) end if; # # Now the main loop using Halley's iteration, for all other cases. # # We have to compute the residual accurately. f := evalf( w + ln(w) - z, min(3*Digits, guardedDigits) ); df := evalf( 1 + 1/w , min(2*Digits, guardedDigits) ); ddf := evalf( -1/w^2 , min( Digits, guardedDigits) ); # Now set the starting value of Digits. We expect our initial guess to have # given us at least 1 Digits of accuracy, and hence Digits is at least 1. # But if we are lucky, we will often have more accuracy to start with. # This can save time in the Halley iteration. If f=0, i.e. w is exactly right, # then we can potentially get -infinity out of the ilog10; hence the min. # Digits := max( 1, min(guardedDigits, -ilog10( abs(f*z/(1+f))) ) ); # The condition number of omega is (except at the discontinuities), # x/(1+omega). Hence we use this # in our convergence test---we say that the iteration has converged if the estimated # accuracy is as good as desired. Note that this is an estimate only, and might # be fooled (for example, for x=0---so we have to handle that case separately). if abs(z) <= 1 then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * z/(1+w) ) <= tol ); end if; # HALLEY ITERATION # If Halley's method doesn't converge in MaxIterations iterations, # tripling digits all the while, then at the end we are working in kernelopts(maxdigits) # digits (more than 268 million on my system); hence failure is clear. But really # we should have hit the limit of guardedDigits before then anyway. for i to MaxIterations while not converged do # The denominator of the normalized Halley's method is 2(w+1)^2 + w + ln(w)-x. # Hence it can be zero at omega(x) ONLY if w=-1 and x = -1 + ln(-1), # which has already been handled. # If w=0 somehow then df is infinite; but this should not happen for finite z. # If f is accurate to 2*d Digits, then w will be accurate to 3*d Digits, because # the leading d Digits of f are zero. ddf needs only to be accurate to 1*d Digits. # By assumption, the value of w in the right hand side of the following formula is # accurate to (roughly) d = Digits/3 digits. The result will be accurate to Digits. Digits := min( 3*Digits, guardedDigits ); w := w*(1 - f/((w+1)+f/(w+1))); # - f/(df - f*ddf/(2*df)); userinfo( 5, omega, "At iteration ", i, " using ", Digits, " Digits, the estimate is ", w ); # We compute the residual to thrice the Digits used to compute w, because the leading # Digits of f should all be zero. f := evalf( w + ln(w) - z, min(3*Digits, guardedDigits) ); df := evalf( 1 + 1/w , min(2*Digits, guardedDigits) ); ddf := evalf( -1/w^2 , min( Digits, guardedDigits) ); # Now set the continuing value of Digits. This step will take care of the # fact that we don't really triple Digits at each iteration. Digits := max( 1, min(guardedDigits, -ilog10( abs(f*z/(1+f) )) ) ); if abs(z) <= 1 then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * z/(1+w) ) <= tol ); end if; end do; userinfo( 1, omega, "Number of iterations taken is ", i-1); if not converged then ERROR("Convergence failure", "input z = ", z, "current best estimate for omega = ", w, "Residual = ", f, "current setting of Digits = ", Digits, "maximum permitted number of Digits = ", guardedDigits ) end if; # We clean off the guard digits before returning the answer. evalf( w, inputDigits ) end proc: # COMPLEX CASE # # Solver for -v + ln(v) = z # # Properly used, this regularizes the solution of w + ln(w) = z near the discontinuities # at z = t +/- I*Pi, for t <= -1. # `evalf/omega/complex/regularized` := proc( z0, w0, inputDigits, guardedDigits ) local converged, df, ddf, f, i, tol, v, z; z := evalf( z0,inputDigits); v := evalf(-w0,inputDigits); tol := Float(1,-inputDigits); Digits := trunc( evalhf( Digits ) ); # # Now the main loop using Halley's iteration # # We have to compute the residual accurately. f := evalf(-v + ln(v) - z, min(3*Digits, guardedDigits) ); df := evalf(-1 + 1/v , min(2*Digits, guardedDigits) ); ddf := evalf( -1/v^2 , min( Digits, guardedDigits) ); Digits := max( 1, min(guardedDigits, -ilog10( abs(f)) ) ); if abs(z) <= 1 or abs(v-1) <= tol then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * z/(-1+v) ) <= tol ); end if; for i to MaxIterations while not converged do Digits := min( 3*Digits, guardedDigits ); v := v - f/(df - f*ddf/(2*df)); userinfo( 5, omega, "At iteration ", i, " using ", Digits, " Digits, the estimate is ", -v ); # We compute the residual to thrice the Digits used to compute v, because the leading # Digits of f should all be zero. f := evalf( -v + ln(v) - z, min(3*Digits, guardedDigits) ); df := evalf( -1 + 1/v , min(2*Digits, guardedDigits) ); ddf := evalf( -1/v^2 , min( Digits, guardedDigits) ); # Now set the continuing value of Digits. This step will take care of the # fact that we don't really triple Digits at each iteration. We will have # trouble near the branch point v=1; we will really need to provide an # exceptionally good initial guess to counterbalance this. Digits := max( 1, min(guardedDigits, -ilog10( abs(f*z/(-1+f) )) ) ); if abs(z) <= 1 or abs(v-1) <= tol then converged := evalb( abs(f) <= tol ); else converged := evalb( abs( f * z/(-1+v) ) <= tol ); end if; end do; userinfo( 1, omega, "Number of iterations taken is ", i-1); if not converged then ERROR("Convergence failure", "input z = ", z, "current best estimate for omega = ", -v, "Residual = ", f, "current setting of Digits = ", Digits, "maximum permitted number of Digits = ", guardedDigits ) end if; # We clean off the guard digits before returning the answer. evalf( -v, inputDigits ) end proc; # regularized # # Derivative of the Wright omega function. # Nested lexical scoping ensures that omega is Wright:-omega. # `diff/omega` := proc( expr, var ); diff(expr,var)*(omega(expr)/(1+omega(expr))) end proc: # # Convert an expression containing the Lambert W function to # one containing instead the Wright omega function. # `convert/omega` := proc( expr ) local equiv, LAMBERTW; LAMBERTW := proc( n, x ) if nargs=1 then omega( ln(n) ) # if n=1 then it automatically simplifies to LambertW(1) again. else omega( ln(x) + 2*Pi*I*n ) end if; end proc; eval( expr, LambertW=LAMBERTW ); end proc; # # Convert an expression containing the Wright omega function to # one containing the Lambert W function. # # The care that we take with "unwind" of a log of something will # help avoid some difficulties with the ceiling of an argument. # `convert/LambertW` := proc( expr ) local unwind; unwind := proc( z ) if type(z,function) and op(0,z)=ln then 0 else ceil( (Im(z)-Pi)/(2*Pi) ); end if; end proc; eval( expr, omega = (z -> LambertW( unwind(z), exp(z) ) ) ) end proc; # Based on `int/LambertW`, the following procedure # allows Maple to integrate certain functions involving # omega. `int/omega` := proc (f) local gx, h, inds, u; inds := map(proc (x) if op(0,x) = ('omega') then x end if end proc,indets(f,function)); if nops(inds) <> 1 then RETURN(FAIL) end if; inds := inds[1]; if nops(inds) = 1 then gx := op(inds) else RETURN(FAIL) end if; if not type(gx,linear(_X)) then RETURN(FAIL) end if; h := subs(inds = u,_X = (u+ln(u)-coeff(gx,_X,0))/coeff(gx,_X),f); h := h*(1+1/u)/coeff(gx,_X); h := int(h,u); if has(h,int) then RETURN(FAIL) end if; subs(ln(u) = gx-u,u = inds,h) end proc: end module: