点击上方蓝字 江湖评谈设为关注
前言
看下通过EntryPoint反射的Main函数入口,是如何被CLR操控的。它本质上跟普通的反射没有任何区别,些许差距在于反射的目标不同(普通的反射针对的是普通的托管方法,而EntryPoint反射的是托管Main固定入口)。构建反射的要素,第一个就是CLR获取到IL的二进制代码,第二个就是通过相对应函数的IL二进制代码调用JIT进行编译。这两个点是EntryPoint调用Main的关键地方。
例子
非常简单的例子
// Demo host: loads ConsoleApp1.dll from raw bytes and runs its entry point through reflection.
static void Main(string[] args)
{
string path = @"E:\Visual Studio Project\Test_\ConsoleApp1\bin\Debug\net8.0\ConsoleApp1.dll";
// Read the whole managed DLL into memory so it is loaded from a byte array rather than from a path.
byte[] buffer = File.ReadAllBytes(path);
Assembly asm = Assembly.Load(buffer);
// NOTE(review): EntryPoint is presumably null when the assembly has no entry point — confirm before relying on it.
MethodInfo Point = asm.EntryPoint;
// No target object is passed (null); args is wrapped in an object[] so it arrives as one string[] parameter.
Point.Invoke(null, new object[] { args });
}
ConsoleApp1.dll 的Program.cs代码如下:
// Entry point of ConsoleApp1.dll: prints a marker line, then waits for a line of console input.
static void Main(string[] args)
{
Console.WriteLine("Call Main");
Console.ReadLine();
}
EntryPoint
Assembly.Load本身通过buffer中缓冲的二进制字节构建了EntryPoint的入口,而buffer则是读取path路径下的托管DLL得来的。Load如下:
/// <summary>
/// Loads an assembly from an in-memory image, optionally together with its symbol bytes.
/// </summary>
/// <param name="rawAssembly">Bytes of the assembly image; must be non-null and non-empty.</param>
/// <param name="rawSymbolStore">Optional symbol bytes, forwarded unchanged to InternalLoad.</param>
/// <returns>The assembly produced by InternalLoad on a newly created IndividualAssemblyLoadContext.</returns>
/// <exception cref="BadImageFormatException">Thrown when <paramref name="rawAssembly"/> has zero length.</exception>
public static Assembly Load(byte[] rawAssembly, byte[]? rawSymbolStore)
{
// Reject a null image up front.
ArgumentNullException.ThrowIfNull(rawAssembly);
if (rawAssembly.Length == 0)
throw new BadImageFormatException(SR.BadImageFormat_BadILFormat);
// NOTE(review): presumably blocks byte-array loads while deserialization is in progress unless the
// "AllowAssembliesFromByteArrays" switch permits it — confirm against SerializationInfo.
SerializationInfo.ThrowIfDeserializationInProgress("AllowAssembliesFromByteArrays",
ref s_cachedSerializationSwitch);
// Each call creates its own load context and loads the image into it.
AssemblyLoadContext alc = new IndividualAssemblyLoadContext("Assembly.Load(byte[], ...)");
return alc.InternalLoad(rawAssembly, rawSymbolStore);
}
alc.InternalLoad主要是把托管DLL的二进制和长度传入到非托管里面去进行操作,如下:
/// <summary>
/// Passes the assembly bytes (and optional symbol bytes) to the native LoadFromStream call
/// and returns the assembly it reports back.
/// </summary>
/// <param name="arrAssembly">Bytes of the assembly image.</param>
/// <param name="arrSymbols">Symbol bytes; may be an empty span.</param>
/// <returns>The assembly written into <c>loadedAssembly</c> by LoadFromStream.</returns>
[RequiresUnreferencedCode("Types and members the loaded assembly depends on might be removed")]
internal unsafe Assembly InternalLoad(ReadOnlySpan<byte> arrAssembly, ReadOnlySpan<byte> arrSymbols)
{
RuntimeAssembly? loadedAssembly = null;
// Pin both spans for the duration of the call so raw pointers can be handed to native code.
fixed (byte* ptrAssembly = arrAssembly, ptrSymbols = arrSymbols)
{
// The result comes back through the ObjectHandleOnStack wrapping loadedAssembly.
LoadFromStream(_nativeAssemblyLoadContext, new IntPtr(ptrAssembly), arrAssembly.Length,
new IntPtr(ptrSymbols), arrSymbols.Length, ObjectHandleOnStack.Create(ref loadedAssembly));
}
// The null-forgiving operator assumes LoadFromStream either set the result or threw.
return loadedAssembly!;
}
LoadFromStream调用的是QCall的AssemblyNative_LoadFromStream:
/// <summary>
/// Managed declaration of the QCall whose native entry point is "AssemblyNative_LoadFromStream".
/// Takes the native binder handle, pointer/length pairs for the assembly and symbol bytes,
/// and a stack handle through which the loaded assembly is returned.
/// </summary>
[RequiresUnreferencedCode("Types and members the loaded assembly depends on might be removed")]
[LibraryImport(RuntimeHelpers.QCall, EntryPoint = "AssemblyNative_LoadFromStream")]
private static partial void LoadFromStream(IntPtr ptrNativeAssemblyBinder, IntPtr ptrAssemblyArray, int iAssemblyArrayLen, IntPtr ptrSymbols, int iSymbolArrayLen, ObjectHandleOnStack retAssembly);
AssemblyNative_LoadFromStream主要做了两件事情,构建程序的IL镜像pILImage以及构建托管DLL的程序集pLoadedAssembly,其它会填充一些内部字段比如EntryPoint,后面会通过asm.EntryPoint直接获得
// QCall target for the managed LoadFromStream import.
//
// Builds a PEImage from the supplied byte array, validates it, loads it through the given
// binder, and hands the managed assembly object back via retLoadedAssembly. If symbol bytes
// were supplied and the loaded assembly's image is the very image created here, the symbol
// bytes are stored on the module (only when DEBUGGING_SUPPORTED is defined).
//
// Parameters:
//   ptrNativeAssemblyBinder - native AssemblyBinder pointer; asserted non-NULL.
//   ptrAssemblyArray / cbAssemblyArrayLength - assembly bytes; asserted non-NULL and > 0.
//   ptrSymbolArray / cbSymbolArrayLength - optional symbol bytes; length asserted > 0 when present.
//   retLoadedAssembly - receives the exposed managed object of the loaded assembly.
//
// Throws COR_E_BADIMAGEFORMAT when the image fails CheckILFormat, or when a non-IL-only image
// targets a collectible loader allocator.
extern "C" void QCALLTYPE AssemblyNative_LoadFromStream(INT_PTR ptrNativeAssemblyBinder, INT_PTR ptrAssemblyArray,
INT32 cbAssemblyArrayLength, INT_PTR ptrSymbolArray, INT32 cbSymbolArrayLength,
QCall::ObjectHandleOnStack retLoadedAssembly)
{
QCALL_CONTRACT;
BEGIN_QCALL;
// Ensure that the invariants are in place
_ASSERTE(ptrNativeAssemblyBinder != NULL);
_ASSERTE((ptrAssemblyArray != NULL) && (cbAssemblyArrayLength > 0));
_ASSERTE((ptrSymbolArray == NULL) || (cbSymbolArrayLength > 0));
// Wrap the raw bytes in a PEImage; the holder owns the image for the rest of this function.
PEImageHolder pILImage(PEImage::CreateFromByteArray((BYTE*)ptrAssemblyArray, (COUNT_T)cbAssemblyArrayLength));
// Need to verify that this is a valid CLR assembly.
if (!pILImage->CheckILFormat())
ThrowHR(COR_E_BADIMAGEFORMAT, BFA_BAD_IL);
// Get the binder context in which the assembly will be loaded
AssemblyBinder *pBinder = reinterpret_cast<AssemblyBinder*>(ptrNativeAssemblyBinder);
LoaderAllocator* pLoaderAllocator = pBinder->GetLoaderAllocator();
if (pLoaderAllocator && pLoaderAllocator->IsCollectible() && !pILImage->IsILOnly())
{
// Loading IJW assemblies into a collectible AssemblyLoadContext is not allowed
ThrowHR(COR_E_BADIMAGEFORMAT, BFA_IJW_IN_COLLECTIBLE_ALC);
}
// Pass the stream based assembly as IL in an attempt to bind and load it
Assembly* pLoadedAssembly = AssemblyNative::LoadFromPEImage(pBinder, pILImage);
{
// The managed object is fetched and published inside a GCX_COOP scope.
GCX_COOP();
retLoadedAssembly.Set(pLoadedAssembly->GetExposedObject());
}
LOG((LF_CLASSLOADER,
LL_INFO100,
"\tLoaded assembly from a file\n"));
// In order to assign the PDB image (if present),
// the resulting assembly's image needs to be exactly the one
// we created above. We need pointer comparison instead of pe image equivalence
// to avoid mixed binaries/PDB pairs of other images.
// This applies to both Desktop CLR and CoreCLR, with or without fusion.
BOOL fIsSameAssembly = (pLoadedAssembly->GetPEAssembly()->GetPEImage() == pILImage);
// Setting the PDB info is only applicable for our original assembly.
// This applies to both Desktop CLR and CoreCLR, with or without fusion.
if (fIsSameAssembly)
{
#ifdef DEBUGGING_SUPPORTED
// If we were given symbols, save a copy of them.
if (ptrSymbolArray != NULL)
{
PBYTE pSymbolArray = reinterpret_cast<PBYTE>(ptrSymbolArray);
pLoadedAssembly->GetModule()->SetSymbolBytes(pSymbolArray, (DWORD)cbSymbolArrayLength);
}
#endif // DEBUGGING_SUPPORTED
}
END_QCALL;
}
这样的话,就通过传递进入的托管DLL二进制,定位到了托管Main函数的入口,也即是EntryPoint入口点的所有托管要素。
RuntimeMethodHandle::InvokeMethod
上面代码Assembly.Load构建了托管入口的托管要素点,那么如何调用这个托管入口Main函数呢?这是第二步,它的代码如下
// Invoke the loaded entry point with no target object, passing args as its single parameter.
Point.Invoke(null, new object[] { args });
Invoke会调用InvokeWithOneArg函数,它会先通过CheckArguments检查传递进来的参数,然后优先调用委托_invokeFunc_ObjSpanArgs;如果该委托为空,则调用函数InvokeDirectByRefWithFewArgs:
/// <summary>
/// Invoke path for a method taking exactly one argument (asserted via <c>_argCount == 1</c>).
/// Validates the argument, invokes through the object-span delegate when one is available,
/// otherwise through InvokeDirectByRefWithFewArgs, and finally copies arguments back.
/// </summary>
/// <param name="obj">Target object passed through to the invoke delegate.</param>
/// <param name="invokeAttr">Binding flags; DoNotWrapExceptions disables exception wrapping here.</param>
/// <param name="binder">Binder forwarded to CheckArguments.</param>
/// <param name="parameters">Caller's argument array; only element 0 is read.</param>
/// <param name="culture">Culture forwarded to CheckArguments.</param>
/// <returns>The value returned by the invoke delegate.</returns>
/// <exception cref="TargetInvocationException">
/// Wraps an exception from the object-span delegate unless DoNotWrapExceptions is set.
/// </exception>
internal unsafe object? InvokeWithOneArg(
object? obj,
BindingFlags invokeAttr,
Binder? binder,
object?[] parameters,
CultureInfo? culture)
{
Debug.Assert(_argCount == 1);
object? arg = parameters[0];
// Single-element spans are built over locals for the argument, its copy, and the copy-back flag.
var parametersSpan = new ReadOnlySpan<object?>(in arg);
object? copyOfArg = null;
Span<object?> copyOfArgs = new(ref copyOfArg);
bool copyBack = false;
Span<bool> shouldCopyBack = new(ref copyBack);
object? ret;
// Determine the invoke strategy only if the corresponding flag has not been set yet.
if ((_strategy & InvokerStrategy.StrategyDetermined_ObjSpanArgs) == 0)
{
DetermineStrategy_ObjSpanArgs(ref _strategy, ref _invokeFunc_ObjSpanArgs, _method, _needsByRefStrategy, backwardsCompat: true);
}
// NOTE(review): presumably validates the argument and fills copyOfArgs / shouldCopyBack — confirm in CheckArguments.
CheckArguments(parametersSpan, copyOfArgs, shouldCopyBack, binder, culture, invokeAttr);
if (_invokeFunc_ObjSpanArgs is not null)
{
try
{
ret = _invokeFunc_ObjSpanArgs(obj, copyOfArgs);
}
catch (Exception e) when ((invokeAttr & BindingFlags.DoNotWrapExceptions) == 0)
{
throw new TargetInvocationException(e);
}
}
else
{
// No object-span delegate available: fall back to the byref-based path.
ret = InvokeDirectByRefWithFewArgs(obj, copyOfArgs, invokeAttr);
}
CopyBack(parameters, copyOfArgs, shouldCopyBack);
return ret;
}
InvokeDirectByRefWithFewArgs主要是把参数填充为栈上的byref数组,然后通过_invokeFunc_RefArgs调用非托管的RuntimeMethodHandle::InvokeMethod:
/// <summary>
/// Invokes the method through the byref-based delegate, using stack-allocated storage for
/// the argument references (asserted: <c>_argCount &lt;= MaxStackAllocArgCount</c>).
/// </summary>
/// <param name="obj">Target object passed through to the invoke delegate.</param>
/// <param name="copyOfArgs">Argument copies; each byref points at, or into, one of these slots.</param>
/// <param name="invokeAttr">Binding flags; DoNotWrapExceptions disables exception wrapping.</param>
/// <returns>The value returned by <c>_invokeFunc_RefArgs</c>.</returns>
/// <exception cref="TargetInvocationException">
/// Wraps an exception from the delegate unless DoNotWrapExceptions is set.
/// </exception>
internal unsafe object? InvokeDirectByRefWithFewArgs(object? obj, Span<object?> copyOfArgs, BindingFlags invokeAttr)
{
Debug.Assert(_argCount <= MaxStackAllocArgCount);
// Determine the byref invoke strategy only if the corresponding flag has not been set yet.
if ((_strategy & InvokerStrategy.StrategyDetermined_RefArgs) == 0)
{
DetermineStrategy_RefArgs(ref _strategy, ref _invokeFunc_RefArgs, _method, backwardsCompat: true);
}
// The local struct is reinterpreted as an array of IntPtr-sized slots, one per argument.
StackAllocatedByRefs byrefs = default;
#pragma warning disable CS8500
IntPtr* pByRefFixedStorage = (IntPtr*)&byrefs;
#pragma warning restore CS8500
// Value-type arguments get a byref to the boxed object's raw data; all others get a byref
// to the copyOfArgs slot itself.
for (int i = 0; i < _argCount; i++)
{
#pragma warning disable CS8500
*(ByReference*)(pByRefFixedStorage + i) = (_invokerArgFlags[i] & InvokerArgFlags.IsValueType) != 0 ?
#pragma warning restore CS8500
ByReference.Create(ref copyOfArgs[i]!.GetRawData()) :
ByReference.Create(ref copyOfArgs[i]);
}
try
{
return _invokeFunc_RefArgs!(obj, pByRefFixedStorage);
}
catch (Exception e) when ((invokeAttr & BindingFlags.DoNotWrapExceptions) == 0)
{
throw new TargetInvocationException(e);
}
}
_invokeFunc_RefArgs即是调用RuntimeMethodHandle::InvokeMethod
// FCall declared via FCIMPL4: takes the target object, an array of byref arguments, the
// native signature, and a flag saying whether a constructor is being invoked; returns an
// Object*. The body is not shown in this excerpt.
FCIMPL4(Object*, RuntimeMethodHandle::InvokeMethod,
Object *target,
PVOID* args, // An array of byrefs
SignatureNative* pSigUNSAFE,
CLR_BOOL fConstructor)
{
FCALL_CONTRACT;
// Body omitted here for readability (the real implementation is very long).
}
它这里面主要是调用JIT编译Program.Main函数为机器码,然后进行运行。那么整个的流程基本上清晰的展现出来了。
中间部分不重要的函数以及代码简略不提,依然可能有些繁琐,代码较多,但反射EntryPoint的大致基本上如此。
往期精彩回顾