以前负责DB相关工作时,经常需要写专门工具,将原始数据经过一定逻辑处理后导入目标库;工具需要做到在不影响业务的情况下,以最快的速度完成导入。
1. 一般数据源只有一个,而目的库有多个,对于DB读取要远快于写入,要注意读取的数据顺序保证能够对多个目的库同时写入。
2. Buffer 整个处理过程分为几段分别并行进行,为保证整个流水线每个时期都并行进行,每段之间需要足够大的Buffer。
3. 多线程+同步IO操作,可控性强,保证同时一个DB上执行导入导出的连接数最大值,可以通过库状态和需要的速度做合理调整。
使用生产消费队列控制Buffer大小:
/// <summary>
/// A capacity-bounded producer/consumer queue used as the buffer between
/// pipeline stages. Producers block in the Enqueue overloads while the queue
/// is over capacity; the Dequeue overloads never block.
/// </summary>
public class BlockingQueue<T>
{
    private readonly Queue<T> _queue = new Queue<T>();
    private readonly object _sync = new object();
    private int _capacity;
    // BUG FIX (original): _counters was never assigned, so every counter call
    // below threw NullReferenceException. All counter accesses are now
    // null-guarded; assign the instance in the constructor once the counter
    // type's factory is known (presumably keyed by queueName) — TODO confirm.
    private BlockingQueuePerfCounters _counters;

    /// <summary>
    /// Creates a queue with a soft capacity. <paramref name="queueName"/> is
    /// intended to label the perf counters (currently unused — see the note
    /// on <c>_counters</c>).
    /// </summary>
    public BlockingQueue(string queueName, int capacity)
    {
        _capacity = capacity;
    }

    /// <summary>
    /// Adjusts the soft capacity at runtime and wakes blocked producers so
    /// they can re-check the new limit.
    /// </summary>
    public void SetCapacity(int capacity)
    {
        lock (_sync)
        {
            _capacity = capacity;
            Monitor.PulseAll(_sync);
        }
    }

    /// <summary>Soft upper bound on the number of buffered items.</summary>
    public int Capacity
    {
        get { return _capacity; }
    }

    /// <summary>Current number of buffered items.</summary>
    public int Count
    {
        get
        {
            lock (_sync)
            {
                return _queue.Count;
            }
        }
    }

    /// <summary>
    /// Adds one item, blocking while the queue is over capacity.
    /// BUG FIX: uses Monitor.Wait/PulseAll instead of the original
    /// sleep-poll, which wasted up to 1s per check and raced — several
    /// producers could pass the Count check simultaneously and overshoot
    /// the capacity without bound.
    /// </summary>
    public void Enqueue(T item)
    {
        lock (_sync)
        {
            // Same admission rule as the original (blocks only while
            // strictly over capacity).
            while (_queue.Count > _capacity)
            {
                Monitor.Wait(_sync);
            }
            _queue.Enqueue(item);
        }
        if (_counters != null)
        {
            _counters.EnqueuePerSecond.Increment();
            _counters.QueueLength.Increment();
            _counters.EnqueueTotal.Increment();
        }
    }

    /// <summary>
    /// Adds a batch of items, blocking while the queue is over capacity.
    /// The whole batch is admitted once the check passes, matching the
    /// original behavior of checking only before the batch.
    /// </summary>
    public void Enqueue(IEnumerable<T> list)
    {
        // Materialize once: the original called list.Count() three times,
        // re-enumerating non-collection sequences.
        List<T> items = list as List<T> ?? new List<T>(list);
        lock (_sync)
        {
            while (_queue.Count > _capacity)
            {
                Monitor.Wait(_sync);
            }
            foreach (T item in items)
            {
                _queue.Enqueue(item);
            }
        }
        if (_counters != null)
        {
            _counters.EnqueuePerSecond.IncrementBy(items.Count);
            _counters.QueueLength.IncrementBy(items.Count);
            _counters.EnqueueTotal.IncrementBy(items.Count);
        }
    }

    /// <summary>
    /// Removes one item; throws InvalidOperationException when empty (same
    /// as the original). Wakes producers blocked on capacity.
    /// </summary>
    public T Dequeue()
    {
        T val;
        lock (_sync)
        {
            val = _queue.Dequeue();
            Monitor.PulseAll(_sync);
        }
        if (_counters != null)
        {
            _counters.DequeueTotal.Increment();
            _counters.DequeuePerSecond.Increment();
            _counters.QueueLength.Decrement();
        }
        return val;
    }

    /// <summary>
    /// Removes up to <paramref name="count"/> items in FIFO order; returns
    /// an empty list when the queue is empty (never blocks).
    /// </summary>
    public List<T> Dequeue(int count)
    {
        List<T> list = new List<T>();
        lock (_sync)
        {
            while (_queue.Count > 0 && list.Count < count)
            {
                list.Add(_queue.Dequeue());
            }
            if (list.Count > 0)
            {
                Monitor.PulseAll(_sync);
            }
        }
        if (_counters != null)
        {
            _counters.DequeueTotal.IncrementBy(list.Count);
            _counters.DequeuePerSecond.IncrementBy(list.Count);
            _counters.QueueLength.IncrementBy(-list.Count);
        }
        return list;
    }

    /// <summary>Snapshot of the queue contents in FIFO order.</summary>
    public List<T> ToList()
    {
        lock (_sync)
        {
            return _queue.ToList();
        }
    }

    /// <summary>Discards all buffered items and wakes blocked producers.</summary>
    public void Clear()
    {
        int count;
        lock (_sync)
        {
            count = _queue.Count;
            _queue.Clear();
            Monitor.PulseAll(_sync);
        }
        if (_counters != null)
        {
            _counters.DequeueTotal.IncrementBy(count);
            _counters.DequeuePerSecond.IncrementBy(count);
            _counters.QueueLength.IncrementBy(-count);
        }
    }
}
多线程管理:
/// <summary>
/// Fixed pool of background worker threads draining a BlockingQueue&lt;T&gt;.
/// Assign exactly one of <see cref="ProcessDataBatch"/> (batch mode) or
/// <see cref="ProcessData"/> (per-item mode) before enqueueing work; if both
/// are set, the batch delegate wins (same precedence as the original).
/// </summary>
public class MultiThread<T>
{
    private ITracing _tracing = TracingManager.GetTracing(typeof(MultiThread<T>));
    private BlockingQueue<T> _queue;
    private Thread[] _threads;
    // Items enqueued but not yet fully processed (buffered + in-flight).
    private int _realQueueLength;
    // Cooperative shutdown flag. BUG FIX: replaces the original
    // Thread.Abort(), which can kill a worker mid-operation and throws
    // PlatformNotSupportedException on .NET Core and later.
    private volatile bool _closed;
    public Action<T> ProcessData;
    public Action<List<T>> ProcessDataBatch;

    /// <summary>
    /// Starts <paramref name="threadCount"/> background workers draining a
    /// queue with the given soft capacity; threads are named
    /// "{threadName}_{index}".
    /// </summary>
    public MultiThread(int threadCount, int queueCapacity, string threadName)
    {
        _queue = new BlockingQueue<T>(threadName, queueCapacity);
        _threads = new Thread[threadCount];
        for (int i = 0; i < threadCount; i++)
        {
            _threads[i] = new Thread(Proc);
            _threads[i].IsBackground = true;
            _threads[i].Name = string.Format("{0}_{1}", threadName, i);
            _threads[i].Start();
        }
    }

    /// <summary>
    /// Signals workers to stop after the batch in hand and waits briefly
    /// for each to exit (threads are background threads, so process exit
    /// is never blocked by a straggler).
    /// </summary>
    public void Close()
    {
        _closed = true;
        foreach (Thread th in _threads)
        {
            th.Join(5000);
        }
    }

    /// <summary>Number of items currently buffered in the queue.</summary>
    public int QueueLength
    {
        get
        {
            return _queue.Count;
        }
    }

    /// <summary>Adjusts the queue's soft capacity at runtime.</summary>
    public void SetCapacity(int capacity)
    {
        _queue.SetCapacity(capacity);
    }

    /// <summary>
    /// Spins until every enqueued item has been processed (both buffered
    /// and in-flight).
    /// </summary>
    public void WaitForProcessAll()
    {
        // VolatileRead keeps the JIT from hoisting the field read out of
        // the loop and spinning on a stale value.
        while (Thread.VolatileRead(ref _realQueueLength) > 0)
        {
            Thread.Sleep(1);
        }
    }

    /// <summary>Enqueues a batch of items for the workers.</summary>
    public void Enqueue(IEnumerable<T> list)
    {
        // Materialize once so the sequence is not enumerated twice
        // (once by the queue, once by Count()).
        List<T> items = list as List<T> ?? new List<T>(list);
        // BUG FIX: account for the items BEFORE they become visible to
        // workers. The original added after Enqueue, so a fast worker could
        // finish and decrement first, letting WaitForProcessAll return early.
        Interlocked.Add(ref _realQueueLength, items.Count);
        _queue.Enqueue(items);
    }

    /// <summary>Enqueues a single item for the workers.</summary>
    public void Enqueue(T item)
    {
        // Counter first, then publish — see the batch overload.
        Interlocked.Increment(ref _realQueueLength);
        _queue.Enqueue(item);
    }

    /// <summary>
    /// Worker loop: drains the queue in batches of up to 100 items until
    /// Close() is called.
    /// </summary>
    public void Proc()
    {
        while (!_closed)
        {
            try
            {
                while (!_closed && this.QueueLength > 0)
                {
                    ProcessDataList(_queue.Dequeue(100));
                }
                Thread.Sleep(1);
            }
            catch (ThreadAbortException)
            {
                // Kept for callers that still abort these threads externally.
                Thread.ResetAbort();
                return;
            }
            catch (Exception ex)
            {
                // BUG FIX: the original's try/catch wrapped the entire loop,
                // so one unexpected exception silently killed the worker
                // thread for good. Log, back off briefly, and keep running.
                _tracing.ErrorFmt(ex, "Proc Error");
                Thread.Sleep(100);
            }
        }
    }

    /// <summary>
    /// Dispatches a batch to ProcessDataBatch (preferred) or per-item to
    /// ProcessData, always draining the in-flight counter.
    /// </summary>
    private void ProcessDataList(List<T> list)
    {
        if (list == null || list.Count == 0)
            return;
        // Snapshot the public delegate fields: they could be reassigned by
        // another thread between the null check and the invocation.
        Action<List<T>> batch = ProcessDataBatch;
        Action<T> single = ProcessData;
        if (batch != null)
        {
            try
            {
                batch(list);
            }
            catch (Exception ex)
            {
                _tracing.Error(ex, "ProcessDataList Error");
            }
            finally
            {
                Interlocked.Add(ref _realQueueLength, -list.Count);
            }
        }
        else if (single != null)
        {
            foreach (T item in list)
            {
                try
                {
                    single(item);
                }
                catch (Exception ex)
                {
                    _tracing.Error(ex, "ProcessDataList Error");
                }
                finally
                {
                    Interlocked.Decrement(ref _realQueueLength);
                }
            }
        }
        else
        {
            // BUG FIX: with neither delegate set, the original never drained
            // the counter for dequeued items, so WaitForProcessAll() spun
            // forever. Drop the items but keep the accounting consistent.
            Interlocked.Add(ref _realQueueLength, -list.Count);
        }
    }
}