c# – Why is the number of concurrent downloads limited?

I am trying to build my own simple web crawler. I want to download files with specific extensions from a URL. I wrote the following code:
  private void button1_Click(object sender, RoutedEventArgs e)
  {
      if (bw.IsBusy) return;
      bw.DoWork += new DoWorkEventHandler(bw_DoWork);
      bw.RunWorkerAsync(new string[] { URL.Text, SavePath.Text, Filter.Text });
  }
  //--------------------------------------------------------------------------------------------
  void bw_DoWork(object sender, DoWorkEventArgs e)
  {
      try
      {
          ThreadPool.SetMaxThreads(4, 4);
          string[] strs = e.Argument as string[];
          // Matches <a ... href="..."> links in the downloaded page.
          Regex reg = new Regex("<a(\\s*[^>]*?){0,1}\\s*href\\s*\\=\\s*\\\"([^>]*?)\\\"\\s*[^>]*>(.*?)</a>",
              RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
          int i = 0;
          string domainS = strs[0];
          string Extensions = strs[2];
          string OutDir = strs[1];
          var domain = new Uri(domainS);
          string[] Filters = Extensions.Split(new char[] { ';', ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
          string outPath = System.IO.Path.Combine(OutDir, string.Format("File_{0}.html", i));

          WebClient webClient = new WebClient();
          string str = webClient.DownloadString(domainS);
          str = str.Replace("\r\n", " ").Replace('\n', ' ');
          MatchCollection mc = reg.Matches(str);
          int NumOfThreads = mc.Count;

          Parallel.ForEach(mc.Cast<Match>(), new ParallelOptions { MaxDegreeOfParallelism = 2 }, mat =>
          {
              string val = mat.Groups[2].Value;
              var link = new Uri(domain, val);
              foreach (string ext in Filters)
                  if (val.EndsWith("." + ext))
                  {
                      Download((object)new object[] { OutDir, link });
                      break;
                  }
          });
          // Thrown deliberately so completion is reported through ReportException.
          throw new Exception("Finished !");
      }
      catch (System.Exception ex)
      {
          ReportException(ex);
      }
      finally
      {
      }
  }
  //--------------------------------------------------------------------------------------------
  private static void Download(object o)
  {
      try
      {
          object[] objs = o as object[];
          Uri link = (Uri)objs[1];
          string outPath = System.IO.Path.Combine((string)objs[0], System.IO.Path.GetFileName(link.ToString()));
          if (!File.Exists(outPath))
          {
              //WebClient webClient = new WebClient();
              //webClient.DownloadFile(link, outPath);

              DownloadFile(link.ToString(), outPath);
          }
      }
      catch (System.Exception ex)
      {
          ReportException(ex);
      }
  }
  //--------------------------------------------------------------------------------------------
  private static bool DownloadFile(string url, string filePath)
  {
      try
      {
          HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
          request.UserAgent = "Web Crawler";
          request.Timeout = 40000;
          WebResponse response = request.GetResponse();
          Stream stream = response.GetResponseStream();
          using (FileStream fs = new FileStream(filePath, FileMode.CreateNew))
          {
              const int siz = 1000;
              byte[] bytes = new byte[siz];
              for (; ; )
              {
                  int count = stream.Read(bytes, 0, siz);
                  fs.Write(bytes, 0, count);
                  if (count == 0) break;
              }
              fs.Flush();
              fs.Close();
          }
      }
      catch (System.Exception ex)
      {
          ReportException(ex);
          return false;
      }
      finally
      {
      }
      return true;
  }

The problem is that while it works fine with 2 parallel downloads:

  new ParallelOptions { MaxDegreeOfParallelism = 2, }

…it does not work with a higher degree of parallelism, such as:

  new ParallelOptions { MaxDegreeOfParallelism = 5, }

…where I get connection timeout exceptions.

At first I thought it was because of WebClient:

  //WebClient webClient = new WebClient();
  //webClient.DownloadFile(link, outPath);

…but when I replaced it with the DownloadFile function, which uses HttpWebRequest, I still got the error.

I have tested this on many web pages and nothing changed. I also confirmed with the Chrome extension "Download Master" that these web servers allow multiple parallel downloads.
Does anyone know why I get timeouts when trying to download multiple files in parallel?

Solution

You need to assign ServicePointManager.DefaultConnectionLimit. The default number of concurrent connections to the same host is 2. For information on using connectionManagement in web.config, see the related SO post.
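As a minimal sketch of that fix, assuming a .NET Framework app like the WPF crawler above (the class name, method name and the value 10 are illustrative, not part of the original code):

  using System.Net;

  static class CrawlerConfig
  {
      // Call once before the crawl starts, e.g. at the top of bw_DoWork.
      public static void RaiseConnectionLimit()
      {
          // The .NET Framework default is 2 concurrent connections per host,
          // so with MaxDegreeOfParallelism = 5 the extra requests wait behind
          // the first two and can run into the 40-second request timeout.
          ServicePointManager.DefaultConnectionLimit = 10; // illustrative value
      }
  }

The same limit can also be configured per application in app.config/web.config via the connectionManagement element, which is what the linked post describes.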
