配置文件字段不符情况:
一开始,创建索引报错时,查看日志会发现,提示:“undefined field”这种情况是提交的SolrDocument里有的字段名在配置文件managed-schema中没有事先定义,所以会报这种错;
但是最近,在MapReduce运行时,出现如下错误:(ip地址用了“x”替代)
场景一:
18/05/30 10:58:28 INFO mapreduce.Job: Task Id : attempt_1525877339245_0044_m_000001_0, Status : FAILED
Error: org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteSolrException:
Error from server at [图片]http://xx.xx.xx.xx:xxxx/solr/SEARCH_LHLEMR_v8: Connection reset
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:577)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:241)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:230)
at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:149)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:106)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:71)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:85)
at SolrIndexMapReduce.EmrIndexLHLSolrIndexMR$SolrIndexMapper.cleanup(EmrIndexLHLSolrIndexMR.java:234)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:148)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:793)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
request: [图片]http://xx.xx.xx.xx:xxxx/solr/SEARCH_LHLEMR_v8_shard2_replica1/update?update.distrib=TOLEADER&distrib.from=http%3A%2F%2Fxx.xx.xx.xx%3A8081%2Fsolr%2FSEARCH_LHLEMR_v8_shard1_replica1%2F&wt=javabin&version=2
at org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient$Runner.sendUpdateStream(ConcurrentUpdateSolrClient.java:290)
at org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient$Runner.run(ConcurrentUpdateSolrClient.java:161)
at org.apache.solr.common.util.ExecutorUtil$MDCAwareThreadPoolExecutor.lambda$execute$0(ExecutorUtil.java:229)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
场景二:
org.apache.solr.client.solrj.SolrServerException: IOException occured when talking to server at: http://xx.xx.xx.xx:xxxx:/solr/SEARCH_v1
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:591)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:241)
at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:230)
at org.apache.solr.client.solrj.impl.LBHttpSolrClient.doRequest(LBHttpSolrClient.java:372)
at org.apache.solr.client.solrj.impl.LBHttpSolrClient.request(LBHttpSolrClient.java:325)
at org.apache.solr.client.solrj.impl.CloudSolrClient.sendRequest(CloudSolrClient.java:1100)
at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:870)
at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:806)
at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:149)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:106)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:71)
at org.apache.solr.client.solrj.SolrClient.add(SolrClient.java:85)
at SolrIndexMapReduce.WDSolrSeaIndexMR$SolrIndexMapper.map(WDSolrSeaIndexMR.java:194)
at SolrIndexMapReduce.WDSolrSeaIndexMR$SolrIndexMapper.map(WDSolrSeaIndexMR.java:1)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:793)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: org.apache.http.client.ClientProtocolException
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:909)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:805)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:784)
at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:482)
... 21 more
Caused by: org.apache.http.client.NonRepeatableRequestException: Cannot retry request with a non-repeatable request entity. The cause lists the reason the original request failed.
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:689)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:520)
at org.apache.http.impl.client.AbstractHttpClient.execute(AbstractHttpClient.java:906)
... 24 more
Caused by: java.net.SocketException: Connection reset
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:115)
at java.net.SocketOutputStream.write(SocketOutputStream.java:155)
at org.apache.http.impl.io.AbstractSessionOutputBuffer.write(AbstractSessionOutputBuffer.java:169)
at org.apache.http.impl.io.ContentLengthOutputStream.write(ContentLengthOutputStream.java:119)
at org.apache.http.entity.InputStreamEntity.writeTo(InputStreamEntity.java:102)
at org.apache.http.entity.HttpEntityWrapper.writeTo(HttpEntityWrapper.java:98)
at org.apache.http.impl.client.EntityEnclosingRequestWrapper$EntityWrapper.writeTo(EntityEnclosingRequestWrapper.java:108)
at org.apache.http.impl.entity.EntitySerializer.serialize(EntitySerializer.java:122)
at org.apache.http.impl.AbstractHttpClientConnection.sendRequestEntity(AbstractHttpClientConnection.java:271)
at org.apache.http.impl.conn.ManagedClientConnectionImpl.sendRequestEntity(ManagedClientConnectionImpl.java:197)
at org.apache.http.protocol.HttpRequestExecutor.doSendRequest(HttpRequestExecutor.java:257)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:715)
... 26 more
以上两种情况为运行MapReduce时,在ResourceManager Web UI的日志中看到的报错,但是在solr的管理页面中报错却很宽泛看不出是什么错。通过报错内容的行数可以确定是在程序的某一行,都指向CloudSolrClient.add();这里,所以判断是提交的时候报错
导致这个问题是:
Solr配置文件中没有的字段,在程序中添加到了SolrDocument中,然后使用client提交时报错了,通常solr管理页面会报“undefined field”的错误,但是却报connect reset和断开的管道等错误,所以找起来比较麻烦。
后续补充:
再一次报如上错误的时候,偶然发现报错涉及到二节点。在登入二节点的solr管理页面的日志页面时,发现具体异常的原因有记录。这时才明白,分布式的solr在某个节点上的异常由当前solr来记录,而通过mapreduce的报错只是访问异常,即报的是拿不到数据的异常,而更具体的异常则是在solr的日志系统上;