Spark踩坑小记

最新推荐文章于 2023-04-06 11:20:37 发布

YINGWENNICHENG

最新推荐文章于 2023-04-06 11:20:37 发布

阅读量399

点赞数

分类专栏： Spark 文章标签： spark

本文链接：https://blog.csdn.net/YINGWENNICHENG/article/details/123726597

版权

本文记录了在使用SparkSQL从SQLServer导入数据到Hive时遇到的错误，尝试解决包括版本匹配、修改源码、放置winutils文件到指定目录等方法，最终发现需要重启电脑以使修改的动态链路库生效。

摘要由CSDN通过智能技术生成

使用SparkSQL将SQLServer中的数据导入hive中，运行代码一直报这个错。
在这里插入图片描述
1.百度之后说是scala和spark版本问题，然而将使用相兼容的版本后依旧报错。

2.继续百度，需要创建一个目录覆盖apache的目录，并且将NativeIO中的代码复制过来并将此处返回值改为return true 里插入图片描述

修改之后，运行发现依旧报错
重启idea再次运行，还是报错。。。

3.三顾度娘，需要将winutils中的 hadoop.dll 和 winutils.exe放入hadoophome/bin目录下，并且将 hadoop.dll 放入System32目录下。
放好之后，再次运行，发现还是报错。

以下NativeIO中内容（修改返回值后）

/*
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 */
package org.apache.hadoop.io.nativeio;

import java.io.File;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.io.SecureIOUtils.AlreadyExistsException;
import org.apache.hadoop.util.NativeCodeLoader;
import org.apache.hadoop.util.Shell;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import sun.misc.Unsafe;

import com.google.common.annotations.VisibleForTesting;

/**
 * JNI wrappers for various native IO-related calls not available in Java.
 * These functions should generally be used alongside a fallback to another
 * more portable mechanism.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class NativeIO {
   
    public static class POSIX {
   
        // Flags for open() call from bits/fcntl.h
        public static final int O_RDONLY   =    00;
        public static final int O_WRONLY   =    01;
        public static final int O_RDWR     =    02;
        public static final int O_CREAT    =  0100;
        public static final int O_EXCL     =  0200;
        public static final int O_NOCTTY   =  0400;
        public static final int O_TRUNC    = 01000;
        public static final int O_APPEND   = 02000;
        public static final int O_NONBLOCK = 04000;
        public static final int O_SYNC   =  010000;
        public static final int O_ASYNC  =  020000;
        public static final int O_FSYNC = O_SYNC;
        public static final int O_NDELAY = O_NONBLOCK;

        // Flags for posix_fadvise() from bits/fcntl.h
        /* No further special treatment.  */
        public static final int POSIX_FADV_NORMAL = 0;
        /* Expect random page references.  */
        public static final int POSIX_FADV_RANDOM = 1;
        /* Expect sequential page references.  */
        public static final int POSIX_FADV_SEQUENTIAL = 2;
        /* Will need these pages.  */
        public static final int POSIX_FADV_WILLNEED = 3;
        /* Don't need these pages.  */
        public static final int POSIX_FADV_DONTNEED = 4;
        /* Data will be accessed once.  */
        public static final int POSIX_FADV_NOREUSE = 5;


        /* Wait upon writeout of all pages
           in the range before performing the
           write.  */
        public static final int SYNC_FILE_RANGE_WAIT_BEFORE = 1;
        /* Initiate writeout of all those
           dirty pages in the range which are
           not presently under writeback.  */
        public static final int SYNC_FILE_RANGE_WRITE = 2;

        /* Wait upon writeout of all pages in
           the range after performing the
           write.  */
        public static final int SYNC_FILE_RANGE_WAIT_AFTER = 4;

        private static final Log LOG = LogFactory.getLog(NativeIO.class);

        private static boolean nativeLoaded = false;
        private static boolean fadvisePossible = true;
        private static boolean syncFileRangePossible = true;

        static final String WORKAROUND_NON_THREADSAFE_CALLS_KEY =
                "hadoop.workaround.non.threadsafe.getpwuid";
        static final boolean WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT = true;

        private static long cacheTimeout = -1;

        private static CacheManipulator cacheManipulator = new CacheManipulator();

        public static CacheManipulator getCacheManipulator() {
   
            return cacheManipulator;
        }

        public static void setCacheManipulator(CacheManipulator cacheManipulator) {
   
            POSIX.cacheManipulator = cacheManipulator;
        }

        /**
         * Used to manipulate the operating system cache.
         */
        @VisibleForTesting
        public static class CacheManipulator {
   
            public void mlock(String identifier, ByteBuffer buffer,
                              long len) throws IOException {
   
                POSIX.mlock(buffer, len);
            }

            public long getMemlockLimit() {
   
                return NativeIO.getMemlockLimit();
            }

            public long getOperatingSystemPageSize() {
   
                return NativeIO.getOperatingSystemPageSize();
            }

            public void posixFadviseIfPossible(String identifier,
                                               FileDescriptor fd, long offset, long len, int flags)
                    throws NativeIOException {
   
                POSIX.posixFadviseIfPossible(identifier, fd, offset,
                        len, flags);
            }

            public boolean verifyCanMlock() {
   
                return NativeIO.isAvailable();
            }
        }

        /**
         * A CacheManipulator used for testing which does not actually call mlock.
         * This allows many tests to be run even when the operating system does not
         * allow mlock, or only allows limited mlocking.
         */
        @VisibleForTesting
        public static class NoMlockCacheManipulator extends CacheManipulator {
   
            public void mlock(String identifier, ByteBuffer buffer,
                              long len) throws IOException {
   
                LOG.info("mlocking " + identifier);
            }

            public long getMemlockLimit() {
   
                return 1125899906842624L;
            }

            public long getOperatingSystemPageSize() {
   
                return 4096;
            }

            public boolean verifyCanMlock() {
   
                return true;