请先用文本编辑器打开 .tab 文件,确认以下两点重要信息:
1. 找到标记数据起始位置的分隔符(有些 .tab 文件在数据表之前有一大段数据说明,说明结束后才是真正的数据;下面的代码以 `*/` 开头的行作为说明结束的标志)。
2. 确认所需各列的列名及其所在位置。
下面直接给出代码:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
# Convert a tab-separated ".tab" table that is preceded by a free-text
# description block into a point shapefile.
#
# Steps: locate the end of the description block, load the table with pandas,
# keep the needed columns, filter on 'Conf', derive a 'weight' column, and
# write the points out as a WGS84 shapefile.

# Paths to the input and output files
file_path = "I:\\Edge_Download\\GlobalCrowd.tab"
output_path = "J:\\Greening\\Human_impact\\HI.shp"

# Find the index of the first line after the description block.
# The block is closed by a line that starts with "*/"  <-- adjust this marker
# to whatever separator your own .tab file uses.
# The file is streamed line by line instead of being loaded whole, and it is
# decoded as UTF-8 (the same default pd.read_csv uses below) rather than with
# the platform locale. errors="replace" keeps the scan from failing on a stray
# byte, since only the ASCII marker matters here.
start_index = 0  # falls back to 0 (no rows skipped) when no marker is found
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
    for i, line in enumerate(file):
        if line.startswith("*/"):
            start_index = i + 1
            break

# Read the table, skipping everything up to and including the marker line
data = pd.read_csv(file_path, sep="\t", skiprows=start_index)

# Select the required columns  <-- adjust to the column names in your file
df = data[['Longitude', 'Latitude', 'HI [%] (Human Impact 1)', 'Conf']]

# Keep only rows where Conf is 0, 10, or 20. The explicit .copy() makes the
# result an independent frame, so the column assignment below modifies it
# directly instead of writing to a slice of `data` (SettingWithCopyWarning).
df_filtered = df[df['Conf'].isin([0, 10, 20])].copy()

# Add a 'weight' column derived from 'Conf': 0 -> 3, 10 -> 2, 20 -> 1.
# (Specific to the original author's use case; drop it if you do not need it.)
# Every remaining Conf value is one of the three keys, so no value is left
# unmapped; the cast keeps the column integer even if the frame is empty.
df_filtered['weight'] = df_filtered['Conf'].map({0: 3, 10: 2, 20: 1}).astype('int64')

# Rename columns for convenience
df_filtered = df_filtered.rename(
    columns={
        'Longitude': 'lon',
        'Latitude': 'lat',
        'HI [%] (Human Impact 1)': 'HI',
    }
)

# Create a GeoDataFrame of points, declaring the coordinate reference system
# as WGS84 (EPSG:4326) directly in the constructor
gdf = gpd.GeoDataFrame(
    df_filtered,
    geometry=gpd.points_from_xy(df_filtered['lon'], df_filtered['lat']),
    crs="EPSG:4326",
)

# Save the GeoDataFrame to a shapefile
gdf.to_file(output_path)
print(f"Shapefile saved to {output_path}")