import dask.dataframe as dd
import pandas as pd
from decimal import Decimal
# Sample rows: names, ids and two float columns (x, y) used by the
# rounding code further down.
data = dict(
    name=['Eason', 'Jay', 'JJ', 'Alice', 'yaya'],
    id=[1000, 1001, 1002, 1003, 1004],
    x=[1.9897712, -98.432490, -0.258545, 0.012345, 0.146248],
    y=[1.9897347, -98.902596, -0.254255, 0.012345, 0.145124],
)
# Build the pandas frame from the dict above and hand it to dask as a
# single partition.
df = dd.from_pandas(
    pd.DataFrame(data=data),
    npartitions=1,
)
# Number of digits to keep after the decimal point, keyed by column name;
# a column missing from the dict falls back to 0.
decimal_dict = dict(x=0)
decimal_places = decimal_dict.get('x', 0)
# Quantization template: "0." followed by one "0" per digit kept
# (so 0 places gives "0." and 2 places gives "0.00").
decimal_places_len = "".join(("0.", decimal_places * "0"))
def split_x(x, decimal_places_len):
    """Quantize every value of a Series to a fixed number of decimal places.

    Args:
        x: Series whose values are accepted by ``Decimal`` (the script passes
            one partition of the float column ``x`` via ``map_partitions``).
        decimal_places_len: Quantization template string such as ``"0.00"``;
            only its exponent matters, so the number of digits after the
            point is the number of places kept. ``"0."`` keeps none.

    Returns:
        The result of ``x.map(...)``: one ``Decimal`` per input value,
        rounded with the active decimal context's rounding mode.
    """
    # Parse the template once per call instead of once per element.
    quantum = Decimal(decimal_places_len)

    def format_decimal(value):
        # Decimal(float) is the exact binary value; quantize() rounds it.
        return Decimal(value).quantize(quantum)

    return x.map(format_decimal)
# Quantize column x partition by partition. ``meta`` declares the result as
# an object-dtype Series named 'x' (it holds Decimal instances), so dask does
# not have to run split_x on dummy data to guess the output type.
df['int_format_decimal'] = df['x'].map_partitions(
    split_x, decimal_places_len, meta=('x', 'object')
)
# Execute the lazy graph; from here on df is the computed (pandas) result.
df = df.compute()
# Bare expression — presumably left for notebook-style display of the frame.
df
# Result with decimal_dict = {'x': 0}
# Result with decimal_dict = {'x': 2}
# Result with decimal_dict = {'x': 10}
import dask.dataframe as dd
import pandas as pd
from decimal import Decimal
# Sample rows, filled in one column at a time.
data = {}
data['name'] = ['Eason', 'Jay', 'JJ', 'Alice', 'yaya']
data['id'] = [1000, 1001, 1002, 1003, 1004]
data['x'] = [1.9897712, -98.432490, -0.258545, 0.012345, 0.146248]
data['y'] = [1.9897347, -98.902596, -0.254255, 0.012345, 0.145124]
# Convert the dict to a pandas frame, then wrap it in a one-partition
# dask DataFrame.
df = dd.from_pandas(
    pd.DataFrame(data=data),
    npartitions=1,
)
# Digits to keep after the decimal point, keyed by column name.
decimal_dict = {'x': 10}


def format_decimal(x, decimal_places=None):
    """Round a single value to a fixed number of decimal places.

    Args:
        x: One number accepted by ``Decimal`` (the script feeds the floats of
            column ``x`` one at a time through ``Series.map``).
        decimal_places: Digits to keep after the decimal point. When omitted,
            the count is read as ``decimal_dict.get('x', 0)`` from the
            module-level dict, which keeps one-argument calls working as
            before.

    Returns:
        ``Decimal(x)`` quantized to that many places, rounded with the
        active decimal context's rounding mode.
    """
    if decimal_places is None:
        decimal_places = decimal_dict.get('x', 0)
    # quantize() only looks at the exponent of its argument, so "0.00" means
    # two places; zero places yields "0.", which Decimal also accepts.
    quantum = Decimal("0." + "0" * decimal_places)
    return Decimal(x).quantize(quantum)
# Apply format_decimal element-wise. ``meta`` declares the result as an
# object-dtype Series named 'x' (it holds Decimal instances), so dask does not
# have to infer the output type by running the function on dummy data.
df['x_format_decimal'] = df['x'].map(format_decimal, meta=('x', 'object'))
# Execute the lazy graph; from here on df is the computed (pandas) result.
df = df.compute()
# Bare expression — presumably left for notebook-style display of the frame.
df
import dask.dataframe as dd
import pandas as pd
from decimal import Decimal
# Sample rows: column names paired with their value lists, in column order.
data = dict(zip(
    ('name', 'id', 'x', 'y'),
    (
        ['Eason', 'Jay', 'JJ', 'Alice', 'yaya'],
        [1000, 1001, 1002, 1003, 1004],
        [1.9897712, -98.432490, -0.258545, 0.012345, 0.146248],
        [1.9897347, -98.902596, -0.254255, 0.012345, 0.145124],
    ),
))
# Wrap the sample data in a one-partition dask DataFrame.
df = dd.from_pandas(
    pd.DataFrame(data=data),
    npartitions=1,
)
# Digits to keep after the decimal point for each column to be formatted.
decimal_dict = dict(x=10, y=0)
def format_decimal(x, decimal_places):
    """Render every value of a Series as a fixed-point string.

    Args:
        x: Series of numbers (the script passes one partition of a float
            column via ``map_partitions``).
        decimal_places: Number of digits to show after the decimal point;
            it is interpolated into a ``.<n>f`` format spec.

    Returns:
        The result of ``x.map(...)``: one string per input value, e.g.
        ``'1.99'`` for ``1.9897712`` with two places.
    """
    # Build the format spec once; it is the same for every element.
    spec = '.' + str(decimal_places) + 'f'
    return x.map(lambda val: format(val, spec))
# Format each listed column in place with its configured number of decimals
# (0 when the column has no entry in decimal_dict).
for column in ['x', 'y']:
    decimal_places = decimal_dict.get(column, 0)
    # ``meta`` declares the result as an object-dtype Series (formatted
    # strings), so dask does not need to run format_decimal on dummy data to
    # guess the output type.
    df[column] = df[column].map_partitions(
        format_decimal, decimal_places, meta=(column, 'object')
    )
# Execute the lazy graph once, after all columns have been scheduled.
df = df.compute()
# Bare expression — presumably left for notebook-style display of the frame.
df