读取原始数据
原始数据开始的12行是需要删去的,用 readLines 读入后对应约24行(代码中从第23行起保留,并把第23行作为表头)。 这里检查了监测血糖读数和指尖血糖读数两列是否存在,最终只保留时间和监测血糖的读数。 所有缺失值都被删去,不进行 impute。
#' Read one raw CGM export and return its usable sensor-glucose readings.
#'
#' The file is located by concatenating the fixed prefix
#' "E:/direction/to/rawdata", `id` and ".csv" with no separator. The first
#' 22 lines returned by `readLines()` are discarded; line 23 is used as the
#' header row of a tab-separated table.
#'
#' @param id Identifier inserted into the file name.
#' @return A data frame with the columns `time` and `sgReading`, restricted
#'   to rows whose `Excluded` value is not "TRUE" and whose `sgReading` is
#'   not NA. No imputation is performed.
#' @details Stops with an error when the file has fewer than 23 lines or when
#'   one of the columns `Timestamp`, `Sensor.Glucose..mmol.L.`,
#'   `BG.Reading..mmol.L.` or `Excluded` is absent.
ReadRawFile <- function(id) {
  headerLine <- 23

  path <- paste0("E:/direction/to/rawdata", id, ".csv")
  # NOTE(review): "utf-16" is not one of the encodings readLines() documents
  # for its `encoding` argument; decoding here appears to rely on
  # `skipNul = TRUE` dropping the NUL bytes. Confirm before changing, because
  # the line offset below depends on how the file is split into lines.
  lines <- readLines(path, encoding = "utf-16", skipNul = TRUE)

  # Guard against truncated files: 23:length(lines) would otherwise count
  # backwards and hand NA / reversed lines to read.table().
  if (length(lines) < headerLine) {
    stop(
      paste("Input file has fewer than", headerLine, "lines:", path),
      call. = FALSE
    )
  }
  lines <- lines[headerLine:length(lines)]
  lines <- gsub("\"", "", lines)

  data <- read.table(text = lines, sep = "\t", quote = "", header = TRUE)

  data <- checkAndRename("Timestamp", "time", data)
  data <- checkAndRename("Sensor.Glucose..mmol.L.", "sgReading", data)
  # The finger-stick column is not returned; the rename only verifies that
  # the column is present in the input file.
  data <- checkAndRename("BG.Reading..mmol.L.", "bgReading", data)

  if (!("Excluded" %in% colnames(data))) {
    stop("Column missing from input file: Excluded", call. = FALSE)
  }
  # which() also drops rows where the comparison is NA.
  data <- data[which(data[, "Excluded"] != "TRUE"), ]

  data <- data[, c("time", "sgReading")]
  data[which(!is.na(data$sgReading)), ]
}
#' Rename a required column, failing when it is absent.
#'
#' @param oldname Column name expected in `data`.
#' @param newname Name to assign to every column currently called `oldname`.
#' @param data Object whose `colnames()` are inspected and updated.
#' @return `data` with the matching column name(s) replaced by `newname`.
#' @details Stops with "Column missing from input file: <oldname>" when no
#'   column is called `oldname`.
checkAndRename <- function(oldname, newname, data) {
  matchIdx <- which(colnames(data) == oldname)

  if (length(matchIdx) > 0) {
    colnames(data)[matchIdx] <- newname
    return(data)
  }

  stop(paste("Column missing from input file:", oldname), call. = FALSE)
}
分离每一天的数据和时间段
主要通过对每一天打上 tag 来实现。同理,对每一个时间段打上 tag 可以区分时间段。
#' Tag every reading with its day index and time-of-day window flags.
#'
#' @param rawData Data frame with a `time` column that `as.Date()` and
#'   `as.POSIXct()` can parse.
#' @return `rawData` with four added numeric columns:
#'   \itemize{
#'     \item `dayIdx`: 1 for the calendar date of the first row, 2 for the
#'       following date, and so on.
#'     \item `isFasting`: 1 when the reading lies in 04:00:00-07:00:00 of its
#'       own date, else 0.
#'     \item `PP`: 1 when the reading lies in 07:00:00-22:00:00, else 0.
#'     \item `atNight`: 1 when the reading lies in 00:00:00-07:00:00, else 0.
#'   }
#'   All window bounds are inclusive, so a reading at exactly 07:00:00 is
#'   flagged in all three windows, and the fasting window is contained in the
#'   night window.
#' @details A zero-row input is returned with the four columns added (empty).
#'   Stops with an error when a timestamp cannot be parsed.
getDays <- function(rawData) {
  if (nrow(rawData) == 0) {
    # Nothing to tag; still add the columns so callers can rely on them.
    rawData$dayIdx <- numeric(0)
    rawData$isFasting <- numeric(0)
    rawData$PP <- numeric(0)
    rawData$atNight <- numeric(0)
    return(rawData)
  }

  currentDate <- as.Date(rawData$time)
  currentTime <- as.POSIXct(rawData$time)
  if (anyNA(currentDate) || anyNA(currentTime)) {
    stop("rawData$time contains values that cannot be parsed", call. = FALSE)
  }

  # Whole days elapsed since the first row's date, 1-based.
  rawData$dayIdx <- as.numeric(currentDate - currentDate[1]) + 1

  # Window boundaries are built on each row's own calendar date.
  dayText <- as.character(currentDate)
  clockOnDay <- function(clock) as.POSIXct(paste(dayText, clock))
  fastingStart <- clockOnDay("04:00:00")
  fastingEnd <- clockOnDay("07:00:00")
  PPEnd <- clockOnDay("22:00:00")
  NightStart <- clockOnDay("00:00:00")

  rawData$isFasting <-
    as.numeric(currentTime >= fastingStart & currentTime <= fastingEnd)
  rawData$PP <-
    as.numeric(currentTime >= fastingEnd & currentTime <= PPEnd)
  rawData$atNight <-
    as.numeric(currentTime >= NightStart & currentTime <= fastingEnd)

  rawData
}
取得某一天的血糖数据
因为不想对缺失值进行 impute,忽略没有完整数据的一天(即读数没有288个)。 idlist 是一个包含所有需要提取某一天数据的 id 的 list。 Idx 即是这天的 index,第一天就是1,第二天就是2。
#' Collect one day's sensor-glucose trace for several ids.
#'
#' For each id the raw file is read with `ReadRawFile()` and tagged with
#' `getDays()`, and the `sgReading` values whose `dayIdx` equals `Idx` are
#' extracted. A day is used only when it has exactly 288 readings; otherwise
#' that id contributes 288 NA values (no imputation).
#'
#' @param idlist Ids to process; each element is passed to `ReadRawFile()`.
#' @param Idx Day index to extract (1 = first day, 2 = second day, ...).
#' @return The result of `cbind(idlist, readings)`, where `readings` has one
#'   row per id and 288 columns labelled by timepoint 1..288.
#' @details Stops with an error when `idlist` is empty.
getDayGlucose <- function(idlist, Idx) {
  # 1:length(idlist) would iterate over c(1, 0) for an empty list and fail
  # obscurely inside the file reader; fail early with a clear message.
  if (length(idlist) == 0) {
    stop("idlist must contain at least one id", call. = FALSE)
  }

  sgReading_dt <- data.frame(timepoint = 1:288)

  for (i in seq_along(idlist)) {
    rawData <- getDays(ReadRawFile(idlist[i]))
    rawData_Day <- rawData[which(rawData$dayIdx == Idx), "sgReading"]

    # Incomplete days are replaced by an all-NA placeholder.
    if (length(rawData_Day) != 288) {
      rawData_Day <- rep(NA, times = 288)
    }

    # Keep the variable name `rawData_Day`: cbind() uses it as the column
    # label, which ends up in the row labels of the returned matrix.
    sgReading_dt <- cbind(sgReading_dt, rawData_Day)
  }

  # Use the timepoint column as row names, then transpose so that each id
  # occupies one row.
  sgReading_dt <- t(data.frame(sgReading_dt, row.names = 1))
  cbind(idlist, sgReading_dt)
}
计算曲线下面积
#' Area under the glucose curve for readings above a baseline.
#'
#' Rows with `sgReading > baseline` are kept (rows where the comparison is NA
#' are dropped). For each pair of consecutive kept rows that are exactly
#' 5 minutes apart, a trapezoid `(reading_i + reading_{i-1}) * 5 * 0.5` is
#' added; pairs with any other spacing contribute nothing. The trapezoids are
#' built from the raw readings, so the area is measured down to zero rather
#' than down to `baseline`.
#'
#' @param rawData Data frame with columns `time` (accepted by `difftime()`)
#'   and `sgReading`.
#' @param baseline Threshold a reading must exceed to be included.
#' @return A single numeric value; 0 when fewer than two readings exceed
#'   `baseline`.
#' @details Stops with an error when a time gap between kept rows cannot be
#'   computed.
calculateAUC <- function(rawData, baseline) {
  stepMinutes <- 5

  above <- rawData[which(rawData$sgReading > baseline), , drop = FALSE]
  n <- nrow(above)

  # With fewer than two points there is no interval to integrate; 2:n would
  # otherwise count backwards and index past the data.
  if (n < 2) {
    return(0)
  }

  gap <- as.numeric(
    difftime(above$time[-1], above$time[-n], units = "mins")
  )
  if (anyNA(gap)) {
    stop("rawData$time contains values that cannot be parsed", call. = FALSE)
  }

  # Tolerance instead of `==` so floating-point noise in the minute
  # conversion does not discard a genuine 5-minute step.
  contiguous <- abs(gap - stepMinutes) < 1e-8

  trapezoid <- (above$sgReading[-1] + above$sgReading[-n]) * gap * 0.5
  sum(trapezoid[contiguous])
}
参考文献
Software application profile: GLU: A tool for analysing continuously measured glucose in epidemiology