Python Program Template: Difference between revisions
Jump to navigation
Jump to search
(Created page with "A Python Program Template is a software program template that is a Python program. * <B>Counter-Example(s):</B> ** Perl Program Template. * <B>See:</B> Pytho...") |
|||
Line 7: | Line 7: | ||
==References== | ==References== | ||
< | <pre> | ||
#!/usr/bin/python | #!/usr/bin/python | ||
Line 70: | Line 70: | ||
return(df_tmp) | return(df_tmp) | ||
</ | </pre> | ||
---- | ---- | ||
__NOTOC__ | __NOTOC__ |
Revision as of 21:11, 13 February 2015
A Python Program Template is a software program template that is a Python program.
- Counter-Example(s): a Perl Program Template.
- See: Python Code.
References
#!/usr/bin/python
# Template program
#
# Skeleton for a pandas / numpy / scikit-learn data-processing script:
# it reports library versions, builds date/time stamps, declares a small
# table of input files, and provides a helper that loads one labeled TSV.
#
# All print calls use the single-argument form print(...), which behaves the
# same under Python 2 and Python 3.

debug = 1
# Fixed: the original used Perl-style "." concatenation, which in Python is an
# attribute lookup on the string and raises AttributeError.
if debug >= 1:
    print("DEBUG: " + str(debug))


# LIBRARIES
import pandas as pd
if debug:
    print("pandas version: " + pd.__version__)  # pandas version: 0.14.1
from pandas import DataFrame, Series

import numpy as np
if debug:
    print("numpy version: " + np.__version__)  # numpy version: 1.8.1
from numpy import random  # random

from re import split

import sklearn as skl
# Fixed: the module is bound to the alias `skl`; the bare name `sklearn` was
# never defined and raised NameError.
if debug:
    print("sklearn version: " + skl.__version__)  # sklearn version: 0.15.0
# from sklearn import preprocessing, svm, cross_validation # labelEncoder
# from sklearn.metrics import accuracy_score
# from sklearn.feature_extraction.text import CountVectorizer
# import sklearn.feature_extraction.text
# from sklearn.externals import joblib

import gc


# GLOBALS
from datetime import datetime
from time import time

# Date stamp (yymmdd) and date-time stamp (yymmddHHMMSS) taken at start-up.
dstamp = datetime.now().strftime("%y%m%d")
if debug:
    print("dstamp=" + dstamp)  # dstamp=141113
#dstamp="141106"

tstamp = datetime.now().strftime("%y%m%d%H%M%S")
if debug:
    print("tstamp=" + tstamp)  # tstamp=141113032844

dataDir = "../data/"
inFile = "file.tsv"

if debug >= 2:
    minDictionaryTermsFilter = 1

array = ['val1', 'val2']

# Column-name constants. The original template used these names without
# defining them, so the script failed with NameError at load time.
# NOTE(review): the values below are placeholders -- replace them with the
# real column names of your data before use.
colNameRecordSource = 'recordSource'
colNameRecordType = 'recordType'
colNameRecordTextContent = 'recordTextContent'
colNameRecordLabel = 'recordLabel'
# Columns selected from each input file by getLabeledData(). Exactly two
# columns must be selected because they are renamed to
# [colNameRecordTextContent, colNameRecordLabel].
# NOTE(review): placeholder -- set to the header names present in your files.
cols_to_keep = [colNameRecordTextContent, colNameRecordLabel]

# One row per input file, indexed by 'colId'.
df = pd.DataFrame([
    {'colId': 0, colNameRecordSource: 'VIG', colNameRecordType: 'taxoPath',
     'dataFilename': 'pcTaxo_lbld2_VIG.141114.tsv'},
    {'colId': 1, colNameRecordSource: 'EB', colNameRecordType: 'taxoPath',
     'dataFilename': 'pcTaxo_lbld2_EB.141030b.tsv'},
])
df.set_index('colId', inplace=True)

filesToProcess = pd.Series([2])


def getLabeledData(dataFilename, dataSource, debug):
    """Load one tab-separated labeled-data file into a DataFrame.

    The file is read from ``dataDir + dataFilename``, reduced to the columns
    listed in ``cols_to_keep``, and those columns are renamed to
    ``[colNameRecordTextContent, colNameRecordLabel]``. A constant column
    ``colNameRecordSource`` holding ``dataSource`` is then added.

    Assumes the columns are not empty and that column headers are present.

    Args:
        dataFilename: File name, relative to the module-level ``dataDir``.
        dataSource: Value stored in the ``colNameRecordSource`` column of
            every row.
        debug: Verbosity level; at 3 or above a summary and a random sample
            of up to three rows is printed.

    Returns:
        The loaded and relabeled ``pandas.DataFrame``.
    """
    # assumes: columns not empty
    # assumes: column headers are present
    df_tmp = pd.read_csv(dataDir + dataFilename,
                         delimiter='\t',
                         skipinitialspace=True)[cols_to_keep]
    df_tmp.columns = [colNameRecordTextContent, colNameRecordLabel]
    df_tmp[colNameRecordSource] = dataSource

    if debug >= 3:
        # Sampling without replacement raises ValueError when more rows are
        # requested than exist, so cap the sample size at the frame length.
        sample_size = min(3, len(df_tmp))
        if sample_size:
            sample = df_tmp.loc[random.choice(df_tmp.index, sample_size,
                                              replace=False)]
        else:
            sample = df_tmp
        # Fixed: the original literal contained a stray "\d" where a newline
        # ("\n") was evidently intended.
        print("--------------------\ndataSource = %s  rows = %d  sample\n%s"
              % (dataSource, len(df_tmp), sample))

    return df_tmp