Module: classification

Example Google style docstrings.

This module demonstrates documentation as specified by the Google Python Style Guide. Docstrings may extend over multiple lines. Sections are created with a section header and a colon followed by a block of indented text.

Example:

Examples can be given using either the Example or Examples sections. Sections support any reStructuredText formatting, including literal blocks:

$ python example_google.py

Section breaks are created by resuming unindented text. Section breaks are also implicitly created anytime a new section starts.

Attributes:
module_level_variable1 (int): Module level variables may be documented in either the Attributes section of the module docstring, or in an inline docstring immediately following the variable.

Either form is acceptable, but the two should not be mixed. Choose one convention to document module level variables and be consistent with it.

class DataSet(name, data_sets_list=[], class_labels_list=[], ID_field_name=None, shuffle=True, classifier='rfc', n_estimators=10)[source]

Bases: object

Methods

build([data_sets_list, class_labels_list, ...])
check_is_list(l)
class_mapping(class_map_dic)
cv_predictions_errors([label_class])
decision_tree_pre_filter([th])
do_MC(MC_repl, n_trees)
do_cross_fold_validation([n_folds, verbose])
do_cv_confusion_matrix([plot, save_plot, path])
do_feat_std()
do_features_importance_trend(n_trees[, ...])
do_pca()
entroy_pre_filter([th])
eval_features_importance([sort, plot, ...])
filter_nan()
find_best_tree_num([plot, save_plot, path])
find_most_corr_features(feature_name)
get_cross_validation_score([cv])
get_cv_feature(feature_name[, range, ...])
get_feature(feature_name[, label, range, ...])
get_feature_cut(feature_name, feat_range)
plot_contour_scatter_plot(feature_name_x, ...)
plot_cv_feature_histogram([feature_name, ...])
plot_feature_correlation_matrix([plot, ...])
plot_feature_entropy([feature_name, bins, ...])
plot_feature_histogram([feature_name, bins, ...])
predict_test_set(test_set)
print_cv_predictions()
recursive_features_elimination([...])
remove_correlated_features(n_trees, max_corr)
select_features_ids(ids)
set_classifier([classifier_name])
show_features(image_id)
shuffle_set()
skip_classes(skip_classes)
skip_features(del_names)
skip_images_id(ids)
split_test_set([test_fraction, image_ids])
use_features(use_names[, whole_name])
write_weka_file(file_name)
build(data_sets_list=None, class_labels_list=None, shuffle=True, features_names=None)[source]
check_is_list(l)[source]
class_mapping(class_map_dic)[source]
cv_predictions_errors(label_class=None)[source]
decision_tree_pre_filter(th=0.75)[source]
do_MC(MC_repl, n_trees)[source]
do_cross_fold_validation(n_folds=10, verbose=False)[source]
do_cv_confusion_matrix(plot=False, save_plot=False, path='./')[source]
do_feat_std()[source]
do_features_importance_trend(n_trees, order_increasing=True, plot=False, save_plot=False, path='./', file_type='pdf')[source]
do_pca()[source]
entroy_pre_filter(th=0.75)[source]
eval_features_importance(sort=True, plot=False, save_plot=False, path='./', file_type='pdf', file_name=None)[source]
filter_nan()[source]
find_best_tree_num(plot=False, save_plot=False, path='./')[source]
find_most_corr_features(feature_name)[source]
get_cross_validation_score(cv=10)[source]
get_cv_feature(feature_name, range=[None, None], get_image_id=False, predicted=None, actual=None, cv_predictions=None)[source]
get_feature(feature_name, label=None, range=[None, None], get_image_id=False)[source]
get_feature_cut(feature_name, feat_range)[source]
plot_contour_scatter_plot(feature_name_x, feature_name_y, H_cmap='summer_r', n_levels=30, smoothing_scale=1, plot=False, labels=None, ax=None, exclude_outliers=None, plot_H=True, plot_contour=True, plot_scatter=True, x_range=[None, None], y_range=[None, None], point_size=1, color_list=None, c_map_list=None, lw=1, classes=None)[source]
plot_cv_feature_histogram(feature_name=None, bins=10, range=None, normed=False, cv_error=False, cv_predictions=None)[source]
plot_feature_correlation_matrix(plot=False, save_plot=False, path='./')[source]
plot_feature_entropy(feature_name=None, bins=10, range=None, normed=False, plot=False, save_plot=False, path='./')[source]
plot_feature_histogram(feature_name=None, bins=10, range=None, normed=False, KDE=False, plot=False, save_plot=False, path='./', file_type='pdf', ax=None, exclude_outliers=None)[source]
predict_test_set(test_set)[source]
print_cv_predictions()[source]
recursive_features_elimination(percentile_th=5, feat_n_min=3, path='./', plot=False, save_plot=False, file_type='pdf', file_name='rec_feat_rem')[source]
remove_correlated_features(n_trees, max_corr, order_increasing=False, plot=False, save_plot=False, path='./')[source]
select_features_ids(ids)[source]
set_classifier(classifier_name=None, **kwargs)[source]
show_features(image_id)[source]
shuffle_set()[source]
skip_classes(skip_classes)[source]
skip_features(del_names)[source]
skip_images_id(ids)[source]
split_test_set(test_fraction=None, image_ids=[])[source]
use_features(use_names, whole_name=False)[source]
write_weka_file(file_name)[source]
average_cv_stats(validation_results_obj_list, labels)[source]
contour_scatter_plot(x, y, classes, classes_types, H_cmap='grey', x_name='x', y_name='y', n_levels=20, smoothing_scale=3, plot=False, ax=None, exclude_outliers=None, plot_H=True, plot_contour=True, plot_scatter=True, point_size=1, color_list=None, c_map_list=None, lw=1)[source]
do_MC(features, labels, n_repl=10)[source]
do_confusion_matrix(predictions, plot=True, save_plot=False, file_name='conf_matrix', file_type='pdf', path='./')[source]
do_cross_fold_validation(features, labels, labels_num, original_id, model, n_folds=10, verbose=False)[source]
do_plot(model, X, Y)[source]
do_plot_feature_entropy(feature, feature_name, labels, bins=10, normed=False, range=None, file_name='feat_entropy', plot=True, save_plot=False, path='./', file_type='pdf')[source]
do_prediction(model, train_set_features, train_set_labels, test_set_features, test_set_labels, test_set_original_id, labels_num, verbose=False)[source]
do_roc_test(predictions, classes, label, plot=True, save_plot=False, file_name='roc_test', file_type='pdf', path='./')[source]
eval_accuracy(prediction, actual_classes, verbose=False)[source]
eval_feature_entropy(feature, labels, bins=10, range=None)[source]
eval_features_importance(model, features_names, sort=True, path='./')[source]
eval_score(model, features, labels, cv=None)[source]
features_importance_trend(model, features, features_names, labels, n_trees, order_increasing=True, plot=False, save_plot=True, file_type='pdf', file_name='feat_imp_trend', path='./')[source]
find_best_tree_num(model, features, labels, cv=True, plot=False, save_plot=False, file_name='best_tree_num', file_type='pdf', path='./')[source]
load_data(files_list, label_file=False, label_size=32, label_col_name='class', ext=0, skip_ids=None, max_id=None, msk=None)[source]
main(argv=None)[source]
plot_feature_correlation_matrix(features_names, features, plot=True, save_plot=False, file_name='feat_corr_matrix', file_type='pdf', path='./')[source]
plot_feature_histogram(feature, feature_name, labels, bins=10, range=None, normed=False, KDE=False, gridsize=50, pred=None, plot=True, save_plot=False, file_name='feat_histogram', file_type='pdf', path='./', exclude_outliers=None, ax=None)[source]
plot_fetures_importance(features_importance, plot=True, save_plot=False, file_name='feat_imp', file_type='pdf', path='./')[source]
recursive_features_elimination(features_names, features, model, labels, percentile_th, feat_n_min, path='./', plot=False, save_plot=False, file_type='pdf', file_name='rec_feat_rem')[source]
remove_correlated_features(model, features, features_names, labels, max_corr, plot=False, save_plot=False, path='./')[source]
remove_field_name(data, skip_names)[source]
remove_most_corr_features(features, features_names, model, labels, max_corr, stage)[source]
run(training_set_data_list, training_set_labels_list, test_set_data_list=None, test_set_labels_list=None, test_set_split_fraction=None, test_set_split_ids=None, skip_attr=[], use_attr=[], skip_class=[], skip_images_ids=None, classifier='RFC', skip_nan_entries=True, n_estimators=100, do_pca=False, do_feat_std=False, eval_best_tree_num=False, eval_feature_importance=False, eval_feature_importance_trend=False, feature_correlation_matrix=False, cross_fold_N=None, MC_repl=None, max_par_corr=None, feature_name=None, feature_min=None, feature_max=None, verbose=False, test_th=None, entropy_th=None, class_mapping_dic=None, select_features_ids=None, plot=False, save_plot=False, roc_test_class=None, cv_conf_matrix=False, path='./')[source]
show_single_feature(model, train_set, features_names, labels)[source]
use_field_name(data, use_names)[source]
class validation_results(label, tp, tn, fp, fn, size, scorr_classif=None, wrong_classif=None)[source]

Bases: object

write_table(data, fp, class_col)[source]
write_weka_file(out_file, features, features_names, class_labels, relation='galaxy_shape')[source]