Recent Activity
data have;
input name $ letter $;
datalines;
john j
Jacobs J
Alex A
mathew m
harward h
Liner L
Meesa M
xenon x
;
run;
data want;
set have;
if letter in('A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h') then group='A';
if letter in('I' 'J' 'K' 'L' 'M' 'N' 'O' 'i' 'j' 'k' 'l' 'm' 'n' 'o') then group = 'B';
if letter in('P' 'Q' 'R' 'S' 'T' 'U' 'W' 'X' 'Y' 'Z' 'p' 'q' 'r' 's' 't' 'u' 'w' 'x' 'y' 'z')
then group = 'C';
run;
this is the result i want from the code i have written above but is there any category like string.ascii_letters (similar to python) from which i can assign group so that i dont have to write individual letters like above?
... View more

0
4
I am Learning the proc template. I tried to create a box plot with sashelp.cars. I've got to make it 80% of what I am thinking , I am stuck at the last 20 % to finish. Looking for help. My code creates graph correctly but stuck at creating the legend. Present graph only show the group legends text in legends, but 1. I want to add the number of to the legends text to display the counts. like (SUV-Asia (N= 25)). I tried creating the macro variable for counts and attach to the group., but it messing up my graph. 2. Is it possible to to diplay all the Asia related in one column, USA in another column, and Europe in another columns in Legends Here is my cars code *create data;
proc sql;
create table car_subjects as
select distinct
cats(Make, "-", Model) as USUBJID length=50,
Type as CarType length=15,
Origin as Region length=15,
Horsepower,
Weight,
MPG_City,
MPG_Highway,
MSRP
from sashelp.cars
where Make is not null and Model is not null;
quit;
*get Count to display in legend;
proc sql;
create table car_counts as
select CarType, Region, count(*) as N
from car_subjects
group by CarType, Region;
quit;
* Add count to dataset;
proc sort data=car_subjects;
by CarType Region;
run;
proc sort data=car_counts;
by CarType Region;
run;
/* Step 4: Merge and build final dummy dataset */
*try with few groups first;
data dummy_cars_final ( where = (region in ('USA' 'Asia' 'Europe') and cartype in ('SUV' 'Sedan' 'Truck')));
merge car_subjects(in=a) car_counts;
by CarType Region;
if a;
/* Derived variables */
region_type = catx("-", CarType, Region);
Score = round(40 + ranuni(0)*60, 0.1);
PowerIndex = round((Horsepower * 0.6 + Weight * 0.0005), 0.1);
EcoScore = round((MPG_City * 0.4 + MPG_Highway * 0.6), 0.1);
length LuxuryLevel $10;
if MSRP > 50000 then LuxuryLevel = "High";
else if MSRP > 30000 then LuxuryLevel = "Medium";
else LuxuryLevel = "Low";
drop Horsepower Weight MPG_City MPG_Highway MSRP;
run;
proc template;
define statgraph boxplot_template;
begingraph;
discreteattrmap name="comboMap" / ignorecase=true;
value "SUV-Asia" / fillattrs=(color=orange);
value "SUV-Europe" / fillattrs=(color=orange) ;
value "SUV-USA" / fillattrs=(color=orange) ;
value "Sedan-Asia" / fillattrs=(color=magenta) ;
value "Sedan-Europe" / fillattrs=(color=orange);
value "Sedan-USA" / fillattrs=(color=magenta);
value "Truck-Asia" / fillattrs=(color=grey) ;
value "Truck-USA" / fillattrs=(color=grey);
enddiscreteattrmap;
discreteattrvar attrvar=patgroup var=region_type attrmap='comboMap';
layout lattice / rows=2 columns=1 columndatarange=union ROWWEIGHTS=(.75 .25) ;
layout overlay /
xaxisopts=(label="Region" labelattrs=(size=12pt weight=bold)
tickvalueattrs=(size=12pt weight=bold))
yaxisopts=(offsetmin=0.05 offsetmax=0.05 label="Rating Score"
linearopts=(tickvaluesequence=(start=0 end=100 increment=10)));
boxplot x=region y=score /
name='BoxLegend'
group=patgroup
groupdisplay=cluster
boxwidth=0.6 clusterwidth=0.5
display=( median mean caps fillpattern )
medianattrs=(pattern=1);
endlayout;
discretelegend 'BoxLegend' /
border=false
valueattrs=(size=8pt weight = bold)
across=3 location=inside valign=top ;
endlayout;
endgraph;
end;
run;
proc template;
define style styles.mypatterns;
parent=styles.listing;
style GraphData1 from GraphData1 / fillpattern="R1" ;
style GraphData2 from GraphData2 / fillpattern="X1" ;
style GraphData3 from GraphData3 / fillpattern="E" ;
style GraphData4 from GraphData4 / fillpattern="R1" ;
style GraphData5 from GraphData5 / fillpattern="X1" ;
style GraphData6 from GraphData6 / fillpattern="E" ;
style GraphData7 from GraphData7 / fillpattern="R1" ;
style GraphData8 from GraphData8 / fillpattern="X1" ;
style GraphData9 from GraphData9/ fillpattern="E" ;
end;
run;
options orientation=landscape;
ods rtf file="boxplot_grouped.rtf" style=mypatterns;
ods graphics / reset width=8.5in height=5.5in border=off;
proc sgrender data=Dummy_cars_final template=boxplot_template;
run;
ods rtf close; Thank you.
... View more

0
5
I am writing my thesis and wanted to make a linear regression model, but unfortunately by data is not normally distributed. The assumptions of the linear regression model are the normal distribution of residuals and the constant variance of residuals, which are not satisfied in my case. My supervisor told me that: "You could create a regression model. As long as you don't start discussing the significance of the parameters, the model can be used for descriptive purposes." Is it really true? How can I describe a model like this for example:
grade = - 4.7 + 0.4*(math_exam_score)+0.1*(sex)
if the variables might not even be relevant (can I even say how big the effect was? for example if math exam score is one point higher then the grade was 0.4 higher?)? Also the R square is quite low (on some models 7%, some have like 35% so it isn't even that good at describing the grade..)
Also if I were to create that model, I have some conflicting exams (for example english exam score that can be either taken as a native or there is a simpler exam for those that are learning it as a second language). So there are very few (if any) that took both of these exams (native and second). Therefor, I can't really put both of these in the model, I would have to make two different ones. But since the same case is with a math exam (one is simpler, one is harder) and a extra exam (that only a few people took), it would in the end take 8 models (1. simpler math & native english & sex, 2. harder math & native english & sex, 1. simpler math & english as a second language & sex, .... , simpler math & native english & sex & extra exam). Seems pointless....
Any ideas? Thank you 🙂
... View more

0
4
Hello, I am trying to find min and max of the date per subject from all datasets in a library I have created I have created DateVariables dataset using proc content to list all date variables from table1 and table2. *Want1; *I would like to list all dates available for the subject in entire library; /*subjID date11 date12 date13 date21 ...(all dates available)*/ subjID date11 date12 date13 date21 --------------------------------------------------------------------------- subj1 01/01/21 01/01/22 01/01/23 02/02/02 subj2 02/01/21 02/01/22 02/01/23 *want2; *find min and max of the date by subjID ; subjID date_min date_max ------------------------------------------------------------ subj1 02/02/02 01/01/23 subj2 02/01/21 02/01/23 data d_test.table1; infile datalines ; input subjID $ ar5 $ var6 $ date11: ddmmyy10. date12: ddmmyy10. date13: ddmmyy10.; format date11 mmddyy10. date12 mmddyy10. date13 mmddyy10.; datalines; subj1 xxx sy 01/01/21 01/01/22 01/01/23 subj2 xttx ry 02/01/21 02/01/22 02/01/23 ; run; data d_test.table2; infile datalines ; input subjID $ var3 $ date21: ddmmyy10.; format date21 mmddyy10.; datalines; subj1 xx 02/02/02 subj2 z . ; run; *list all date variables from table1 and table2; data d_test.DateVariables; infile datalines ; input table $ varname $ ; datalines; table1 date11 table1 date12 table1 date13 table2 date21 table3 date31 table3 date32 table3 date33 table3 date34 ; run; Thanks for your guidance!
... View more

1
25
지난 게시글에는 SAS의 사용법과 함께 실제 statement 를 활용해 데이터를 효과적으로 분석하고 직관적인 보고서를 작성하는 방법에 대해서 다루었습니다.
이를 통해 SAS를 활용한 데이터 분석이 단순한 코드 작성이 아니라, 데이터를 보다 쉽게 이해하고 활용하는 과정에 대해서 알 수 있었습니다.
이번 글에서는 새로운 데이터셋을 활용한 실전 사례를 통해 SAS의 강력한 분석 기능을 한층 더 심도 있게 익혀보겠습니다.
■ 분석 데이터
데이터셋은 총 4개 입니다.
DataSet 1. Customers
1055 개의 데이터와 4개의 칼럼(Customer_name, Province , Customer_Segment, id) 으로 구성되어 있습니다.
PROC IMPORT OUT=WORK.CUSTOMER
DATAFILE= 'C:/customers.xlsx'
DBMS=XLSX
REPLACE;
GETNAMES = YES;
RUN;
proc print data = work.customer; run;
DataSet2. Product
1,093개의 데이터, 5개의 칼럼(Product_Category, Product_Sub_Category, Product_Name, Product_Container, Id)으로 구성되어 있습니다.
DstaSet3. Sales
2,999개의 데이터, 21개의 칼럼(Row_ID, Order_ID, Order_Date, Order_Quantity, Sales, Discount , Ship_Mode, Profit, Unit_Price, Shipping_Cost, Customer_Name, Province, Customer_Segment, Product_Category, Product_Sub_Category, Product_Name, Product_Container, Product_Base_Margin, Ship_Date, customer_id, product_id) 으로 구성되어 있습니다.
DataSet4. Returns
572개의 데이터, 2개의 칼럼( Order ID, Status) 로 구성되어 있습니다.
위 4개의 데이터셋은 Customer I.D 칼럼과 Product 칼럼을 기반으로 서로 연결되어 있으며, 하나의 데이터로 병합할 수 있습니다.
■ 데이터 병합
PROC SQL;
CREATE TABLE MERGE_DATA AS
SELECT A.*, B.*, C.*
FROM WORK.SALES AS A
LEFT JOIN WORK.CUSTOMER AS B
ON A.CUSTOMER_ID = B.ID
LEFT JOIN WORK.PRODUCTS AS C
ON A.PRODUCT_ID = C.ID;
QUIT;
PROC PRINT DATA=MERGE_DATA (OBS=20); RUN;
CREATE TABLE MERGE_DATA: 새로운 테이블 MERGE_DATA를 생성하는 SQL 문입니다.
SELECT A.*, B.*, C.* : A(SALES), B(CUSTOMER),C(PRODUCTS) 테이블의 모든 컬럼을 선택합니다.
LEFT JOIN WORK.CUSTOMER AS B
ON A.CUSTOMER_ID = B.ID : SALES 테이블(A)의 CUSTOMER_ID 칼럼과 PRODUCTS 테이블(B) 의 ID 칼럼을 기준으로 병합니다.
■ 고객별 매출 집계
데이터를 병합하고 SQL을 활용하여 고객별 매출 데이터를 계산하려고 합니다.
PROC SQL;
CREATE TABLE WORK.PERCUS AS
SELECT CUSTOMER_NAME, SUM(PROFIT) AS PROFIT
FROM WORK.MERGE_DATA
GROUP BY CUSTOMER_NAME
ORDER BY PROFIT DESC;
QUIT;
RUN;
WORK라이브러리에 MERGE_DATA 테이블에서 고객(CUSTOMER_NAME)별 총 이익( SUM(PROFIT))을 계산하는 코드입니다.
... View more

1
2
Unanswered topics
These topics from the past 30 days have no replies. Can you help?
Subject | Likes | Author | Latest Post |
---|---|---|---|
0 | |||
0 | |||
0 | |||
0 | |||
0 | |||
0 | |||
0 | |||
0 | |||
2 | |||
0 |